Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1998-2015, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File parse.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date          Name          Description
     14 *   05/26/99     stephen       Creation.
     15 *   02/25/00     weiv          Overhaul to write udata
     16 *   5/10/01      Ram           removed ustdio dependency
     17 *   06/10/2001  Dominic Ludlam <dom (at) recoil.org> Rewritten
     18 *******************************************************************************
     19 */
     20 
     21 // Safer use of UnicodeString.
     22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     23 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     24 #endif
     25 
     26 // Less important, but still a good idea.
     27 #ifndef UNISTR_FROM_STRING_EXPLICIT
     28 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     29 #endif
     30 
     31 #include <assert.h>
     32 #include "parse.h"
     33 #include "errmsg.h"
     34 #include "uhash.h"
     35 #include "cmemory.h"
     36 #include "cstring.h"
     37 #include "uinvchar.h"
     38 #include "read.h"
     39 #include "ustr.h"
     40 #include "reslist.h"
     41 #include "rbt_pars.h"
     42 #include "genrb.h"
     43 #include "unicode/stringpiece.h"
     44 #include "unicode/unistr.h"
     45 #include "unicode/ustring.h"
     46 #include "unicode/uscript.h"
     47 #include "unicode/utf16.h"
     48 #include "unicode/putil.h"
     49 #include "charstr.h"
     50 #include "collationbuilder.h"
     51 #include "collationdata.h"
     52 #include "collationdatareader.h"
     53 #include "collationdatawriter.h"
     54 #include "collationfastlatinbuilder.h"
     55 #include "collationinfo.h"
     56 #include "collationroot.h"
     57 #include "collationruleparser.h"
     58 #include "collationtailoring.h"
     59 #include <stdio.h>
     60 
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that are given special treatment while reading rule files. */
#define CR               0x000D  /* carriage return */
#define LF               0x000A  /* line feed */
#define SPACE            0x0020  /* space */
#define TAB              0x0009  /* horizontal tab */
#define ESCAPE           0x005C  /* backslash, introduces an escape sequence */
#define HASH             0x0023  /* '#', starts a comment that runs to the end of the line */
#define QUOTE            0x0027  /* apostrophe, toggles quoted mode */
#define ZERO             0x0030  /* digit zero */
#define STARTCOMMAND     0x005B  /* '[', opens a command whose contents are preserved */
#define ENDCOMMAND       0x005D  /* ']', closes a command */
#define OPENSQBRACKET    0x005B  /* '[' (same code point as STARTCOMMAND) */
#define CLOSESQBRACKET   0x005D  /* ']' (same code point as ENDCOMMAND) */
     76 
     77 using icu::CharString;
     78 using icu::LocalMemory;
     79 using icu::LocalPointer;
     80 using icu::LocalUCHARBUFPointer;
     81 using icu::StringPiece;
     82 using icu::UnicodeString;
     83 
     84 struct Lookahead
     85 {
     86      enum   ETokenType type;
     87      struct UString    value;
     88      struct UString    comment;
     89      uint32_t          line;
     90 };
     91 
     92 /* keep in sync with token defines in read.h */
     93 const char *tokenNames[TOK_TOKEN_COUNT] =
     94 {
     95      "string",             /* A string token, such as "MonthNames" */
     96      "'{'",                 /* An opening brace character */
     97      "'}'",                 /* A closing brace character */
     98      "','",                 /* A comma */
     99      "':'",                 /* A colon */
    100 
    101      "<end of file>",     /* End of the file has been reached successfully */
    102      "<end of line>"
    103 };
    104 
    105 /* Just to store "TRUE" */
    106 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
    107 
    108 typedef struct {
    109     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
    110     uint32_t          lookaheadPosition;
    111     UCHARBUF         *buffer;
    112     struct SRBRoot *bundle;
    113     const char     *inputdir;
    114     uint32_t        inputdirLength;
    115     const char     *outputdir;
    116     uint32_t        outputdirLength;
    117     const char     *filename;
    118     UBool           makeBinaryCollation;
    119     UBool           omitCollationRules;
    120 } ParseState;
    121 
    122 typedef struct SResource *
    123 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
    124 
    125 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
    126 
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
    140 static void
    141 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
    142 {
    143     static uint32_t initTypeStrings = 0;
    144     uint32_t i;
    145 
    146     if (!initTypeStrings)
    147     {
    148         initTypeStrings = 1;
    149     }
    150 
    151     state->lookaheadPosition   = 0;
    152     state->buffer              = buf;
    153 
    154     resetLineNumber();
    155 
    156     for (i = 0; i < MAX_LOOKAHEAD; i++)
    157     {
    158         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    159         if (U_FAILURE(*status))
    160         {
    161             return;
    162         }
    163     }
    164 
    165     *status = U_ZERO_ERROR;
    166 }
    167 
    168 static void
    169 cleanupLookahead(ParseState* state)
    170 {
    171     uint32_t i;
    172     for (i = 0; i <= MAX_LOOKAHEAD; i++)
    173     {
    174         ustr_deinit(&state->lookahead[i].value);
    175         ustr_deinit(&state->lookahead[i].comment);
    176     }
    177 
    178 }
    179 
    180 static enum ETokenType
    181 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
    182 {
    183     enum ETokenType result;
    184     uint32_t          i;
    185 
    186     result = state->lookahead[state->lookaheadPosition].type;
    187 
    188     if (tokenValue != NULL)
    189     {
    190         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
    191     }
    192 
    193     if (linenumber != NULL)
    194     {
    195         *linenumber = state->lookahead[state->lookaheadPosition].line;
    196     }
    197 
    198     if (comment != NULL)
    199     {
    200         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    201     }
    202 
    203     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
    204     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
    205     ustr_setlen(&state->lookahead[i].comment, 0, status);
    206     ustr_setlen(&state->lookahead[i].value, 0, status);
    207     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    208 
    209     /* printf("getToken, returning %s\n", tokenNames[result]); */
    210 
    211     return result;
    212 }
    213 
    214 static enum ETokenType
    215 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
    216 {
    217     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
    218 
    219     if (U_FAILURE(*status))
    220     {
    221         return TOK_ERROR;
    222     }
    223 
    224     if (lookaheadCount >= MAX_LOOKAHEAD)
    225     {
    226         *status = U_INTERNAL_PROGRAM_ERROR;
    227         return TOK_ERROR;
    228     }
    229 
    230     if (tokenValue != NULL)
    231     {
    232         *tokenValue = &state->lookahead[i].value;
    233     }
    234 
    235     if (linenumber != NULL)
    236     {
    237         *linenumber = state->lookahead[i].line;
    238     }
    239 
    240     if(comment != NULL){
    241         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    242     }
    243 
    244     return state->lookahead[i].type;
    245 }
    246 
    247 static void
    248 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
    249 {
    250     uint32_t        line;
    251 
    252     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
    253 
    254     if (linenumber != NULL)
    255     {
    256         *linenumber = line;
    257     }
    258 
    259     if (U_FAILURE(*status))
    260     {
    261         return;
    262     }
    263 
    264     if (token != expectedToken)
    265     {
    266         *status = U_INVALID_FORMAT_ERROR;
    267         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
    268     }
    269     else
    270     {
    271         *status = U_ZERO_ERROR;
    272     }
    273 }
    274 
    275 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
    276 {
    277     struct UString *tokenValue;
    278     char           *result;
    279     uint32_t        count;
    280 
    281     expect(state, TOK_STRING, &tokenValue, comment, line, status);
    282 
    283     if (U_FAILURE(*status))
    284     {
    285         return NULL;
    286     }
    287 
    288     count = u_strlen(tokenValue->fChars);
    289     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
    290         *status = U_INVALID_FORMAT_ERROR;
    291         error(*line, "invariant characters required for table keys, binary data, etc.");
    292         return NULL;
    293     }
    294 
    295     result = static_cast<char *>(uprv_malloc(count+1));
    296 
    297     if (result == NULL)
    298     {
    299         *status = U_MEMORY_ALLOCATION_ERROR;
    300         return NULL;
    301     }
    302 
    303     u_UCharsToChars(tokenValue->fChars, result, count+1);
    304     return result;
    305 }
    306 
    307 static struct SResource *
    308 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    309 {
    310     struct SResource *result = NULL;
    311     struct UString   *tokenValue;
    312     FileStream       *file          = NULL;
    313     char              filename[256] = { '\0' };
    314     char              cs[128]       = { '\0' };
    315     uint32_t          line;
    316     UBool quoted = FALSE;
    317     UCHARBUF *ucbuf=NULL;
    318     UChar32   c     = 0;
    319     const char* cp  = NULL;
    320     UChar *pTarget     = NULL;
    321     UChar *target      = NULL;
    322     UChar *targetLimit = NULL;
    323     int32_t size = 0;
    324 
    325     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    326 
    327     if(isVerbose()){
    328         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    329     }
    330 
    331     if (U_FAILURE(*status))
    332     {
    333         return NULL;
    334     }
    335     /* make the filename including the directory */
    336     if (state->inputdir != NULL)
    337     {
    338         uprv_strcat(filename, state->inputdir);
    339 
    340         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    341         {
    342             uprv_strcat(filename, U_FILE_SEP_STRING);
    343         }
    344     }
    345 
    346     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    347 
    348     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    349 
    350     if (U_FAILURE(*status))
    351     {
    352         return NULL;
    353     }
    354     uprv_strcat(filename, cs);
    355 
    356     if(state->omitCollationRules) {
    357         return res_none();
    358     }
    359 
    360     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    361 
    362     if (U_FAILURE(*status)) {
    363         error(line, "An error occured while opening the input file %s\n", filename);
    364         return NULL;
    365     }
    366 
    367     /* We allocate more space than actually required
    368     * since the actual size needed for storing UChars
    369     * is not known in UTF-8 byte stream
    370     */
    371     size        = ucbuf_size(ucbuf) + 1;
    372     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
    373     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    374     target      = pTarget;
    375     targetLimit = pTarget+size;
    376 
    377     /* read the rules into the buffer */
    378     while (target < targetLimit)
    379     {
    380         c = ucbuf_getc(ucbuf, status);
    381         if(c == QUOTE) {
    382             quoted = (UBool)!quoted;
    383         }
    384         /* weiv (06/26/2002): adding the following:
    385          * - preserving spaces in commands [...]
    386          * - # comments until the end of line
    387          */
    388         if (c == STARTCOMMAND && !quoted)
    389         {
    390             /* preserve commands
    391              * closing bracket will be handled by the
    392              * append at the end of the loop
    393              */
    394             while(c != ENDCOMMAND) {
    395                 U_APPEND_CHAR32_ONLY(c, target);
    396                 c = ucbuf_getc(ucbuf, status);
    397             }
    398         }
    399         else if (c == HASH && !quoted) {
    400             /* skip comments */
    401             while(c != CR && c != LF) {
    402                 c = ucbuf_getc(ucbuf, status);
    403             }
    404             continue;
    405         }
    406         else if (c == ESCAPE)
    407         {
    408             c = unescape(ucbuf, status);
    409 
    410             if (c == (UChar32)U_ERR)
    411             {
    412                 uprv_free(pTarget);
    413                 T_FileStream_close(file);
    414                 return NULL;
    415             }
    416         }
    417         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
    418         {
    419             /* ignore spaces carriage returns
    420             * and line feed unless in the form \uXXXX
    421             */
    422             continue;
    423         }
    424 
    425         /* Append UChar * after dissembling if c > 0xffff*/
    426         if (c != (UChar32)U_EOF)
    427         {
    428             U_APPEND_CHAR32_ONLY(c, target);
    429         }
    430         else
    431         {
    432             break;
    433         }
    434     }
    435 
    436     /* terminate the string */
    437     if(target < targetLimit){
    438         *target = 0x0000;
    439     }
    440 
    441     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
    442 
    443 
    444     ucbuf_close(ucbuf);
    445     uprv_free(pTarget);
    446     T_FileStream_close(file);
    447 
    448     return result;
    449 }
    450 
    451 static struct SResource *
    452 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    453 {
    454     struct SResource *result = NULL;
    455     struct UString   *tokenValue;
    456     FileStream       *file          = NULL;
    457     char              filename[256] = { '\0' };
    458     char              cs[128]       = { '\0' };
    459     uint32_t          line;
    460     UCHARBUF *ucbuf=NULL;
    461     const char* cp  = NULL;
    462     UChar *pTarget     = NULL;
    463     const UChar *pSource     = NULL;
    464     int32_t size = 0;
    465 
    466     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    467 
    468     if(isVerbose()){
    469         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    470     }
    471 
    472     if (U_FAILURE(*status))
    473     {
    474         return NULL;
    475     }
    476     /* make the filename including the directory */
    477     if (state->inputdir != NULL)
    478     {
    479         uprv_strcat(filename, state->inputdir);
    480 
    481         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    482         {
    483             uprv_strcat(filename, U_FILE_SEP_STRING);
    484         }
    485     }
    486 
    487     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    488 
    489     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    490 
    491     if (U_FAILURE(*status))
    492     {
    493         return NULL;
    494     }
    495     uprv_strcat(filename, cs);
    496 
    497 
    498     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    499 
    500     if (U_FAILURE(*status)) {
    501         error(line, "An error occured while opening the input file %s\n", filename);
    502         return NULL;
    503     }
    504 
    505     /* We allocate more space than actually required
    506     * since the actual size needed for storing UChars
    507     * is not known in UTF-8 byte stream
    508     */
    509     pSource = ucbuf_getBuffer(ucbuf, &size, status);
    510     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
    511     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    512 
    513 #if !UCONFIG_NO_TRANSLITERATION
    514     size = utrans_stripRules(pSource, size, pTarget, status);
    515 #else
    516     size = 0;
    517     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
    518 #endif
    519     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
    520 
    521     ucbuf_close(ucbuf);
    522     uprv_free(pTarget);
    523     T_FileStream_close(file);
    524 
    525     return result;
    526 }
    527 static ArrayResource* dependencyArray = NULL;
    528 
    529 static struct SResource *
    530 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    531 {
    532     struct SResource *result = NULL;
    533     struct SResource *elem = NULL;
    534     struct UString   *tokenValue;
    535     uint32_t          line;
    536     char              filename[256] = { '\0' };
    537     char              cs[128]       = { '\0' };
    538 
    539     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    540 
    541     if(isVerbose()){
    542         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    543     }
    544 
    545     if (U_FAILURE(*status))
    546     {
    547         return NULL;
    548     }
    549     /* make the filename including the directory */
    550     if (state->outputdir != NULL)
    551     {
    552         uprv_strcat(filename, state->outputdir);
    553 
    554         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
    555         {
    556             uprv_strcat(filename, U_FILE_SEP_STRING);
    557         }
    558     }
    559 
    560     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    561 
    562     if (U_FAILURE(*status))
    563     {
    564         return NULL;
    565     }
    566     uprv_strcat(filename, cs);
    567     if(!T_FileStream_file_exists(filename)){
    568         if(isStrict()){
    569             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    570         }else{
    571             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    572         }
    573     }
    574     if(dependencyArray==NULL){
    575         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
    576     }
    577     if(tag!=NULL){
    578         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    579     }
    580     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
    581 
    582     dependencyArray->add(elem);
    583 
    584     if (U_FAILURE(*status))
    585     {
    586         return NULL;
    587     }
    588     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    589     return result;
    590 }
    591 static struct SResource *
    592 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    593 {
    594     struct UString   *tokenValue;
    595     struct SResource *result = NULL;
    596 
    597 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
    598     {
    599         return parseUCARules(tag, startline, status);
    600     }*/
    601     if(isVerbose()){
    602         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    603     }
    604     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    605 
    606     if (U_SUCCESS(*status))
    607     {
    608         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    609         doesn't survive expect either) */
    610 
    611         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    612         if(U_SUCCESS(*status) && result) {
    613             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    614 
    615             if (U_FAILURE(*status))
    616             {
    617                 res_close(result);
    618                 return NULL;
    619             }
    620         }
    621     }
    622 
    623     return result;
    624 }
    625 
    626 static struct SResource *
    627 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
    628 {
    629     struct UString   *tokenValue;
    630     struct SResource *result  = NULL;
    631 
    632     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    633 
    634     if(isVerbose()){
    635         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    636     }
    637 
    638     if (U_SUCCESS(*status))
    639     {
    640         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    641         doesn't survive expect either) */
    642 
    643         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    644 
    645         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    646 
    647         if (U_FAILURE(*status))
    648         {
    649             res_close(result);
    650             return NULL;
    651         }
    652     }
    653 
    654     return result;
    655 }
    656 
    657 #if !UCONFIG_NO_COLLATION
    658 
    659 namespace {
    660 
    661 static struct SResource* resLookup(struct SResource* res, const char* key){
    662     if (res == res_none() || !res->isTable()) {
    663         return NULL;
    664     }
    665 
    666     TableResource *list = static_cast<TableResource *>(res);
    667     SResource *current = list->fFirst;
    668     while (current != NULL) {
    669         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
    670             return current;
    671         }
    672         current = current->fNext;
    673     }
    674     return NULL;
    675 }
    676 
    677 class GenrbImporter : public icu::CollationRuleParser::Importer {
    678 public:
    679     GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
    680     virtual ~GenrbImporter();
    681     virtual void getRules(
    682             const char *localeID, const char *collationType,
    683             UnicodeString &rules,
    684             const char *&errorReason, UErrorCode &errorCode);
    685 
    686 private:
    687     const char *inputDir;
    688     const char *outputDir;
    689 };
    690 
    691 GenrbImporter::~GenrbImporter() {}
    692 
    693 void
    694 GenrbImporter::getRules(
    695         const char *localeID, const char *collationType,
    696         UnicodeString &rules,
    697         const char *& /*errorReason*/, UErrorCode &errorCode) {
    698     CharString filename(localeID, errorCode);
    699     for(int32_t i = 0; i < filename.length(); i++){
    700         if(filename[i] == '-'){
    701             filename.data()[i] = '_';
    702         }
    703     }
    704     filename.append(".txt", errorCode);
    705     if (U_FAILURE(errorCode)) {
    706         return;
    707     }
    708     CharString inputDirBuf;
    709     CharString openFileName;
    710     if(inputDir == NULL) {
    711         const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
    712         if (filenameBegin != NULL) {
    713             /*
    714              * When a filename ../../../data/root.txt is specified,
    715              * we presume that the input directory is ../../../data
    716              * This is very important when the resource file includes
    717              * another file, like UCARules.txt or thaidict.brk.
    718              */
    719             StringPiece dir = filename.toStringPiece();
    720             const char *filenameLimit = filename.data() + filename.length();
    721             dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
    722             inputDirBuf.append(dir, errorCode);
    723             inputDir = inputDirBuf.data();
    724         }
    725     }else{
    726         int32_t dirlen  = (int32_t)uprv_strlen(inputDir);
    727 
    728         if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
    729             /*
    730              * append the input dir to openFileName if the first char in
    731              * filename is not file separator char and the last char input directory is  not '.'.
    732              * This is to support :
    733              * genrb -s. /home/icu/data
    734              * genrb -s. icu/data
    735              * The user cannot mix notations like
    736              * genrb -s. /icu/data --- the absolute path specified. -s redundant
    737              * user should use
    738              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
    739              */
    740             openFileName.append(inputDir, dirlen, errorCode);
    741             if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
    742                 openFileName.append(U_FILE_SEP_CHAR, errorCode);
    743             }
    744         }
    745     }
    746     openFileName.append(filename, errorCode);
    747     if(U_FAILURE(errorCode)) {
    748         return;
    749     }
    750     // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
    751     const char* cp = "";
    752     LocalUCHARBUFPointer ucbuf(
    753             ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
    754     if(errorCode == U_FILE_ACCESS_ERROR) {
    755         fprintf(stderr, "couldn't open file %s\n", openFileName.data());
    756         return;
    757     }
    758     if (ucbuf.isNull() || U_FAILURE(errorCode)) {
    759         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
    760         return;
    761     }
    762 
    763     /* Parse the data into an SRBRoot */
    764     struct SRBRoot *data =
    765             parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode);
    766     if (U_FAILURE(errorCode)) {
    767         return;
    768     }
    769 
    770     struct SResource *root = data->fRoot;
    771     struct SResource *collations = resLookup(root, "collations");
    772     if (collations != NULL) {
    773       struct SResource *collation = resLookup(collations, collationType);
    774       if (collation != NULL) {
    775         struct SResource *sequence = resLookup(collation, "Sequence");
    776         if (sequence != NULL && sequence->isString()) {
    777           // No string pointer aliasing so that we need not hold onto the resource bundle.
    778           StringResource *sr = static_cast<StringResource *>(sequence);
    779           rules = sr->fString;
    780         }
    781       }
    782     }
    783 }
    784 
    785 // Quick-and-dirty escaping function.
    786 // Assumes that we are on an ASCII-based platform.
    787 static void
    788 escape(const UChar *s, char *buffer) {
    789     int32_t length = u_strlen(s);
    790     int32_t i = 0;
    791     for (;;) {
    792         UChar32 c;
    793         U16_NEXT(s, i, length, c);
    794         if (c == 0) {
    795             *buffer = 0;
    796             return;
    797         } else if (0x20 <= c && c <= 0x7e) {
    798             // printable ASCII
    799             *buffer++ = (char)c;  // assumes ASCII-based platform
    800         } else {
    801             buffer += sprintf(buffer, "\\u%04X", (int)c);
    802         }
    803     }
    804 }
    805 
    806 }  // namespace
    807 
    808 #endif  // !UCONFIG_NO_COLLATION
    809 
    810 static TableResource *
    811 addCollation(ParseState* state, TableResource  *result, const char *collationType,
    812              uint32_t startline, UErrorCode *status)
    813 {
    814     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
    815     struct SResource  *member = NULL;
    816     struct UString    *tokenValue;
    817     struct UString     comment;
    818     enum   ETokenType  token;
    819     char               subtag[1024];
    820     UnicodeString      rules;
    821     UBool              haveRules = FALSE;
    822     UVersionInfo       version;
    823     uint32_t           line;
    824 
    825     /* '{' . (name resource)* '}' */
    826     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
    827 
    828     for (;;)
    829     {
    830         ustr_init(&comment);
    831         token = getToken(state, &tokenValue, &comment, &line, status);
    832 
    833         if (token == TOK_CLOSE_BRACE)
    834         {
    835             break;
    836         }
    837 
    838         if (token != TOK_STRING)
    839         {
    840             res_close(result);
    841             *status = U_INVALID_FORMAT_ERROR;
    842 
    843             if (token == TOK_EOF)
    844             {
    845                 error(startline, "unterminated table");
    846             }
    847             else
    848             {
    849                 error(line, "Unexpected token %s", tokenNames[token]);
    850             }
    851 
    852             return NULL;
    853         }
    854 
    855         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    856 
    857         if (U_FAILURE(*status))
    858         {
    859             res_close(result);
    860             return NULL;
    861         }
    862 
    863         member = parseResource(state, subtag, NULL, status);
    864 
    865         if (U_FAILURE(*status))
    866         {
    867             res_close(result);
    868             return NULL;
    869         }
    870         if (result == NULL)
    871         {
    872             // Ignore the parsed resources, continue parsing.
    873         }
    874         else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
    875         {
    876             StringResource *sr = static_cast<StringResource *>(member);
    877             char     ver[40];
    878             int32_t length = sr->length();
    879 
    880             if (length >= UPRV_LENGTHOF(ver))
    881             {
    882                 length = UPRV_LENGTHOF(ver) - 1;
    883             }
    884 
    885             sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
    886             u_versionFromString(version, ver);
    887 
    888             result->add(member, line, *status);
    889             member = NULL;
    890         }
    891         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
    892         {
    893             /* discard duplicate %%CollationBin if any*/
    894         }
    895         else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
    896         {
    897             StringResource *sr = static_cast<StringResource *>(member);
    898             rules = sr->fString;
    899             haveRules = TRUE;
    900             // Defer building the collator until we have seen
    901             // all sub-elements of the collation table, including the Version.
    902             /* in order to achieve smaller data files, we can direct genrb */
    903             /* to omit collation rules */
    904             if(!state->omitCollationRules) {
    905                 result->add(member, line, *status);
    906                 member = NULL;
    907             }
    908         }
    909         else  // Just copy non-special items.
    910         {
    911             result->add(member, line, *status);
    912             member = NULL;
    913         }
    914         res_close(member);  // TODO: use LocalPointer
    915         if (U_FAILURE(*status))
    916         {
    917             res_close(result);
    918             return NULL;
    919         }
    920     }
    921 
    922     if (!haveRules) { return result; }
    923 
    924 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
    925     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
    926     (void)collationType;
    927 #else
    928     // CLDR ticket #3949, ICU ticket #8082:
    929     // Do not build collation binary data for for-import-only "private" collation rule strings.
    930     if (uprv_strncmp(collationType, "private-", 8) == 0) {
    931         if(isVerbose()) {
    932             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    933         }
    934         return result;
    935     }
    936 
    937     if(!state->makeBinaryCollation) {
    938         if(isVerbose()) {
    939             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    940         }
    941         return result;
    942     }
    943     UErrorCode intStatus = U_ZERO_ERROR;
    944     UParseError parseError;
    945     uprv_memset(&parseError, 0, sizeof(parseError));
    946     GenrbImporter importer(state->inputdir, state->outputdir);
    947     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
    948     if(U_FAILURE(intStatus)) {
    949         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
    950         res_close(result);
    951         return NULL;  // TODO: use LocalUResourceBundlePointer for result
    952     }
    953     icu::CollationBuilder builder(base, intStatus);
    954     if(uprv_strncmp(collationType, "search", 6) == 0) {
    955         builder.disableFastLatin();  // build fast-Latin table unless search collator
    956     }
    957     LocalPointer<icu::CollationTailoring> t(
    958             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
    959     if(U_FAILURE(intStatus)) {
    960         const char *reason = builder.getErrorReason();
    961         if(reason == NULL) { reason = ""; }
    962         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
    963                 state->filename, collationType,
    964                 (long)parseError.offset, u_errorName(intStatus), reason);
    965         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
    966             // Print pre- and post-context.
    967             char preBuffer[100], postBuffer[100];
    968             escape(parseError.preContext, preBuffer);
    969             escape(parseError.postContext, postBuffer);
    970             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
    971         }
    972         if(isStrict() || t.isNull()) {
    973             *status = intStatus;
    974             res_close(result);
    975             return NULL;
    976         }
    977     }
    978     icu::LocalMemory<uint8_t> buffer;
    979     int32_t capacity = 100000;
    980     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
    981     if(dest == NULL) {
    982         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
    983                 (long)capacity);
    984         *status = U_MEMORY_ALLOCATION_ERROR;
    985         res_close(result);
    986         return NULL;
    987     }
    988     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
    989     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
    990             *t, *t->settings, indexes, dest, capacity, intStatus);
    991     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
    992         intStatus = U_ZERO_ERROR;
    993         capacity = totalSize;
    994         dest = buffer.allocateInsteadAndCopy(capacity);
    995         if(dest == NULL) {
    996             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
    997                     (long)capacity);
    998             *status = U_MEMORY_ALLOCATION_ERROR;
    999             res_close(result);
   1000             return NULL;
   1001         }
   1002         totalSize = icu::CollationDataWriter::writeTailoring(
   1003                 *t, *t->settings, indexes, dest, capacity, intStatus);
   1004     }
   1005     if(U_FAILURE(intStatus)) {
   1006         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
   1007                 u_errorName(intStatus));
   1008         res_close(result);
   1009         return NULL;
   1010     }
   1011     if(isVerbose()) {
   1012         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
   1013         icu::CollationInfo::printSizes(totalSize, indexes);
   1014         if(t->settings->hasReordering()) {
   1015             printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
   1016             icu::CollationInfo::printReorderRanges(
   1017                     *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
   1018         }
   1019     }
   1020     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
   1021     result->add(collationBin, line, *status);
   1022     if (U_FAILURE(*status)) {
   1023         res_close(result);
   1024         return NULL;
   1025     }
   1026 #endif
   1027     return result;
   1028 }
   1029 
   1030 static UBool
   1031 keepCollationType(const char *type) {  // android-changed
   1032     // BEGIN android-added
   1033     if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
   1034     if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
   1035     // END android-added
   1036     return TRUE;
   1037 }
   1038 
   1039 static struct SResource *
   1040 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
   1041 {
   1042     TableResource  *result = NULL;
   1043     struct SResource  *member = NULL;
   1044     struct UString    *tokenValue;
   1045     struct UString     comment;
   1046     enum   ETokenType  token;
   1047     char               subtag[1024], typeKeyword[1024];
   1048     uint32_t           line;
   1049 
   1050     result = table_open(state->bundle, tag, NULL, status);
   1051 
   1052     if (result == NULL || U_FAILURE(*status))
   1053     {
   1054         return NULL;
   1055     }
   1056     if(isVerbose()){
   1057         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1058     }
   1059     if(!newCollation) {
   1060         return addCollation(state, result, "(no type)", startline, status);
   1061     }
   1062     else {
   1063         for(;;) {
   1064             ustr_init(&comment);
   1065             token = getToken(state, &tokenValue, &comment, &line, status);
   1066 
   1067             if (token == TOK_CLOSE_BRACE)
   1068             {
   1069                 return result;
   1070             }
   1071 
   1072             if (token != TOK_STRING)
   1073             {
   1074                 res_close(result);
   1075                 *status = U_INVALID_FORMAT_ERROR;
   1076 
   1077                 if (token == TOK_EOF)
   1078                 {
   1079                     error(startline, "unterminated table");
   1080                 }
   1081                 else
   1082                 {
   1083                     error(line, "Unexpected token %s", tokenNames[token]);
   1084                 }
   1085 
   1086                 return NULL;
   1087             }
   1088 
   1089             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1090 
   1091             if (U_FAILURE(*status))
   1092             {
   1093                 res_close(result);
   1094                 return NULL;
   1095             }
   1096 
   1097             if (uprv_strcmp(subtag, "default") == 0)
   1098             {
   1099                 member = parseResource(state, subtag, NULL, status);
   1100 
   1101                 if (U_FAILURE(*status))
   1102                 {
   1103                     res_close(result);
   1104                     return NULL;
   1105                 }
   1106 
   1107                 result->add(member, line, *status);
   1108             }
   1109             else
   1110             {
   1111                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
   1112                 /* this probably needs to be refactored or recursively use the parser */
   1113                 /* first we assume that our collation table won't have the explicit type */
   1114                 /* then, we cannot handle aliases */
   1115                 if(token == TOK_OPEN_BRACE) {
   1116                     token = getToken(state, &tokenValue, &comment, &line, status);
   1117                     TableResource *collationRes;
   1118                     if (keepCollationType(subtag)) {
   1119                         collationRes = table_open(state->bundle, subtag, NULL, status);
   1120                     } else {
   1121                         collationRes = NULL;
   1122                     }
   1123                     // need to parse the collation data regardless
   1124                     collationRes = addCollation(state, collationRes, subtag, startline, status);
   1125                     if (collationRes != NULL) {
   1126                         result->add(collationRes, startline, *status);
   1127                     }
   1128                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
   1129                     /* we could have a table too */
   1130                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
   1131                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
   1132                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
   1133                         member = parseResource(state, subtag, NULL, status);
   1134                         if (U_FAILURE(*status))
   1135                         {
   1136                             res_close(result);
   1137                             return NULL;
   1138                         }
   1139 
   1140                         result->add(member, line, *status);
   1141                     } else {
   1142                         res_close(result);
   1143                         *status = U_INVALID_FORMAT_ERROR;
   1144                         return NULL;
   1145                     }
   1146                 } else {
   1147                     res_close(result);
   1148                     *status = U_INVALID_FORMAT_ERROR;
   1149                     return NULL;
   1150                 }
   1151             }
   1152 
   1153             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
   1154 
   1155             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
   1156 
   1157             if (U_FAILURE(*status))
   1158             {
   1159                 res_close(result);
   1160                 return NULL;
   1161             }
   1162         }
   1163     }
   1164 }
   1165 
   1166 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
   1167    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
   1168 static struct SResource *
   1169 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
   1170 {
   1171     struct SResource  *member = NULL;
   1172     struct UString    *tokenValue=NULL;
   1173     struct UString    comment;
   1174     enum   ETokenType token;
   1175     char              subtag[1024];
   1176     uint32_t          line;
   1177     UBool             readToken = FALSE;
   1178 
   1179     /* '{' . (name resource)* '}' */
   1180 
   1181     if(isVerbose()){
   1182         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
   1183     }
   1184     for (;;)
   1185     {
   1186         ustr_init(&comment);
   1187         token = getToken(state, &tokenValue, &comment, &line, status);
   1188 
   1189         if (token == TOK_CLOSE_BRACE)
   1190         {
   1191             if (!readToken) {
   1192                 warning(startline, "Encountered empty table");
   1193             }
   1194             return table;
   1195         }
   1196 
   1197         if (token != TOK_STRING)
   1198         {
   1199             *status = U_INVALID_FORMAT_ERROR;
   1200 
   1201             if (token == TOK_EOF)
   1202             {
   1203                 error(startline, "unterminated table");
   1204             }
   1205             else
   1206             {
   1207                 error(line, "unexpected token %s", tokenNames[token]);
   1208             }
   1209 
   1210             return NULL;
   1211         }
   1212 
   1213         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
   1214             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1215         } else {
   1216             *status = U_INVALID_FORMAT_ERROR;
   1217             error(line, "invariant characters required for table keys");
   1218             return NULL;
   1219         }
   1220 
   1221         if (U_FAILURE(*status))
   1222         {
   1223             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
   1224             return NULL;
   1225         }
   1226 
   1227         member = parseResource(state, subtag, &comment, status);
   1228 
   1229         if (member == NULL || U_FAILURE(*status))
   1230         {
   1231             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
   1232             return NULL;
   1233         }
   1234 
   1235         table->add(member, line, *status);
   1236 
   1237         if (U_FAILURE(*status))
   1238         {
   1239             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
   1240             return NULL;
   1241         }
   1242         readToken = TRUE;
   1243         ustr_deinit(&comment);
   1244    }
   1245 
   1246     /* not reached */
   1247     /* A compiler warning will appear if all paths don't contain a return statement. */
   1248 /*     *status = U_INTERNAL_PROGRAM_ERROR;
   1249      return NULL;*/
   1250 }
   1251 
   1252 static struct SResource *
   1253 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1254 {
   1255     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
   1256     {
   1257         return parseCollationElements(state, tag, startline, FALSE, status);
   1258     }
   1259     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
   1260     {
   1261         return parseCollationElements(state, tag, startline, TRUE, status);
   1262     }
   1263     if(isVerbose()){
   1264         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1265     }
   1266 
   1267     TableResource *result = table_open(state->bundle, tag, comment, status);
   1268 
   1269     if (result == NULL || U_FAILURE(*status))
   1270     {
   1271         return NULL;
   1272     }
   1273     return realParseTable(state, result, tag, startline,  status);
   1274 }
   1275 
   1276 static struct SResource *
   1277 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1278 {
   1279     struct SResource  *member = NULL;
   1280     struct UString    *tokenValue;
   1281     struct UString    memberComments;
   1282     enum   ETokenType token;
   1283     UBool             readToken = FALSE;
   1284 
   1285     ArrayResource  *result = array_open(state->bundle, tag, comment, status);
   1286 
   1287     if (result == NULL || U_FAILURE(*status))
   1288     {
   1289         return NULL;
   1290     }
   1291     if(isVerbose()){
   1292         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1293     }
   1294 
   1295     ustr_init(&memberComments);
   1296 
   1297     /* '{' . resource [','] '}' */
   1298     for (;;)
   1299     {
   1300         /* reset length */
   1301         ustr_setlen(&memberComments, 0, status);
   1302 
   1303         /* check for end of array, but don't consume next token unless it really is the end */
   1304         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
   1305 
   1306 
   1307         if (token == TOK_CLOSE_BRACE)
   1308         {
   1309             getToken(state, NULL, NULL, NULL, status);
   1310             if (!readToken) {
   1311                 warning(startline, "Encountered empty array");
   1312             }
   1313             break;
   1314         }
   1315 
   1316         if (token == TOK_EOF)
   1317         {
   1318             res_close(result);
   1319             *status = U_INVALID_FORMAT_ERROR;
   1320             error(startline, "unterminated array");
   1321             return NULL;
   1322         }
   1323 
   1324         /* string arrays are a special case */
   1325         if (token == TOK_STRING)
   1326         {
   1327             getToken(state, &tokenValue, &memberComments, NULL, status);
   1328             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
   1329         }
   1330         else
   1331         {
   1332             member = parseResource(state, NULL, &memberComments, status);
   1333         }
   1334 
   1335         if (member == NULL || U_FAILURE(*status))
   1336         {
   1337             res_close(result);
   1338             return NULL;
   1339         }
   1340 
   1341         result->add(member);
   1342 
   1343         /* eat optional comma if present */
   1344         token = peekToken(state, 0, NULL, NULL, NULL, status);
   1345 
   1346         if (token == TOK_COMMA)
   1347         {
   1348             getToken(state, NULL, NULL, NULL, status);
   1349         }
   1350 
   1351         if (U_FAILURE(*status))
   1352         {
   1353             res_close(result);
   1354             return NULL;
   1355         }
   1356         readToken = TRUE;
   1357     }
   1358 
   1359     ustr_deinit(&memberComments);
   1360     return result;
   1361 }
   1362 
   1363 static struct SResource *
   1364 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1365 {
   1366     enum   ETokenType  token;
   1367     char              *string;
   1368     int32_t            value;
   1369     UBool              readToken = FALSE;
   1370     char              *stopstring;
   1371     uint32_t           len;
   1372     struct UString     memberComments;
   1373 
   1374     IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
   1375 
   1376     if (result == NULL || U_FAILURE(*status))
   1377     {
   1378         return NULL;
   1379     }
   1380 
   1381     if(isVerbose()){
   1382         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1383     }
   1384     ustr_init(&memberComments);
   1385     /* '{' . string [','] '}' */
   1386     for (;;)
   1387     {
   1388         ustr_setlen(&memberComments, 0, status);
   1389 
   1390         /* check for end of array, but don't consume next token unless it really is the end */
   1391         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
   1392 
   1393         if (token == TOK_CLOSE_BRACE)
   1394         {
   1395             /* it's the end, consume the close brace */
   1396             getToken(state, NULL, NULL, NULL, status);
   1397             if (!readToken) {
   1398                 warning(startline, "Encountered empty int vector");
   1399             }
   1400             ustr_deinit(&memberComments);
   1401             return result;
   1402         }
   1403 
   1404         string = getInvariantString(state, NULL, NULL, status);
   1405 
   1406         if (U_FAILURE(*status))
   1407         {
   1408             res_close(result);
   1409             return NULL;
   1410         }
   1411 
   1412         /* For handling illegal char in the Intvector */
   1413         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
   1414         len=(uint32_t)(stopstring-string);
   1415 
   1416         if(len==uprv_strlen(string))
   1417         {
   1418             result->add(value, *status);
   1419             uprv_free(string);
   1420             token = peekToken(state, 0, NULL, NULL, NULL, status);
   1421         }
   1422         else
   1423         {
   1424             uprv_free(string);
   1425             *status=U_INVALID_CHAR_FOUND;
   1426         }
   1427 
   1428         if (U_FAILURE(*status))
   1429         {
   1430             res_close(result);
   1431             return NULL;
   1432         }
   1433 
   1434         /* the comma is optional (even though it is required to prevent the reader from concatenating
   1435         consecutive entries) so that a missing comma on the last entry isn't an error */
   1436         if (token == TOK_COMMA)
   1437         {
   1438             getToken(state, NULL, NULL, NULL, status);
   1439         }
   1440         readToken = TRUE;
   1441     }
   1442 
   1443     /* not reached */
   1444     /* A compiler warning will appear if all paths don't contain a return statement. */
   1445 /*    intvector_close(result, status);
   1446     *status = U_INTERNAL_PROGRAM_ERROR;
   1447     return NULL;*/
   1448 }
   1449 
   1450 static struct SResource *
   1451 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1452 {
   1453     uint32_t line;
   1454     LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
   1455     if (string.isNull() || U_FAILURE(*status))
   1456     {
   1457         return NULL;
   1458     }
   1459 
   1460     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1461     if (U_FAILURE(*status))
   1462     {
   1463         return NULL;
   1464     }
   1465 
   1466     if(isVerbose()){
   1467         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1468     }
   1469 
   1470     uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
   1471     if (count > 0){
   1472         if((count % 2)==0){
   1473             LocalMemory<uint8_t> value;
   1474             if (value.allocateInsteadAndCopy(count) == NULL)
   1475             {
   1476                 *status = U_MEMORY_ALLOCATION_ERROR;
   1477                 return NULL;
   1478             }
   1479 
   1480             char toConv[3] = {'\0', '\0', '\0'};
   1481             for (uint32_t i = 0; i < count; i += 2)
   1482             {
   1483                 toConv[0] = string[i];
   1484                 toConv[1] = string[i + 1];
   1485 
   1486                 char *stopstring;
   1487                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
   1488                 uint32_t len=(uint32_t)(stopstring-toConv);
   1489 
   1490                 if(len!=2)
   1491                 {
   1492                     *status=U_INVALID_CHAR_FOUND;
   1493                     return NULL;
   1494                 }
   1495             }
   1496 
   1497             return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
   1498         }
   1499         else
   1500         {
   1501             *status = U_INVALID_CHAR_FOUND;
   1502             error(line, "Encountered invalid binary value (length is odd)");
   1503             return NULL;
   1504         }
   1505     }
   1506     else
   1507     {
   1508         warning(startline, "Encountered empty binary value");
   1509         return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
   1510     }
   1511 }
   1512 
   1513 static struct SResource *
   1514 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1515 {
   1516     struct SResource *result = NULL;
   1517     int32_t           value;
   1518     char             *string;
   1519     char             *stopstring;
   1520     uint32_t          len;
   1521 
   1522     string = getInvariantString(state, NULL, NULL, status);
   1523 
   1524     if (string == NULL || U_FAILURE(*status))
   1525     {
   1526         return NULL;
   1527     }
   1528 
   1529     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1530 
   1531     if (U_FAILURE(*status))
   1532     {
   1533         uprv_free(string);
   1534         return NULL;
   1535     }
   1536 
   1537     if(isVerbose()){
   1538         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1539     }
   1540 
   1541     if (uprv_strlen(string) <= 0)
   1542     {
   1543         warning(startline, "Encountered empty integer. Default value is 0.");
   1544     }
   1545 
   1546     /* Allow integer support for hexdecimal, octal digit and decimal*/
   1547     /* and handle illegal char in the integer*/
   1548     value = uprv_strtoul(string, &stopstring, 0);
   1549     len=(uint32_t)(stopstring-string);
   1550     if(len==uprv_strlen(string))
   1551     {
   1552         result = int_open(state->bundle, tag, value, comment, status);
   1553     }
   1554     else
   1555     {
   1556         *status=U_INVALID_CHAR_FOUND;
   1557     }
   1558     uprv_free(string);
   1559 
   1560     return result;
   1561 }
   1562 
   1563 static struct SResource *
   1564 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1565 {
   1566     uint32_t          line;
   1567     LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
   1568     if (U_FAILURE(*status))
   1569     {
   1570         return NULL;
   1571     }
   1572 
   1573     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1574 
   1575     if (U_FAILURE(*status))
   1576     {
   1577         return NULL;
   1578     }
   1579 
   1580     if(isVerbose()){
   1581         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1582     }
   1583 
   1584     /* Open the input file for reading */
   1585     CharString fullname;
   1586     if (state->inputdir != NULL) {
   1587         fullname.append(state->inputdir, *status);
   1588     }
   1589     fullname.appendPathPart(filename.getAlias(), *status);
   1590     if (U_FAILURE(*status)) {
   1591         return NULL;
   1592     }
   1593 
   1594     FileStream *file = T_FileStream_open(fullname.data(), "rb");
   1595     if (file == NULL)
   1596     {
   1597         error(line, "couldn't open input file %s", filename.getAlias());
   1598         *status = U_FILE_ACCESS_ERROR;
   1599         return NULL;
   1600     }
   1601 
   1602     int32_t len  = T_FileStream_size(file);
   1603     LocalMemory<uint8_t> data;
   1604     if(data.allocateInsteadAndCopy(len) == NULL)
   1605     {
   1606         *status = U_MEMORY_ALLOCATION_ERROR;
   1607         T_FileStream_close (file);
   1608         return NULL;
   1609     }
   1610 
   1611     /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
   1612     T_FileStream_close (file);
   1613 
   1614     return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
   1615 }
   1616 
   1617 static struct SResource *
   1618 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1619 {
   1620     struct SResource *result;
   1621     int32_t           len=0;
   1622     char             *filename;
   1623     uint32_t          line;
   1624     UChar *pTarget     = NULL;
   1625 
   1626     UCHARBUF *ucbuf;
   1627     char     *fullname = NULL;
   1628     int32_t  count     = 0;
   1629     const char* cp = NULL;
   1630     const UChar* uBuffer = NULL;
   1631 
   1632     filename = getInvariantString(state, &line, NULL, status);
   1633     count     = (int32_t)uprv_strlen(filename);
   1634 
   1635     if (U_FAILURE(*status))
   1636     {
   1637         return NULL;
   1638     }
   1639 
   1640     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1641 
   1642     if (U_FAILURE(*status))
   1643     {
   1644         uprv_free(filename);
   1645         return NULL;
   1646     }
   1647 
   1648     if(isVerbose()){
   1649         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1650     }
   1651 
   1652     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
   1653     /* test for NULL */
   1654     if(fullname == NULL)
   1655     {
   1656         *status = U_MEMORY_ALLOCATION_ERROR;
   1657         uprv_free(filename);
   1658         return NULL;
   1659     }
   1660 
   1661     if(state->inputdir!=NULL){
   1662         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   1663         {
   1664 
   1665             uprv_strcpy(fullname, state->inputdir);
   1666 
   1667             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
   1668             fullname[state->inputdirLength + 1] = '\0';
   1669 
   1670             uprv_strcat(fullname, filename);
   1671         }
   1672         else
   1673         {
   1674             uprv_strcpy(fullname, state->inputdir);
   1675             uprv_strcat(fullname, filename);
   1676         }
   1677     }else{
   1678         uprv_strcpy(fullname,filename);
   1679     }
   1680 
   1681     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
   1682 
   1683     if (U_FAILURE(*status)) {
   1684         error(line, "couldn't open input file %s\n", filename);
   1685         return NULL;
   1686     }
   1687 
   1688     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
   1689     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
   1690 
   1691     ucbuf_close(ucbuf);
   1692 
   1693     uprv_free(pTarget);
   1694 
   1695     uprv_free(filename);
   1696     uprv_free(fullname);
   1697 
   1698     return result;
   1699 }
   1700 
   1701 
   1702 
   1703 
   1704 
   1705 U_STRING_DECL(k_type_string,    "string",    6);
   1706 U_STRING_DECL(k_type_binary,    "binary",    6);
   1707 U_STRING_DECL(k_type_bin,       "bin",       3);
   1708 U_STRING_DECL(k_type_table,     "table",     5);
   1709 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
   1710 U_STRING_DECL(k_type_int,       "int",       3);
   1711 U_STRING_DECL(k_type_integer,   "integer",   7);
   1712 U_STRING_DECL(k_type_array,     "array",     5);
   1713 U_STRING_DECL(k_type_alias,     "alias",     5);
   1714 U_STRING_DECL(k_type_intvector, "intvector", 9);
   1715 U_STRING_DECL(k_type_import,    "import",    6);
   1716 U_STRING_DECL(k_type_include,   "include",   7);
   1717 
   1718 /* Various non-standard processing plugins that create one or more special resources. */
   1719 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1720 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
   1721 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
   1722 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
   1723 
   1724 typedef enum EResourceType
   1725 {
   1726     RESTYPE_UNKNOWN,
   1727     RESTYPE_STRING,
   1728     RESTYPE_BINARY,
   1729     RESTYPE_TABLE,
   1730     RESTYPE_TABLE_NO_FALLBACK,
   1731     RESTYPE_INTEGER,
   1732     RESTYPE_ARRAY,
   1733     RESTYPE_ALIAS,
   1734     RESTYPE_INTVECTOR,
   1735     RESTYPE_IMPORT,
   1736     RESTYPE_INCLUDE,
   1737     RESTYPE_PROCESS_UCA_RULES,
   1738     RESTYPE_PROCESS_COLLATION,
   1739     RESTYPE_PROCESS_TRANSLITERATOR,
   1740     RESTYPE_PROCESS_DEPENDENCY,
   1741     RESTYPE_RESERVED
   1742 } EResourceType;
   1743 
   1744 static struct {
   1745     const char *nameChars;   /* only used for debugging */
   1746     const UChar *nameUChars;
   1747     ParseResourceFunction *parseFunction;
   1748 } gResourceTypes[] = {
   1749     {"Unknown", NULL, NULL},
   1750     {"string", k_type_string, parseString},
   1751     {"binary", k_type_binary, parseBinary},
   1752     {"table", k_type_table, parseTable},
   1753     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
   1754     {"integer", k_type_integer, parseInteger},
   1755     {"array", k_type_array, parseArray},
   1756     {"alias", k_type_alias, parseAlias},
   1757     {"intvector", k_type_intvector, parseIntVector},
   1758     {"import", k_type_import, parseImport},
   1759     {"include", k_type_include, parseInclude},
   1760     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
   1761     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
   1762     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
   1763     {"process(dependency)", k_type_plugin_dependency, parseDependency},
   1764     {"reserved", NULL, NULL}
   1765 };
   1766 
   1767 void initParser()
   1768 {
   1769     U_STRING_INIT(k_type_string,    "string",    6);
   1770     U_STRING_INIT(k_type_binary,    "binary",    6);
   1771     U_STRING_INIT(k_type_bin,       "bin",       3);
   1772     U_STRING_INIT(k_type_table,     "table",     5);
   1773     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
   1774     U_STRING_INIT(k_type_int,       "int",       3);
   1775     U_STRING_INIT(k_type_integer,   "integer",   7);
   1776     U_STRING_INIT(k_type_array,     "array",     5);
   1777     U_STRING_INIT(k_type_alias,     "alias",     5);
   1778     U_STRING_INIT(k_type_intvector, "intvector", 9);
   1779     U_STRING_INIT(k_type_import,    "import",    6);
   1780     U_STRING_INIT(k_type_include,   "include",   7);
   1781 
   1782     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1783     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
   1784     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
   1785     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
   1786 }
   1787 
   1788 static inline UBool isTable(enum EResourceType type) {
   1789     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
   1790 }
   1791 
   1792 static enum EResourceType
   1793 parseResourceType(ParseState* state, UErrorCode *status)
   1794 {
   1795     struct UString        *tokenValue;
   1796     struct UString        comment;
   1797     enum   EResourceType  result = RESTYPE_UNKNOWN;
   1798     uint32_t              line=0;
   1799     ustr_init(&comment);
   1800     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
   1801 
   1802     if (U_FAILURE(*status))
   1803     {
   1804         return RESTYPE_UNKNOWN;
   1805     }
   1806 
   1807     *status = U_ZERO_ERROR;
   1808 
   1809     /* Search for normal types */
   1810     result=RESTYPE_UNKNOWN;
   1811     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
   1812         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
   1813             break;
   1814         }
   1815     }
   1816     /* Now search for the aliases */
   1817     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
   1818         result = RESTYPE_INTEGER;
   1819     }
   1820     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
   1821         result = RESTYPE_BINARY;
   1822     }
   1823     else if (result == RESTYPE_RESERVED) {
   1824         char tokenBuffer[1024];
   1825         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
   1826         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
   1827         *status = U_INVALID_FORMAT_ERROR;
   1828         error(line, "unknown resource type '%s'", tokenBuffer);
   1829     }
   1830 
   1831     return result;
   1832 }
   1833 
   1834 /* parse a non-top-level resource */
   1835 static struct SResource *
   1836 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
   1837 {
   1838     enum   ETokenType      token;
   1839     enum   EResourceType  resType = RESTYPE_UNKNOWN;
   1840     ParseResourceFunction *parseFunction = NULL;
   1841     struct UString        *tokenValue;
   1842     uint32_t                 startline;
   1843     uint32_t                 line;
   1844 
   1845 
   1846     token = getToken(state, &tokenValue, NULL, &startline, status);
   1847 
   1848     if(isVerbose()){
   1849         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1850     }
   1851 
   1852     /* name . [ ':' type ] '{' resource '}' */
   1853     /* This function parses from the colon onwards.  If the colon is present, parse the
   1854     type then try to parse a resource of that type.  If there is no explicit type,
   1855     work it out using the lookahead tokens. */
   1856     switch (token)
   1857     {
   1858     case TOK_EOF:
   1859         *status = U_INVALID_FORMAT_ERROR;
   1860         error(startline, "Unexpected EOF encountered");
   1861         return NULL;
   1862 
   1863     case TOK_ERROR:
   1864         *status = U_INVALID_FORMAT_ERROR;
   1865         return NULL;
   1866 
   1867     case TOK_COLON:
   1868         resType = parseResourceType(state, status);
   1869         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
   1870 
   1871         if (U_FAILURE(*status))
   1872         {
   1873             return NULL;
   1874         }
   1875 
   1876         break;
   1877 
   1878     case TOK_OPEN_BRACE:
   1879         break;
   1880 
   1881     default:
   1882         *status = U_INVALID_FORMAT_ERROR;
   1883         error(startline, "syntax error while reading a resource, expected '{' or ':'");
   1884         return NULL;
   1885     }
   1886 
   1887 
   1888     if (resType == RESTYPE_UNKNOWN)
   1889     {
   1890         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
   1891         We could have any of the following:
   1892         { {         => array (nested)
   1893         { :/}       => array
   1894         { string ,  => string array
   1895 
   1896         { string {  => table
   1897 
   1898         { string :/{    => table
   1899         { string }      => string
   1900         */
   1901 
   1902         token = peekToken(state, 0, NULL, &line, NULL,status);
   1903 
   1904         if (U_FAILURE(*status))
   1905         {
   1906             return NULL;
   1907         }
   1908 
   1909         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
   1910         {
   1911             resType = RESTYPE_ARRAY;
   1912         }
   1913         else if (token == TOK_STRING)
   1914         {
   1915             token = peekToken(state, 1, NULL, &line, NULL, status);
   1916 
   1917             if (U_FAILURE(*status))
   1918             {
   1919                 return NULL;
   1920             }
   1921 
   1922             switch (token)
   1923             {
   1924             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
   1925             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
   1926             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
   1927             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
   1928             default:
   1929                 *status = U_INVALID_FORMAT_ERROR;
   1930                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
   1931                 return NULL;
   1932             }
   1933         }
   1934         else
   1935         {
   1936             *status = U_INVALID_FORMAT_ERROR;
   1937             error(line, "Unexpected token after '{'");
   1938             return NULL;
   1939         }
   1940 
   1941         /* printf("Type guessed as %s\n", resourceNames[resType]); */
   1942     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
   1943         *status = U_INVALID_FORMAT_ERROR;
   1944         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
   1945         return NULL;
   1946     }
   1947 
   1948 
   1949     /* We should now know what we need to parse next, so call the appropriate parser
   1950     function and return. */
   1951     parseFunction = gResourceTypes[resType].parseFunction;
   1952     if (parseFunction != NULL) {
   1953         return parseFunction(state, tag, startline, comment, status);
   1954     }
   1955     else {
   1956         *status = U_INTERNAL_PROGRAM_ERROR;
   1957         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
   1958     }
   1959 
   1960     return NULL;
   1961 }
   1962 
   1963 /* parse the top-level resource */
   1964 struct SRBRoot *
   1965 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
   1966       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
   1967 {
   1968     struct UString    *tokenValue;
   1969     struct UString    comment;
   1970     uint32_t           line;
   1971     enum EResourceType bundleType;
   1972     enum ETokenType    token;
   1973     ParseState state;
   1974     uint32_t i;
   1975 
   1976 
   1977     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
   1978     {
   1979         ustr_init(&state.lookahead[i].value);
   1980         ustr_init(&state.lookahead[i].comment);
   1981     }
   1982 
   1983     initLookahead(&state, buf, status);
   1984 
   1985     state.inputdir       = inputDir;
   1986     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
   1987     state.outputdir       = outputDir;
   1988     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
   1989     state.filename = filename;
   1990     state.makeBinaryCollation = makeBinaryCollation;
   1991     state.omitCollationRules = omitCollationRules;
   1992 
   1993     ustr_init(&comment);
   1994     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
   1995 
   1996     state.bundle = new SRBRoot(&comment, FALSE, *status);
   1997 
   1998     if (state.bundle == NULL || U_FAILURE(*status))
   1999     {
   2000         return NULL;
   2001     }
   2002 
   2003 
   2004     state.bundle->setLocale(tokenValue->fChars, *status);
   2005 
   2006     /* The following code is to make Empty bundle work no matter with :table specifer or not */
   2007     token = getToken(&state, NULL, NULL, &line, status);
   2008     if(token==TOK_COLON) {
   2009         *status=U_ZERO_ERROR;
   2010         bundleType=parseResourceType(&state, status);
   2011 
   2012         if(isTable(bundleType))
   2013         {
   2014             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
   2015         }
   2016         else
   2017         {
   2018             *status=U_PARSE_ERROR;
   2019              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
   2020         }
   2021     }
   2022     else
   2023     {
   2024         /* not a colon */
   2025         if(token==TOK_OPEN_BRACE)
   2026         {
   2027             *status=U_ZERO_ERROR;
   2028             bundleType=RESTYPE_TABLE;
   2029         }
   2030         else
   2031         {
   2032             /* neither colon nor open brace */
   2033             *status=U_PARSE_ERROR;
   2034             bundleType=RESTYPE_UNKNOWN;
   2035             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
   2036         }
   2037     }
   2038 
   2039     if (U_FAILURE(*status))
   2040     {
   2041         delete state.bundle;
   2042         return NULL;
   2043     }
   2044 
   2045     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
   2046         /*
   2047          * Parse a top-level table with the table(nofallback) declaration.
   2048          * This is the same as a regular table, but also sets the
   2049          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
   2050          */
   2051         state.bundle->fNoFallback=TRUE;
   2052     }
   2053     /* top-level tables need not handle special table names like "collations" */
   2054     assert(!state.bundle->fIsPoolBundle);
   2055     assert(state.bundle->fRoot->fType == URES_TABLE);
   2056     TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
   2057     realParseTable(&state, rootTable, NULL, line, status);
   2058     if(dependencyArray!=NULL){
   2059         rootTable->add(dependencyArray, 0, *status);
   2060         dependencyArray = NULL;
   2061     }
   2062    if (U_FAILURE(*status))
   2063     {
   2064         delete state.bundle;
   2065         res_close(dependencyArray);
   2066         return NULL;
   2067     }
   2068 
   2069     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
   2070     {
   2071         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
   2072         if(isStrict()){
   2073             *status = U_INVALID_FORMAT_ERROR;
   2074             return NULL;
   2075         }
   2076     }
   2077 
   2078     cleanupLookahead(&state);
   2079     ustr_deinit(&comment);
   2080     return state.bundle;
   2081 }
   2082