Home | History | Annotate | Download | only in genrb
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File parse.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date          Name          Description
     16 *   05/26/99     stephen       Creation.
     17 *   02/25/00     weiv          Overhaul to write udata
     18 *   5/10/01      Ram           removed ustdio dependency
     19 *   06/10/2001  Dominic Ludlam <dom (at) recoil.org> Rewritten
     20 *******************************************************************************
     21 */
     22 
     23 // Safer use of UnicodeString.
     24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     25 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     26 #endif
     27 
     28 // Less important, but still a good idea.
     29 #ifndef UNISTR_FROM_STRING_EXPLICIT
     30 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     31 #endif
     32 
     33 #include <assert.h>
     34 #include "parse.h"
     35 #include "errmsg.h"
     36 #include "uhash.h"
     37 #include "cmemory.h"
     38 #include "cstring.h"
     39 #include "uinvchar.h"
     40 #include "read.h"
     41 #include "ustr.h"
     42 #include "reslist.h"
     43 #include "rbt_pars.h"
     44 #include "genrb.h"
     45 #include "unicode/stringpiece.h"
     46 #include "unicode/unistr.h"
     47 #include "unicode/ustring.h"
     48 #include "unicode/uscript.h"
     49 #include "unicode/utf16.h"
     50 #include "unicode/putil.h"
     51 #include "charstr.h"
     52 #include "collationbuilder.h"
     53 #include "collationdata.h"
     54 #include "collationdatareader.h"
     55 #include "collationdatawriter.h"
     56 #include "collationfastlatinbuilder.h"
     57 #include "collationinfo.h"
     58 #include "collationroot.h"
     59 #include "collationruleparser.h"
     60 #include "collationtailoring.h"
     61 #include <stdio.h>
     62 
     63 /* Number of tokens to read ahead of the current stream position */
     64 #define MAX_LOOKAHEAD   3
     65 
     66 #define CR               0x000D
     67 #define LF               0x000A
     68 #define SPACE            0x0020
     69 #define TAB              0x0009
     70 #define ESCAPE           0x005C
     71 #define HASH             0x0023
     72 #define QUOTE            0x0027
     73 #define ZERO             0x0030
     74 #define STARTCOMMAND     0x005B
     75 #define ENDCOMMAND       0x005D
     76 #define OPENSQBRACKET    0x005B
     77 #define CLOSESQBRACKET   0x005D
     78 
     79 using icu::CharString;
     80 using icu::LocalMemory;
     81 using icu::LocalPointer;
     82 using icu::LocalUCHARBUFPointer;
     83 using icu::StringPiece;
     84 using icu::UnicodeString;
     85 
/* One slot of the token lookahead ring kept in ParseState: a token produced
 * by getNextToken() together with the data reported alongside it. */
struct Lookahead
{
     /* token kind returned by getNextToken() */
     enum   ETokenType type;
     /* token text filled in by getNextToken() */
     struct UString    value;
     /* comment text filled in by getNextToken() for this token */
     struct UString    comment;
     /* line number reported by getNextToken() for this token */
     uint32_t          line;
};
     93 
/* Printable names of the token types, indexed by enum ETokenType and used by
 * the parser's error messages (see expect()).
 * Keep in sync with the token defines in read.h.
 * NOTE(review): the last entry is labelled "<end of line>"; confirm that this
 * matches the token type at that index in read.h. */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
     "string",             /* A string token, such as "MonthNames" */
     "'{'",                 /* An opening brace character */
     "'}'",                 /* A closing brace character */
     "','",                 /* A comma */
     "':'",                 /* A colon */

     "<end of file>",     /* End of the file has been reached successfully */
     "<end of line>"
};
    106 
    107 /* Just to store "TRUE" */
    108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
    109 
/* Per-parse state threaded through all parse* functions in this file. */
typedef struct {
    /* circular buffer of MAX_LOOKAHEAD + 1 token slots */
    struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
    /* index of the slot holding the token the next getToken() call returns */
    uint32_t          lookaheadPosition;
    /* input handed to getNextToken() */
    UCHARBUF         *buffer;
    /* bundle passed to the string_open()/alias_open()/array_open() calls */
    struct SRBRoot *bundle;
    /* directory prefixed to file names by parseUCARules()/parseTransliterator(); may be NULL */
    const char     *inputdir;
    /* used to locate the last character of inputdir -- presumably strlen(inputdir); confirm at the caller */
    uint32_t        inputdirLength;
    /* directory prefixed to file names by parseDependency(); may be NULL */
    const char     *outputdir;
    /* used to locate the last character of outputdir -- presumably strlen(outputdir); confirm at the caller */
    uint32_t        outputdirLength;
    /* NOTE(review): presumably the name of the file being parsed; not read in this part of the file */
    const char     *filename;
    /* NOTE(review): presumably selects building binary collation data; not read in this part of the file */
    UBool           makeBinaryCollation;
    /* when set, parseUCARules() returns res_none() instead of reading the rule file */
    UBool           omitCollationRules;
} ParseState;
    123 
/* Signature shared by the per-type resource parsers in this file
 * (parseString, parseAlias, parseUCARules, ...): they receive the parse state,
 * the resource tag, the line on which the resource started, the comment that
 * preceded it, and an in/out error code, and return the new resource. */
typedef struct SResource *
ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);

/* Forward declaration: parseResource() is defined later in the file but is
 * called by the collation code before its definition. */
static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
    128 
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value does
   not survive the next call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
    142 static void
    143 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
    144 {
    145     static uint32_t initTypeStrings = 0;
    146     uint32_t i;
    147 
    148     if (!initTypeStrings)
    149     {
    150         initTypeStrings = 1;
    151     }
    152 
    153     state->lookaheadPosition   = 0;
    154     state->buffer              = buf;
    155 
    156     resetLineNumber();
    157 
    158     for (i = 0; i < MAX_LOOKAHEAD; i++)
    159     {
    160         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    161         if (U_FAILURE(*status))
    162         {
    163             return;
    164         }
    165     }
    166 
    167     *status = U_ZERO_ERROR;
    168 }
    169 
    170 static void
    171 cleanupLookahead(ParseState* state)
    172 {
    173     uint32_t i;
    174     for (i = 0; i <= MAX_LOOKAHEAD; i++)
    175     {
    176         ustr_deinit(&state->lookahead[i].value);
    177         ustr_deinit(&state->lookahead[i].comment);
    178     }
    179 
    180 }
    181 
    182 static enum ETokenType
    183 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
    184 {
    185     enum ETokenType result;
    186     uint32_t          i;
    187 
    188     result = state->lookahead[state->lookaheadPosition].type;
    189 
    190     if (tokenValue != NULL)
    191     {
    192         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
    193     }
    194 
    195     if (linenumber != NULL)
    196     {
    197         *linenumber = state->lookahead[state->lookaheadPosition].line;
    198     }
    199 
    200     if (comment != NULL)
    201     {
    202         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    203     }
    204 
    205     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
    206     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
    207     ustr_setlen(&state->lookahead[i].comment, 0, status);
    208     ustr_setlen(&state->lookahead[i].value, 0, status);
    209     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    210 
    211     /* printf("getToken, returning %s\n", tokenNames[result]); */
    212 
    213     return result;
    214 }
    215 
    216 static enum ETokenType
    217 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
    218 {
    219     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
    220 
    221     if (U_FAILURE(*status))
    222     {
    223         return TOK_ERROR;
    224     }
    225 
    226     if (lookaheadCount >= MAX_LOOKAHEAD)
    227     {
    228         *status = U_INTERNAL_PROGRAM_ERROR;
    229         return TOK_ERROR;
    230     }
    231 
    232     if (tokenValue != NULL)
    233     {
    234         *tokenValue = &state->lookahead[i].value;
    235     }
    236 
    237     if (linenumber != NULL)
    238     {
    239         *linenumber = state->lookahead[i].line;
    240     }
    241 
    242     if(comment != NULL){
    243         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    244     }
    245 
    246     return state->lookahead[i].type;
    247 }
    248 
    249 static void
    250 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
    251 {
    252     uint32_t        line;
    253 
    254     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
    255 
    256     if (linenumber != NULL)
    257     {
    258         *linenumber = line;
    259     }
    260 
    261     if (U_FAILURE(*status))
    262     {
    263         return;
    264     }
    265 
    266     if (token != expectedToken)
    267     {
    268         *status = U_INVALID_FORMAT_ERROR;
    269         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
    270     }
    271     else
    272     {
    273         *status = U_ZERO_ERROR;
    274     }
    275 }
    276 
    277 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
    278 {
    279     struct UString *tokenValue;
    280     char           *result;
    281     uint32_t        count;
    282 
    283     expect(state, TOK_STRING, &tokenValue, comment, line, status);
    284 
    285     if (U_FAILURE(*status))
    286     {
    287         return NULL;
    288     }
    289 
    290     count = u_strlen(tokenValue->fChars);
    291     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
    292         *status = U_INVALID_FORMAT_ERROR;
    293         error(*line, "invariant characters required for table keys, binary data, etc.");
    294         return NULL;
    295     }
    296 
    297     result = static_cast<char *>(uprv_malloc(count+1));
    298 
    299     if (result == NULL)
    300     {
    301         *status = U_MEMORY_ALLOCATION_ERROR;
    302         return NULL;
    303     }
    304 
    305     u_UCharsToChars(tokenValue->fChars, result, count+1);
    306     return result;
    307 }
    308 
    309 static struct SResource *
    310 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    311 {
    312     struct SResource *result = NULL;
    313     struct UString   *tokenValue;
    314     FileStream       *file          = NULL;
    315     char              filename[256] = { '\0' };
    316     char              cs[128]       = { '\0' };
    317     uint32_t          line;
    318     UBool quoted = FALSE;
    319     UCHARBUF *ucbuf=NULL;
    320     UChar32   c     = 0;
    321     const char* cp  = NULL;
    322     UChar *pTarget     = NULL;
    323     UChar *target      = NULL;
    324     UChar *targetLimit = NULL;
    325     int32_t size = 0;
    326 
    327     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    328 
    329     if(isVerbose()){
    330         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    331     }
    332 
    333     if (U_FAILURE(*status))
    334     {
    335         return NULL;
    336     }
    337     /* make the filename including the directory */
    338     if (state->inputdir != NULL)
    339     {
    340         uprv_strcat(filename, state->inputdir);
    341 
    342         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    343         {
    344             uprv_strcat(filename, U_FILE_SEP_STRING);
    345         }
    346     }
    347 
    348     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    349 
    350     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    351 
    352     if (U_FAILURE(*status))
    353     {
    354         return NULL;
    355     }
    356     uprv_strcat(filename, cs);
    357 
    358     if(state->omitCollationRules) {
    359         return res_none();
    360     }
    361 
    362     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    363 
    364     if (U_FAILURE(*status)) {
    365         error(line, "An error occurred while opening the input file %s\n", filename);
    366         return NULL;
    367     }
    368 
    369     /* We allocate more space than actually required
    370     * since the actual size needed for storing UChars
    371     * is not known in UTF-8 byte stream
    372     */
    373     size        = ucbuf_size(ucbuf) + 1;
    374     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
    375     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    376     target      = pTarget;
    377     targetLimit = pTarget+size;
    378 
    379     /* read the rules into the buffer */
    380     while (target < targetLimit)
    381     {
    382         c = ucbuf_getc(ucbuf, status);
    383         if(c == QUOTE) {
    384             quoted = (UBool)!quoted;
    385         }
    386         /* weiv (06/26/2002): adding the following:
    387          * - preserving spaces in commands [...]
    388          * - # comments until the end of line
    389          */
    390         if (c == STARTCOMMAND && !quoted)
    391         {
    392             /* preserve commands
    393              * closing bracket will be handled by the
    394              * append at the end of the loop
    395              */
    396             while(c != ENDCOMMAND) {
    397                 U_APPEND_CHAR32_ONLY(c, target);
    398                 c = ucbuf_getc(ucbuf, status);
    399             }
    400         }
    401         else if (c == HASH && !quoted) {
    402             /* skip comments */
    403             while(c != CR && c != LF) {
    404                 c = ucbuf_getc(ucbuf, status);
    405             }
    406             continue;
    407         }
    408         else if (c == ESCAPE)
    409         {
    410             c = unescape(ucbuf, status);
    411 
    412             if (c == (UChar32)U_ERR)
    413             {
    414                 uprv_free(pTarget);
    415                 T_FileStream_close(file);
    416                 return NULL;
    417             }
    418         }
    419         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
    420         {
    421             /* ignore spaces carriage returns
    422             * and line feed unless in the form \uXXXX
    423             */
    424             continue;
    425         }
    426 
    427         /* Append UChar * after dissembling if c > 0xffff*/
    428         if (c != (UChar32)U_EOF)
    429         {
    430             U_APPEND_CHAR32_ONLY(c, target);
    431         }
    432         else
    433         {
    434             break;
    435         }
    436     }
    437 
    438     /* terminate the string */
    439     if(target < targetLimit){
    440         *target = 0x0000;
    441     }
    442 
    443     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
    444 
    445 
    446     ucbuf_close(ucbuf);
    447     uprv_free(pTarget);
    448     T_FileStream_close(file);
    449 
    450     return result;
    451 }
    452 
    453 static struct SResource *
    454 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    455 {
    456     struct SResource *result = NULL;
    457     struct UString   *tokenValue;
    458     FileStream       *file          = NULL;
    459     char              filename[256] = { '\0' };
    460     char              cs[128]       = { '\0' };
    461     uint32_t          line;
    462     UCHARBUF *ucbuf=NULL;
    463     const char* cp  = NULL;
    464     UChar *pTarget     = NULL;
    465     const UChar *pSource     = NULL;
    466     int32_t size = 0;
    467 
    468     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    469 
    470     if(isVerbose()){
    471         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    472     }
    473 
    474     if (U_FAILURE(*status))
    475     {
    476         return NULL;
    477     }
    478     /* make the filename including the directory */
    479     if (state->inputdir != NULL)
    480     {
    481         uprv_strcat(filename, state->inputdir);
    482 
    483         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    484         {
    485             uprv_strcat(filename, U_FILE_SEP_STRING);
    486         }
    487     }
    488 
    489     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    490 
    491     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    492 
    493     if (U_FAILURE(*status))
    494     {
    495         return NULL;
    496     }
    497     uprv_strcat(filename, cs);
    498 
    499 
    500     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    501 
    502     if (U_FAILURE(*status)) {
    503         error(line, "An error occurred while opening the input file %s\n", filename);
    504         return NULL;
    505     }
    506 
    507     /* We allocate more space than actually required
    508     * since the actual size needed for storing UChars
    509     * is not known in UTF-8 byte stream
    510     */
    511     pSource = ucbuf_getBuffer(ucbuf, &size, status);
    512     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
    513     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    514 
    515 #if !UCONFIG_NO_TRANSLITERATION
    516     size = utrans_stripRules(pSource, size, pTarget, status);
    517 #else
    518     size = 0;
    519     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
    520 #endif
    521     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
    522 
    523     ucbuf_close(ucbuf);
    524     uprv_free(pTarget);
    525     T_FileStream_close(file);
    526 
    527     return result;
    528 }
    529 static ArrayResource* dependencyArray = NULL;
    530 
    531 static struct SResource *
    532 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    533 {
    534     struct SResource *result = NULL;
    535     struct SResource *elem = NULL;
    536     struct UString   *tokenValue;
    537     uint32_t          line;
    538     char              filename[256] = { '\0' };
    539     char              cs[128]       = { '\0' };
    540 
    541     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    542 
    543     if(isVerbose()){
    544         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    545     }
    546 
    547     if (U_FAILURE(*status))
    548     {
    549         return NULL;
    550     }
    551     /* make the filename including the directory */
    552     if (state->outputdir != NULL)
    553     {
    554         uprv_strcat(filename, state->outputdir);
    555 
    556         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
    557         {
    558             uprv_strcat(filename, U_FILE_SEP_STRING);
    559         }
    560     }
    561 
    562     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    563 
    564     if (U_FAILURE(*status))
    565     {
    566         return NULL;
    567     }
    568     uprv_strcat(filename, cs);
    569     if(!T_FileStream_file_exists(filename)){
    570         if(isStrict()){
    571             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    572         }else{
    573             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    574         }
    575     }
    576     if(dependencyArray==NULL){
    577         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
    578     }
    579     if(tag!=NULL){
    580         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    581     }
    582     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
    583 
    584     dependencyArray->add(elem);
    585 
    586     if (U_FAILURE(*status))
    587     {
    588         return NULL;
    589     }
    590     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    591     return result;
    592 }
    593 static struct SResource *
    594 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    595 {
    596     struct UString   *tokenValue;
    597     struct SResource *result = NULL;
    598 
    599 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
    600     {
    601         return parseUCARules(tag, startline, status);
    602     }*/
    603     if(isVerbose()){
    604         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    605     }
    606     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    607 
    608     if (U_SUCCESS(*status))
    609     {
    610         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    611         doesn't survive expect either) */
    612 
    613         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    614         if(U_SUCCESS(*status) && result) {
    615             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    616 
    617             if (U_FAILURE(*status))
    618             {
    619                 res_close(result);
    620                 return NULL;
    621             }
    622         }
    623     }
    624 
    625     return result;
    626 }
    627 
    628 static struct SResource *
    629 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
    630 {
    631     struct UString   *tokenValue;
    632     struct SResource *result  = NULL;
    633 
    634     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    635 
    636     if(isVerbose()){
    637         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    638     }
    639 
    640     if (U_SUCCESS(*status))
    641     {
    642         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    643         doesn't survive expect either) */
    644 
    645         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    646 
    647         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    648 
    649         if (U_FAILURE(*status))
    650         {
    651             res_close(result);
    652             return NULL;
    653         }
    654     }
    655 
    656     return result;
    657 }
    658 
    659 #if !UCONFIG_NO_COLLATION
    660 
    661 namespace {
    662 
    663 static struct SResource* resLookup(struct SResource* res, const char* key){
    664     if (res == res_none() || !res->isTable()) {
    665         return NULL;
    666     }
    667 
    668     TableResource *list = static_cast<TableResource *>(res);
    669     SResource *current = list->fFirst;
    670     while (current != NULL) {
    671         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
    672             return current;
    673         }
    674         current = current->fNext;
    675     }
    676     return NULL;
    677 }
    678 
// Importer handed to the collation rule parser: getRules() parses the
// "<locale>.txt" resource file for the requested locale and returns the
// "Sequence" string of the requested collation type.
class GenrbImporter : public icu::CollationRuleParser::Importer {
public:
    // `in` and `out` are stored as given (not copied); either may be NULL.
    GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
    virtual ~GenrbImporter();
    // Fills `rules` with the rule string found for localeID/collationType;
    // `rules` is left untouched when nothing is found or an error occurs.
    virtual void getRules(
            const char *localeID, const char *collationType,
            UnicodeString &rules,
            const char *&errorReason, UErrorCode &errorCode);

private:
    // directory in which the locale source files are looked up
    const char *inputDir;
    // forwarded unchanged to parse()
    const char *outputDir;
};

// Nothing to release: both directory pointers are borrowed.
GenrbImporter::~GenrbImporter() {}
    694 
    695 void
    696 GenrbImporter::getRules(
    697         const char *localeID, const char *collationType,
    698         UnicodeString &rules,
    699         const char *& /*errorReason*/, UErrorCode &errorCode) {
    700     CharString filename(localeID, errorCode);
    701     for(int32_t i = 0; i < filename.length(); i++){
    702         if(filename[i] == '-'){
    703             filename.data()[i] = '_';
    704         }
    705     }
    706     filename.append(".txt", errorCode);
    707     if (U_FAILURE(errorCode)) {
    708         return;
    709     }
    710     CharString inputDirBuf;
    711     CharString openFileName;
    712     if(inputDir == NULL) {
    713         const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
    714         if (filenameBegin != NULL) {
    715             /*
    716              * When a filename ../../../data/root.txt is specified,
    717              * we presume that the input directory is ../../../data
    718              * This is very important when the resource file includes
    719              * another file, like UCARules.txt or thaidict.brk.
    720              */
    721             StringPiece dir = filename.toStringPiece();
    722             const char *filenameLimit = filename.data() + filename.length();
    723             dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
    724             inputDirBuf.append(dir, errorCode);
    725             inputDir = inputDirBuf.data();
    726         }
    727     }else{
    728         int32_t dirlen  = (int32_t)uprv_strlen(inputDir);
    729 
    730         if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
    731             /*
    732              * append the input dir to openFileName if the first char in
    733              * filename is not file separator char and the last char input directory is  not '.'.
    734              * This is to support :
    735              * genrb -s. /home/icu/data
    736              * genrb -s. icu/data
    737              * The user cannot mix notations like
    738              * genrb -s. /icu/data --- the absolute path specified. -s redundant
    739              * user should use
    740              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
    741              */
    742             openFileName.append(inputDir, dirlen, errorCode);
    743             if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
    744                 openFileName.append(U_FILE_SEP_CHAR, errorCode);
    745             }
    746         }
    747     }
    748     openFileName.append(filename, errorCode);
    749     if(U_FAILURE(errorCode)) {
    750         return;
    751     }
    752     // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
    753     const char* cp = "";
    754     LocalUCHARBUFPointer ucbuf(
    755             ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
    756     if(errorCode == U_FILE_ACCESS_ERROR) {
    757         fprintf(stderr, "couldn't open file %s\n", openFileName.data());
    758         return;
    759     }
    760     if (ucbuf.isNull() || U_FAILURE(errorCode)) {
    761         fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
    762         return;
    763     }
    764 
    765     /* Parse the data into an SRBRoot */
    766     LocalPointer<SRBRoot> data(
    767             parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode));
    768     if (U_FAILURE(errorCode)) {
    769         return;
    770     }
    771 
    772     struct SResource *root = data->fRoot;
    773     struct SResource *collations = resLookup(root, "collations");
    774     if (collations != NULL) {
    775       struct SResource *collation = resLookup(collations, collationType);
    776       if (collation != NULL) {
    777         struct SResource *sequence = resLookup(collation, "Sequence");
    778         if (sequence != NULL && sequence->isString()) {
    779           // No string pointer aliasing so that we need not hold onto the resource bundle.
    780           StringResource *sr = static_cast<StringResource *>(sequence);
    781           rules = sr->fString;
    782         }
    783       }
    784     }
    785 }
    786 
    787 // Quick-and-dirty escaping function.
    788 // Assumes that we are on an ASCII-based platform.
    789 static void
    790 escape(const UChar *s, char *buffer) {
    791     int32_t length = u_strlen(s);
    792     int32_t i = 0;
    793     for (;;) {
    794         UChar32 c;
    795         U16_NEXT(s, i, length, c);
    796         if (c == 0) {
    797             *buffer = 0;
    798             return;
    799         } else if (0x20 <= c && c <= 0x7e) {
    800             // printable ASCII
    801             *buffer++ = (char)c;  // assumes ASCII-based platform
    802         } else {
    803             buffer += sprintf(buffer, "\\u%04X", (int)c);
    804         }
    805     }
    806 }
    807 
    808 }  // namespace
    809 
    810 #endif  // !UCONFIG_NO_COLLATION
    811 
    812 static TableResource *
    813 addCollation(ParseState* state, TableResource  *result, const char *collationType,
    814              uint32_t startline, UErrorCode *status)
    815 {
    816     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
    817     struct SResource  *member = NULL;
    818     struct UString    *tokenValue;
    819     struct UString     comment;
    820     enum   ETokenType  token;
    821     char               subtag[1024];
    822     UnicodeString      rules;
    823     UBool              haveRules = FALSE;
    824     UVersionInfo       version;
    825     uint32_t           line;
    826 
    827     /* '{' . (name resource)* '}' */
    828     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
    829 
    830     for (;;)
    831     {
    832         ustr_init(&comment);
    833         token = getToken(state, &tokenValue, &comment, &line, status);
    834 
    835         if (token == TOK_CLOSE_BRACE)
    836         {
    837             break;
    838         }
    839 
    840         if (token != TOK_STRING)
    841         {
    842             res_close(result);
    843             *status = U_INVALID_FORMAT_ERROR;
    844 
    845             if (token == TOK_EOF)
    846             {
    847                 error(startline, "unterminated table");
    848             }
    849             else
    850             {
    851                 error(line, "Unexpected token %s", tokenNames[token]);
    852             }
    853 
    854             return NULL;
    855         }
    856 
    857         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    858 
    859         if (U_FAILURE(*status))
    860         {
    861             res_close(result);
    862             return NULL;
    863         }
    864 
    865         member = parseResource(state, subtag, NULL, status);
    866 
    867         if (U_FAILURE(*status))
    868         {
    869             res_close(result);
    870             return NULL;
    871         }
    872         if (result == NULL)
    873         {
    874             // Ignore the parsed resources, continue parsing.
    875         }
    876         else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
    877         {
    878             StringResource *sr = static_cast<StringResource *>(member);
    879             char     ver[40];
    880             int32_t length = sr->length();
    881 
    882             if (length >= UPRV_LENGTHOF(ver))
    883             {
    884                 length = UPRV_LENGTHOF(ver) - 1;
    885             }
    886 
    887             sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
    888             u_versionFromString(version, ver);
    889 
    890             result->add(member, line, *status);
    891             member = NULL;
    892         }
    893         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
    894         {
    895             /* discard duplicate %%CollationBin if any*/
    896         }
    897         else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
    898         {
    899             StringResource *sr = static_cast<StringResource *>(member);
    900             rules = sr->fString;
    901             haveRules = TRUE;
    902             // Defer building the collator until we have seen
    903             // all sub-elements of the collation table, including the Version.
    904             /* in order to achieve smaller data files, we can direct genrb */
    905             /* to omit collation rules */
    906             if(!state->omitCollationRules) {
    907                 result->add(member, line, *status);
    908                 member = NULL;
    909             }
    910         }
    911         else  // Just copy non-special items.
    912         {
    913             result->add(member, line, *status);
    914             member = NULL;
    915         }
    916         res_close(member);  // TODO: use LocalPointer
    917         if (U_FAILURE(*status))
    918         {
    919             res_close(result);
    920             return NULL;
    921         }
    922     }
    923 
    924     if (!haveRules) { return result; }
    925 
    926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
    927     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
    928     (void)collationType;
    929 #else
    930     // CLDR ticket #3949, ICU ticket #8082:
    931     // Do not build collation binary data for for-import-only "private" collation rule strings.
    932     if (uprv_strncmp(collationType, "private-", 8) == 0) {
    933         if(isVerbose()) {
    934             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    935         }
    936         return result;
    937     }
    938 
    939     if(!state->makeBinaryCollation) {
    940         if(isVerbose()) {
    941             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    942         }
    943         return result;
    944     }
    945     UErrorCode intStatus = U_ZERO_ERROR;
    946     UParseError parseError;
    947     uprv_memset(&parseError, 0, sizeof(parseError));
    948     GenrbImporter importer(state->inputdir, state->outputdir);
    949     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
    950     if(U_FAILURE(intStatus)) {
    951         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
    952         res_close(result);
    953         return NULL;  // TODO: use LocalUResourceBundlePointer for result
    954     }
    955     icu::CollationBuilder builder(base, intStatus);
    956     if(uprv_strncmp(collationType, "search", 6) == 0) {
    957         builder.disableFastLatin();  // build fast-Latin table unless search collator
    958     }
    959     LocalPointer<icu::CollationTailoring> t(
    960             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
    961     if(U_FAILURE(intStatus)) {
    962         const char *reason = builder.getErrorReason();
    963         if(reason == NULL) { reason = ""; }
    964         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
    965                 state->filename, collationType,
    966                 (long)parseError.offset, u_errorName(intStatus), reason);
    967         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
    968             // Print pre- and post-context.
    969             char preBuffer[100], postBuffer[100];
    970             escape(parseError.preContext, preBuffer);
    971             escape(parseError.postContext, postBuffer);
    972             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
    973         }
    974         if(isStrict() || t.isNull()) {
    975             *status = intStatus;
    976             res_close(result);
    977             return NULL;
    978         }
    979     }
    980     icu::LocalMemory<uint8_t> buffer;
    981     int32_t capacity = 100000;
    982     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
    983     if(dest == NULL) {
    984         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
    985                 (long)capacity);
    986         *status = U_MEMORY_ALLOCATION_ERROR;
    987         res_close(result);
    988         return NULL;
    989     }
    990     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
    991     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
    992             *t, *t->settings, indexes, dest, capacity, intStatus);
    993     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
    994         intStatus = U_ZERO_ERROR;
    995         capacity = totalSize;
    996         dest = buffer.allocateInsteadAndCopy(capacity);
    997         if(dest == NULL) {
    998             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
    999                     (long)capacity);
   1000             *status = U_MEMORY_ALLOCATION_ERROR;
   1001             res_close(result);
   1002             return NULL;
   1003         }
   1004         totalSize = icu::CollationDataWriter::writeTailoring(
   1005                 *t, *t->settings, indexes, dest, capacity, intStatus);
   1006     }
   1007     if(U_FAILURE(intStatus)) {
   1008         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
   1009                 u_errorName(intStatus));
   1010         res_close(result);
   1011         return NULL;
   1012     }
   1013     if(isVerbose()) {
   1014         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
   1015         icu::CollationInfo::printSizes(totalSize, indexes);
   1016         if(t->settings->hasReordering()) {
   1017             printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
   1018             icu::CollationInfo::printReorderRanges(
   1019                     *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
   1020         }
   1021 #if 0  // debugging output
   1022     } else {
   1023         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
   1024         icu::CollationInfo::printSizes(totalSize, indexes);
   1025 #endif
   1026     }
   1027     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
   1028     result->add(collationBin, line, *status);
   1029     if (U_FAILURE(*status)) {
   1030         res_close(result);
   1031         return NULL;
   1032     }
   1033 #endif
   1034     return result;
   1035 }
   1036 
   1037 static UBool
   1038 keepCollationType(const char *type) {  // android-changed
   1039     // BEGIN android-added
   1040     if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
   1041     if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
   1042     // END android-added
   1043     return TRUE;
   1044 }
   1045 
   1046 static struct SResource *
   1047 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
   1048 {
   1049     TableResource  *result = NULL;
   1050     struct SResource  *member = NULL;
   1051     struct UString    *tokenValue;
   1052     struct UString     comment;
   1053     enum   ETokenType  token;
   1054     char               subtag[1024], typeKeyword[1024];
   1055     uint32_t           line;
   1056 
   1057     result = table_open(state->bundle, tag, NULL, status);
   1058 
   1059     if (result == NULL || U_FAILURE(*status))
   1060     {
   1061         return NULL;
   1062     }
   1063     if(isVerbose()){
   1064         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1065     }
   1066     if(!newCollation) {
   1067         return addCollation(state, result, "(no type)", startline, status);
   1068     }
   1069     else {
   1070         for(;;) {
   1071             ustr_init(&comment);
   1072             token = getToken(state, &tokenValue, &comment, &line, status);
   1073 
   1074             if (token == TOK_CLOSE_BRACE)
   1075             {
   1076                 return result;
   1077             }
   1078 
   1079             if (token != TOK_STRING)
   1080             {
   1081                 res_close(result);
   1082                 *status = U_INVALID_FORMAT_ERROR;
   1083 
   1084                 if (token == TOK_EOF)
   1085                 {
   1086                     error(startline, "unterminated table");
   1087                 }
   1088                 else
   1089                 {
   1090                     error(line, "Unexpected token %s", tokenNames[token]);
   1091                 }
   1092 
   1093                 return NULL;
   1094             }
   1095 
   1096             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1097 
   1098             if (U_FAILURE(*status))
   1099             {
   1100                 res_close(result);
   1101                 return NULL;
   1102             }
   1103 
   1104             if (uprv_strcmp(subtag, "default") == 0)
   1105             {
   1106                 member = parseResource(state, subtag, NULL, status);
   1107 
   1108                 if (U_FAILURE(*status))
   1109                 {
   1110                     res_close(result);
   1111                     return NULL;
   1112                 }
   1113 
   1114                 result->add(member, line, *status);
   1115             }
   1116             else
   1117             {
   1118                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
   1119                 /* this probably needs to be refactored or recursively use the parser */
   1120                 /* first we assume that our collation table won't have the explicit type */
   1121                 /* then, we cannot handle aliases */
   1122                 if(token == TOK_OPEN_BRACE) {
   1123                     token = getToken(state, &tokenValue, &comment, &line, status);
   1124                     TableResource *collationRes;
   1125                     if (keepCollationType(subtag)) {
   1126                         collationRes = table_open(state->bundle, subtag, NULL, status);
   1127                     } else {
   1128                         collationRes = NULL;
   1129                     }
   1130                     // need to parse the collation data regardless
   1131                     collationRes = addCollation(state, collationRes, subtag, startline, status);
   1132                     if (collationRes != NULL) {
   1133                         result->add(collationRes, startline, *status);
   1134                     }
   1135                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
   1136                     /* we could have a table too */
   1137                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
   1138                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
   1139                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
   1140                         member = parseResource(state, subtag, NULL, status);
   1141                         if (U_FAILURE(*status))
   1142                         {
   1143                             res_close(result);
   1144                             return NULL;
   1145                         }
   1146 
   1147                         result->add(member, line, *status);
   1148                     } else {
   1149                         res_close(result);
   1150                         *status = U_INVALID_FORMAT_ERROR;
   1151                         return NULL;
   1152                     }
   1153                 } else {
   1154                     res_close(result);
   1155                     *status = U_INVALID_FORMAT_ERROR;
   1156                     return NULL;
   1157                 }
   1158             }
   1159 
   1160             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
   1161 
   1162             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
   1163 
   1164             if (U_FAILURE(*status))
   1165             {
   1166                 res_close(result);
   1167                 return NULL;
   1168             }
   1169         }
   1170     }
   1171 }
   1172 
   1173 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
   1174    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
   1175 static struct SResource *
   1176 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
   1177 {
   1178     struct SResource  *member = NULL;
   1179     struct UString    *tokenValue=NULL;
   1180     struct UString    comment;
   1181     enum   ETokenType token;
   1182     char              subtag[1024];
   1183     uint32_t          line;
   1184     UBool             readToken = FALSE;
   1185 
   1186     /* '{' . (name resource)* '}' */
   1187 
   1188     if(isVerbose()){
   1189         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
   1190     }
   1191     for (;;)
   1192     {
   1193         ustr_init(&comment);
   1194         token = getToken(state, &tokenValue, &comment, &line, status);
   1195 
   1196         if (token == TOK_CLOSE_BRACE)
   1197         {
   1198             if (!readToken) {
   1199                 warning(startline, "Encountered empty table");
   1200             }
   1201             return table;
   1202         }
   1203 
   1204         if (token != TOK_STRING)
   1205         {
   1206             *status = U_INVALID_FORMAT_ERROR;
   1207 
   1208             if (token == TOK_EOF)
   1209             {
   1210                 error(startline, "unterminated table");
   1211             }
   1212             else
   1213             {
   1214                 error(line, "unexpected token %s", tokenNames[token]);
   1215             }
   1216 
   1217             return NULL;
   1218         }
   1219 
   1220         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
   1221             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1222         } else {
   1223             *status = U_INVALID_FORMAT_ERROR;
   1224             error(line, "invariant characters required for table keys");
   1225             return NULL;
   1226         }
   1227 
   1228         if (U_FAILURE(*status))
   1229         {
   1230             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
   1231             return NULL;
   1232         }
   1233 
   1234         member = parseResource(state, subtag, &comment, status);
   1235 
   1236         if (member == NULL || U_FAILURE(*status))
   1237         {
   1238             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
   1239             return NULL;
   1240         }
   1241 
   1242         table->add(member, line, *status);
   1243 
   1244         if (U_FAILURE(*status))
   1245         {
   1246             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
   1247             return NULL;
   1248         }
   1249         readToken = TRUE;
   1250         ustr_deinit(&comment);
   1251    }
   1252 
   1253     /* not reached */
   1254     /* A compiler warning will appear if all paths don't contain a return statement. */
   1255 /*     *status = U_INTERNAL_PROGRAM_ERROR;
   1256      return NULL;*/
   1257 }
   1258 
   1259 static struct SResource *
   1260 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1261 {
   1262     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
   1263     {
   1264         return parseCollationElements(state, tag, startline, FALSE, status);
   1265     }
   1266     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
   1267     {
   1268         return parseCollationElements(state, tag, startline, TRUE, status);
   1269     }
   1270     if(isVerbose()){
   1271         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1272     }
   1273 
   1274     TableResource *result = table_open(state->bundle, tag, comment, status);
   1275 
   1276     if (result == NULL || U_FAILURE(*status))
   1277     {
   1278         return NULL;
   1279     }
   1280     return realParseTable(state, result, tag, startline,  status);
   1281 }
   1282 
   1283 static struct SResource *
   1284 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1285 {
   1286     struct SResource  *member = NULL;
   1287     struct UString    *tokenValue;
   1288     struct UString    memberComments;
   1289     enum   ETokenType token;
   1290     UBool             readToken = FALSE;
   1291 
   1292     ArrayResource  *result = array_open(state->bundle, tag, comment, status);
   1293 
   1294     if (result == NULL || U_FAILURE(*status))
   1295     {
   1296         return NULL;
   1297     }
   1298     if(isVerbose()){
   1299         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1300     }
   1301 
   1302     ustr_init(&memberComments);
   1303 
   1304     /* '{' . resource [','] '}' */
   1305     for (;;)
   1306     {
   1307         /* reset length */
   1308         ustr_setlen(&memberComments, 0, status);
   1309 
   1310         /* check for end of array, but don't consume next token unless it really is the end */
   1311         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
   1312 
   1313 
   1314         if (token == TOK_CLOSE_BRACE)
   1315         {
   1316             getToken(state, NULL, NULL, NULL, status);
   1317             if (!readToken) {
   1318                 warning(startline, "Encountered empty array");
   1319             }
   1320             break;
   1321         }
   1322 
   1323         if (token == TOK_EOF)
   1324         {
   1325             res_close(result);
   1326             *status = U_INVALID_FORMAT_ERROR;
   1327             error(startline, "unterminated array");
   1328             return NULL;
   1329         }
   1330 
   1331         /* string arrays are a special case */
   1332         if (token == TOK_STRING)
   1333         {
   1334             getToken(state, &tokenValue, &memberComments, NULL, status);
   1335             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
   1336         }
   1337         else
   1338         {
   1339             member = parseResource(state, NULL, &memberComments, status);
   1340         }
   1341 
   1342         if (member == NULL || U_FAILURE(*status))
   1343         {
   1344             res_close(result);
   1345             return NULL;
   1346         }
   1347 
   1348         result->add(member);
   1349 
   1350         /* eat optional comma if present */
   1351         token = peekToken(state, 0, NULL, NULL, NULL, status);
   1352 
   1353         if (token == TOK_COMMA)
   1354         {
   1355             getToken(state, NULL, NULL, NULL, status);
   1356         }
   1357 
   1358         if (U_FAILURE(*status))
   1359         {
   1360             res_close(result);
   1361             return NULL;
   1362         }
   1363         readToken = TRUE;
   1364     }
   1365 
   1366     ustr_deinit(&memberComments);
   1367     return result;
   1368 }
   1369 
   1370 static struct SResource *
   1371 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1372 {
   1373     enum   ETokenType  token;
   1374     char              *string;
   1375     int32_t            value;
   1376     UBool              readToken = FALSE;
   1377     char              *stopstring;
   1378     uint32_t           len;
   1379     struct UString     memberComments;
   1380 
   1381     IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
   1382 
   1383     if (result == NULL || U_FAILURE(*status))
   1384     {
   1385         return NULL;
   1386     }
   1387 
   1388     if(isVerbose()){
   1389         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1390     }
   1391     ustr_init(&memberComments);
   1392     /* '{' . string [','] '}' */
   1393     for (;;)
   1394     {
   1395         ustr_setlen(&memberComments, 0, status);
   1396 
   1397         /* check for end of array, but don't consume next token unless it really is the end */
   1398         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
   1399 
   1400         if (token == TOK_CLOSE_BRACE)
   1401         {
   1402             /* it's the end, consume the close brace */
   1403             getToken(state, NULL, NULL, NULL, status);
   1404             if (!readToken) {
   1405                 warning(startline, "Encountered empty int vector");
   1406             }
   1407             ustr_deinit(&memberComments);
   1408             return result;
   1409         }
   1410 
   1411         string = getInvariantString(state, NULL, NULL, status);
   1412 
   1413         if (U_FAILURE(*status))
   1414         {
   1415             res_close(result);
   1416             return NULL;
   1417         }
   1418 
   1419         /* For handling illegal char in the Intvector */
   1420         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
   1421         len=(uint32_t)(stopstring-string);
   1422 
   1423         if(len==uprv_strlen(string))
   1424         {
   1425             result->add(value, *status);
   1426             uprv_free(string);
   1427             token = peekToken(state, 0, NULL, NULL, NULL, status);
   1428         }
   1429         else
   1430         {
   1431             uprv_free(string);
   1432             *status=U_INVALID_CHAR_FOUND;
   1433         }
   1434 
   1435         if (U_FAILURE(*status))
   1436         {
   1437             res_close(result);
   1438             return NULL;
   1439         }
   1440 
   1441         /* the comma is optional (even though it is required to prevent the reader from concatenating
   1442         consecutive entries) so that a missing comma on the last entry isn't an error */
   1443         if (token == TOK_COMMA)
   1444         {
   1445             getToken(state, NULL, NULL, NULL, status);
   1446         }
   1447         readToken = TRUE;
   1448     }
   1449 
   1450     /* not reached */
   1451     /* A compiler warning will appear if all paths don't contain a return statement. */
   1452 /*    intvector_close(result, status);
   1453     *status = U_INTERNAL_PROGRAM_ERROR;
   1454     return NULL;*/
   1455 }
   1456 
   1457 static struct SResource *
   1458 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1459 {
   1460     uint32_t line;
   1461     LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
   1462     if (string.isNull() || U_FAILURE(*status))
   1463     {
   1464         return NULL;
   1465     }
   1466 
   1467     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1468     if (U_FAILURE(*status))
   1469     {
   1470         return NULL;
   1471     }
   1472 
   1473     if(isVerbose()){
   1474         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1475     }
   1476 
   1477     uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
   1478     if (count > 0){
   1479         if((count % 2)==0){
   1480             LocalMemory<uint8_t> value;
   1481             if (value.allocateInsteadAndCopy(count) == NULL)
   1482             {
   1483                 *status = U_MEMORY_ALLOCATION_ERROR;
   1484                 return NULL;
   1485             }
   1486 
   1487             char toConv[3] = {'\0', '\0', '\0'};
   1488             for (uint32_t i = 0; i < count; i += 2)
   1489             {
   1490                 toConv[0] = string[i];
   1491                 toConv[1] = string[i + 1];
   1492 
   1493                 char *stopstring;
   1494                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
   1495                 uint32_t len=(uint32_t)(stopstring-toConv);
   1496 
   1497                 if(len!=2)
   1498                 {
   1499                     *status=U_INVALID_CHAR_FOUND;
   1500                     return NULL;
   1501                 }
   1502             }
   1503 
   1504             return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
   1505         }
   1506         else
   1507         {
   1508             *status = U_INVALID_CHAR_FOUND;
   1509             error(line, "Encountered invalid binary value (length is odd)");
   1510             return NULL;
   1511         }
   1512     }
   1513     else
   1514     {
   1515         warning(startline, "Encountered empty binary value");
   1516         return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
   1517     }
   1518 }
   1519 
   1520 static struct SResource *
   1521 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1522 {
   1523     struct SResource *result = NULL;
   1524     int32_t           value;
   1525     char             *string;
   1526     char             *stopstring;
   1527     uint32_t          len;
   1528 
   1529     string = getInvariantString(state, NULL, NULL, status);
   1530 
   1531     if (string == NULL || U_FAILURE(*status))
   1532     {
   1533         return NULL;
   1534     }
   1535 
   1536     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1537 
   1538     if (U_FAILURE(*status))
   1539     {
   1540         uprv_free(string);
   1541         return NULL;
   1542     }
   1543 
   1544     if(isVerbose()){
   1545         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1546     }
   1547 
   1548     if (uprv_strlen(string) <= 0)
   1549     {
   1550         warning(startline, "Encountered empty integer. Default value is 0.");
   1551     }
   1552 
   1553     /* Allow integer support for hexdecimal, octal digit and decimal*/
   1554     /* and handle illegal char in the integer*/
   1555     value = uprv_strtoul(string, &stopstring, 0);
   1556     len=(uint32_t)(stopstring-string);
   1557     if(len==uprv_strlen(string))
   1558     {
   1559         result = int_open(state->bundle, tag, value, comment, status);
   1560     }
   1561     else
   1562     {
   1563         *status=U_INVALID_CHAR_FOUND;
   1564     }
   1565     uprv_free(string);
   1566 
   1567     return result;
   1568 }
   1569 
   1570 static struct SResource *
   1571 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1572 {
   1573     uint32_t          line;
   1574     LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
   1575     if (U_FAILURE(*status))
   1576     {
   1577         return NULL;
   1578     }
   1579 
   1580     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1581 
   1582     if (U_FAILURE(*status))
   1583     {
   1584         return NULL;
   1585     }
   1586 
   1587     if(isVerbose()){
   1588         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1589     }
   1590 
   1591     /* Open the input file for reading */
   1592     CharString fullname;
   1593     if (state->inputdir != NULL) {
   1594         fullname.append(state->inputdir, *status);
   1595     }
   1596     fullname.appendPathPart(filename.getAlias(), *status);
   1597     if (U_FAILURE(*status)) {
   1598         return NULL;
   1599     }
   1600 
   1601     FileStream *file = T_FileStream_open(fullname.data(), "rb");
   1602     if (file == NULL)
   1603     {
   1604         error(line, "couldn't open input file %s", filename.getAlias());
   1605         *status = U_FILE_ACCESS_ERROR;
   1606         return NULL;
   1607     }
   1608 
   1609     int32_t len  = T_FileStream_size(file);
   1610     LocalMemory<uint8_t> data;
   1611     if(data.allocateInsteadAndCopy(len) == NULL)
   1612     {
   1613         *status = U_MEMORY_ALLOCATION_ERROR;
   1614         T_FileStream_close (file);
   1615         return NULL;
   1616     }
   1617 
   1618     /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
   1619     T_FileStream_close (file);
   1620 
   1621     return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
   1622 }
   1623 
   1624 static struct SResource *
   1625 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1626 {
   1627     struct SResource *result;
   1628     int32_t           len=0;
   1629     char             *filename;
   1630     uint32_t          line;
   1631     UChar *pTarget     = NULL;
   1632 
   1633     UCHARBUF *ucbuf;
   1634     char     *fullname = NULL;
   1635     int32_t  count     = 0;
   1636     const char* cp = NULL;
   1637     const UChar* uBuffer = NULL;
   1638 
   1639     filename = getInvariantString(state, &line, NULL, status);
   1640     count     = (int32_t)uprv_strlen(filename);
   1641 
   1642     if (U_FAILURE(*status))
   1643     {
   1644         return NULL;
   1645     }
   1646 
   1647     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1648 
   1649     if (U_FAILURE(*status))
   1650     {
   1651         uprv_free(filename);
   1652         return NULL;
   1653     }
   1654 
   1655     if(isVerbose()){
   1656         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1657     }
   1658 
   1659     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
   1660     /* test for NULL */
   1661     if(fullname == NULL)
   1662     {
   1663         *status = U_MEMORY_ALLOCATION_ERROR;
   1664         uprv_free(filename);
   1665         return NULL;
   1666     }
   1667 
   1668     if(state->inputdir!=NULL){
   1669         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   1670         {
   1671 
   1672             uprv_strcpy(fullname, state->inputdir);
   1673 
   1674             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
   1675             fullname[state->inputdirLength + 1] = '\0';
   1676 
   1677             uprv_strcat(fullname, filename);
   1678         }
   1679         else
   1680         {
   1681             uprv_strcpy(fullname, state->inputdir);
   1682             uprv_strcat(fullname, filename);
   1683         }
   1684     }else{
   1685         uprv_strcpy(fullname,filename);
   1686     }
   1687 
   1688     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
   1689 
   1690     if (U_FAILURE(*status)) {
   1691         error(line, "couldn't open input file %s\n", filename);
   1692         return NULL;
   1693     }
   1694 
   1695     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
   1696     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
   1697 
   1698     ucbuf_close(ucbuf);
   1699 
   1700     uprv_free(pTarget);
   1701 
   1702     uprv_free(filename);
   1703     uprv_free(fullname);
   1704 
   1705     return result;
   1706 }
   1707 
   1708 
   1709 
   1710 
   1711 
   1712 U_STRING_DECL(k_type_string,    "string",    6);
   1713 U_STRING_DECL(k_type_binary,    "binary",    6);
   1714 U_STRING_DECL(k_type_bin,       "bin",       3);
   1715 U_STRING_DECL(k_type_table,     "table",     5);
   1716 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
   1717 U_STRING_DECL(k_type_int,       "int",       3);
   1718 U_STRING_DECL(k_type_integer,   "integer",   7);
   1719 U_STRING_DECL(k_type_array,     "array",     5);
   1720 U_STRING_DECL(k_type_alias,     "alias",     5);
   1721 U_STRING_DECL(k_type_intvector, "intvector", 9);
   1722 U_STRING_DECL(k_type_import,    "import",    6);
   1723 U_STRING_DECL(k_type_include,   "include",   7);
   1724 
   1725 /* Various non-standard processing plugins that create one or more special resources. */
   1726 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1727 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
   1728 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
   1729 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
   1730 
   1731 typedef enum EResourceType
   1732 {
   1733     RESTYPE_UNKNOWN,
   1734     RESTYPE_STRING,
   1735     RESTYPE_BINARY,
   1736     RESTYPE_TABLE,
   1737     RESTYPE_TABLE_NO_FALLBACK,
   1738     RESTYPE_INTEGER,
   1739     RESTYPE_ARRAY,
   1740     RESTYPE_ALIAS,
   1741     RESTYPE_INTVECTOR,
   1742     RESTYPE_IMPORT,
   1743     RESTYPE_INCLUDE,
   1744     RESTYPE_PROCESS_UCA_RULES,
   1745     RESTYPE_PROCESS_COLLATION,
   1746     RESTYPE_PROCESS_TRANSLITERATOR,
   1747     RESTYPE_PROCESS_DEPENDENCY,
   1748     RESTYPE_RESERVED
   1749 } EResourceType;
   1750 
   1751 static struct {
   1752     const char *nameChars;   /* only used for debugging */
   1753     const UChar *nameUChars;
   1754     ParseResourceFunction *parseFunction;
   1755 } gResourceTypes[] = {
   1756     {"Unknown", NULL, NULL},
   1757     {"string", k_type_string, parseString},
   1758     {"binary", k_type_binary, parseBinary},
   1759     {"table", k_type_table, parseTable},
   1760     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
   1761     {"integer", k_type_integer, parseInteger},
   1762     {"array", k_type_array, parseArray},
   1763     {"alias", k_type_alias, parseAlias},
   1764     {"intvector", k_type_intvector, parseIntVector},
   1765     {"import", k_type_import, parseImport},
   1766     {"include", k_type_include, parseInclude},
   1767     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
   1768     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
   1769     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
   1770     {"process(dependency)", k_type_plugin_dependency, parseDependency},
   1771     {"reserved", NULL, NULL}
   1772 };
   1773 
   1774 void initParser()
   1775 {
   1776     U_STRING_INIT(k_type_string,    "string",    6);
   1777     U_STRING_INIT(k_type_binary,    "binary",    6);
   1778     U_STRING_INIT(k_type_bin,       "bin",       3);
   1779     U_STRING_INIT(k_type_table,     "table",     5);
   1780     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
   1781     U_STRING_INIT(k_type_int,       "int",       3);
   1782     U_STRING_INIT(k_type_integer,   "integer",   7);
   1783     U_STRING_INIT(k_type_array,     "array",     5);
   1784     U_STRING_INIT(k_type_alias,     "alias",     5);
   1785     U_STRING_INIT(k_type_intvector, "intvector", 9);
   1786     U_STRING_INIT(k_type_import,    "import",    6);
   1787     U_STRING_INIT(k_type_include,   "include",   7);
   1788 
   1789     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1790     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
   1791     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
   1792     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
   1793 }
   1794 
   1795 static inline UBool isTable(enum EResourceType type) {
   1796     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
   1797 }
   1798 
   1799 static enum EResourceType
   1800 parseResourceType(ParseState* state, UErrorCode *status)
   1801 {
   1802     struct UString        *tokenValue;
   1803     struct UString        comment;
   1804     enum   EResourceType  result = RESTYPE_UNKNOWN;
   1805     uint32_t              line=0;
   1806     ustr_init(&comment);
   1807     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
   1808 
   1809     if (U_FAILURE(*status))
   1810     {
   1811         return RESTYPE_UNKNOWN;
   1812     }
   1813 
   1814     *status = U_ZERO_ERROR;
   1815 
   1816     /* Search for normal types */
   1817     result=RESTYPE_UNKNOWN;
   1818     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
   1819         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
   1820             break;
   1821         }
   1822     }
   1823     /* Now search for the aliases */
   1824     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
   1825         result = RESTYPE_INTEGER;
   1826     }
   1827     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
   1828         result = RESTYPE_BINARY;
   1829     }
   1830     else if (result == RESTYPE_RESERVED) {
   1831         char tokenBuffer[1024];
   1832         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
   1833         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
   1834         *status = U_INVALID_FORMAT_ERROR;
   1835         error(line, "unknown resource type '%s'", tokenBuffer);
   1836     }
   1837 
   1838     return result;
   1839 }
   1840 
   1841 /* parse a non-top-level resource */
   1842 static struct SResource *
   1843 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
   1844 {
   1845     enum   ETokenType      token;
   1846     enum   EResourceType  resType = RESTYPE_UNKNOWN;
   1847     ParseResourceFunction *parseFunction = NULL;
   1848     struct UString        *tokenValue;
   1849     uint32_t                 startline;
   1850     uint32_t                 line;
   1851 
   1852 
   1853     token = getToken(state, &tokenValue, NULL, &startline, status);
   1854 
   1855     if(isVerbose()){
   1856         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1857     }
   1858 
   1859     /* name . [ ':' type ] '{' resource '}' */
   1860     /* This function parses from the colon onwards.  If the colon is present, parse the
   1861     type then try to parse a resource of that type.  If there is no explicit type,
   1862     work it out using the lookahead tokens. */
   1863     switch (token)
   1864     {
   1865     case TOK_EOF:
   1866         *status = U_INVALID_FORMAT_ERROR;
   1867         error(startline, "Unexpected EOF encountered");
   1868         return NULL;
   1869 
   1870     case TOK_ERROR:
   1871         *status = U_INVALID_FORMAT_ERROR;
   1872         return NULL;
   1873 
   1874     case TOK_COLON:
   1875         resType = parseResourceType(state, status);
   1876         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
   1877 
   1878         if (U_FAILURE(*status))
   1879         {
   1880             return NULL;
   1881         }
   1882 
   1883         break;
   1884 
   1885     case TOK_OPEN_BRACE:
   1886         break;
   1887 
   1888     default:
   1889         *status = U_INVALID_FORMAT_ERROR;
   1890         error(startline, "syntax error while reading a resource, expected '{' or ':'");
   1891         return NULL;
   1892     }
   1893 
   1894 
   1895     if (resType == RESTYPE_UNKNOWN)
   1896     {
   1897         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
   1898         We could have any of the following:
   1899         { {         => array (nested)
   1900         { :/}       => array
   1901         { string ,  => string array
   1902 
   1903         { string {  => table
   1904 
   1905         { string :/{    => table
   1906         { string }      => string
   1907         */
   1908 
   1909         token = peekToken(state, 0, NULL, &line, NULL,status);
   1910 
   1911         if (U_FAILURE(*status))
   1912         {
   1913             return NULL;
   1914         }
   1915 
   1916         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
   1917         {
   1918             resType = RESTYPE_ARRAY;
   1919         }
   1920         else if (token == TOK_STRING)
   1921         {
   1922             token = peekToken(state, 1, NULL, &line, NULL, status);
   1923 
   1924             if (U_FAILURE(*status))
   1925             {
   1926                 return NULL;
   1927             }
   1928 
   1929             switch (token)
   1930             {
   1931             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
   1932             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
   1933             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
   1934             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
   1935             default:
   1936                 *status = U_INVALID_FORMAT_ERROR;
   1937                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
   1938                 return NULL;
   1939             }
   1940         }
   1941         else
   1942         {
   1943             *status = U_INVALID_FORMAT_ERROR;
   1944             error(line, "Unexpected token after '{'");
   1945             return NULL;
   1946         }
   1947 
   1948         /* printf("Type guessed as %s\n", resourceNames[resType]); */
   1949     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
   1950         *status = U_INVALID_FORMAT_ERROR;
   1951         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
   1952         return NULL;
   1953     }
   1954 
   1955 
   1956     /* We should now know what we need to parse next, so call the appropriate parser
   1957     function and return. */
   1958     parseFunction = gResourceTypes[resType].parseFunction;
   1959     if (parseFunction != NULL) {
   1960         return parseFunction(state, tag, startline, comment, status);
   1961     }
   1962     else {
   1963         *status = U_INTERNAL_PROGRAM_ERROR;
   1964         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
   1965     }
   1966 
   1967     return NULL;
   1968 }
   1969 
   1970 /* parse the top-level resource */
   1971 struct SRBRoot *
   1972 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
   1973       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
   1974 {
   1975     struct UString    *tokenValue;
   1976     struct UString    comment;
   1977     uint32_t           line;
   1978     enum EResourceType bundleType;
   1979     enum ETokenType    token;
   1980     ParseState state;
   1981     uint32_t i;
   1982 
   1983 
   1984     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
   1985     {
   1986         ustr_init(&state.lookahead[i].value);
   1987         ustr_init(&state.lookahead[i].comment);
   1988     }
   1989 
   1990     initLookahead(&state, buf, status);
   1991 
   1992     state.inputdir       = inputDir;
   1993     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
   1994     state.outputdir       = outputDir;
   1995     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
   1996     state.filename = filename;
   1997     state.makeBinaryCollation = makeBinaryCollation;
   1998     state.omitCollationRules = omitCollationRules;
   1999 
   2000     ustr_init(&comment);
   2001     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
   2002 
   2003     state.bundle = new SRBRoot(&comment, FALSE, *status);
   2004 
   2005     if (state.bundle == NULL || U_FAILURE(*status))
   2006     {
   2007         return NULL;
   2008     }
   2009 
   2010 
   2011     state.bundle->setLocale(tokenValue->fChars, *status);
   2012 
   2013     /* The following code is to make Empty bundle work no matter with :table specifer or not */
   2014     token = getToken(&state, NULL, NULL, &line, status);
   2015     if(token==TOK_COLON) {
   2016         *status=U_ZERO_ERROR;
   2017         bundleType=parseResourceType(&state, status);
   2018 
   2019         if(isTable(bundleType))
   2020         {
   2021             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
   2022         }
   2023         else
   2024         {
   2025             *status=U_PARSE_ERROR;
   2026              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
   2027         }
   2028     }
   2029     else
   2030     {
   2031         /* not a colon */
   2032         if(token==TOK_OPEN_BRACE)
   2033         {
   2034             *status=U_ZERO_ERROR;
   2035             bundleType=RESTYPE_TABLE;
   2036         }
   2037         else
   2038         {
   2039             /* neither colon nor open brace */
   2040             *status=U_PARSE_ERROR;
   2041             bundleType=RESTYPE_UNKNOWN;
   2042             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
   2043         }
   2044     }
   2045 
   2046     if (U_FAILURE(*status))
   2047     {
   2048         delete state.bundle;
   2049         return NULL;
   2050     }
   2051 
   2052     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
   2053         /*
   2054          * Parse a top-level table with the table(nofallback) declaration.
   2055          * This is the same as a regular table, but also sets the
   2056          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
   2057          */
   2058         state.bundle->fNoFallback=TRUE;
   2059     }
   2060     /* top-level tables need not handle special table names like "collations" */
   2061     assert(!state.bundle->fIsPoolBundle);
   2062     assert(state.bundle->fRoot->fType == URES_TABLE);
   2063     TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
   2064     realParseTable(&state, rootTable, NULL, line, status);
   2065     if(dependencyArray!=NULL){
   2066         rootTable->add(dependencyArray, 0, *status);
   2067         dependencyArray = NULL;
   2068     }
   2069    if (U_FAILURE(*status))
   2070     {
   2071         delete state.bundle;
   2072         res_close(dependencyArray);
   2073         return NULL;
   2074     }
   2075 
   2076     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
   2077     {
   2078         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
   2079         if(isStrict()){
   2080             *status = U_INVALID_FORMAT_ERROR;
   2081             return NULL;
   2082         }
   2083     }
   2084 
   2085     cleanupLookahead(&state);
   2086     ustr_deinit(&comment);
   2087     return state.bundle;
   2088 }
   2089