1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File parse.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/26/99 stephen Creation. 15 * 02/25/00 weiv Overhaul to write udata 16 * 5/10/01 Ram removed ustdio dependency 17 * 06/10/2001 Dominic Ludlam <dom (at) recoil.org> Rewritten 18 ******************************************************************************* 19 */ 20 21 #include "ucol_imp.h" 22 #include "parse.h" 23 #include "errmsg.h" 24 #include "uhash.h" 25 #include "cmemory.h" 26 #include "cstring.h" 27 #include "uinvchar.h" 28 #include "read.h" 29 #include "ustr.h" 30 #include "reslist.h" 31 #include "rbt_pars.h" 32 #include "genrb.h" 33 #include "unicode/ustring.h" 34 #include "unicode/uscript.h" 35 #include "unicode/putil.h" 36 #include <stdio.h> 37 38 extern UBool gIncludeUnihanColl; 39 40 /* Number of tokens to read ahead of the current stream position */ 41 #define MAX_LOOKAHEAD 3 42 43 #define CR 0x000D 44 #define LF 0x000A 45 #define SPACE 0x0020 46 #define TAB 0x0009 47 #define ESCAPE 0x005C 48 #define HASH 0x0023 49 #define QUOTE 0x0027 50 #define ZERO 0x0030 51 #define STARTCOMMAND 0x005B 52 #define ENDCOMMAND 0x005D 53 #define OPENSQBRACKET 0x005B 54 #define CLOSESQBRACKET 0x005D 55 56 struct Lookahead 57 { 58 enum ETokenType type; 59 struct UString value; 60 struct UString comment; 61 uint32_t line; 62 }; 63 64 /* keep in sync with token defines in read.h */ 65 const char *tokenNames[TOK_TOKEN_COUNT] = 66 { 67 "string", /* A string token, such as "MonthNames" */ 68 "'{'", /* An opening brace character */ 69 "'}'", /* A closing brace character */ 70 "','", /* A comma */ 71 "':'", /* A colon */ 72 73 "<end of file>", /* End of the file has been reached successfully */ 74 "<end of line>" 75 }; 76 77 /* Just to store "TRUE" */ 78 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 79 80 typedef struct { 81 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 82 uint32_t lookaheadPosition; 83 UCHARBUF *buffer; 84 struct SRBRoot *bundle; 85 const char *inputdir; 86 uint32_t inputdirLength; 87 const char *outputdir; 88 uint32_t outputdirLength; 89 } ParseState; 90 91 static UBool gMakeBinaryCollation = TRUE; 92 static UBool gOmitCollationRules = FALSE; 93 94 typedef struct SResource * 95 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); 96 97 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); 98 99 /* The nature of the lookahead buffer: 100 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 101 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 102 When getToken is called, the current pointer is moved to the next slot and the 103 old slot is filled with the next token from the reader by calling getNextToken. 104 The token values are stored in the slot, which means that token values don't 105 survive a call to getToken, ie. 106 107 UString *value; 108 109 getToken(&value, NULL, status); 110 getToken(NULL, NULL, status); bad - value is now a different string 111 */ 112 static void 113 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) 114 { 115 static uint32_t initTypeStrings = 0; 116 uint32_t i; 117 118 if (!initTypeStrings) 119 { 120 initTypeStrings = 1; 121 } 122 123 state->lookaheadPosition = 0; 124 state->buffer = buf; 125 126 resetLineNumber(); 127 128 for (i = 0; i < MAX_LOOKAHEAD; i++) 129 { 130 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 131 if (U_FAILURE(*status)) 132 { 133 return; 134 } 135 } 136 137 *status = U_ZERO_ERROR; 138 } 139 140 static void 141 cleanupLookahead(ParseState* state) 142 { 143 uint32_t i; 144 for (i = 0; i < MAX_LOOKAHEAD; i++) 145 { 146 ustr_deinit(&state->lookahead[i].value); 147 ustr_deinit(&state->lookahead[i].comment); 148 } 149 150 } 151 152 static enum ETokenType 153 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 154 { 155 enum ETokenType result; 156 uint32_t i; 157 158 result = state->lookahead[state->lookaheadPosition].type; 159 160 if (tokenValue != NULL) 161 { 162 *tokenValue = &state->lookahead[state->lookaheadPosition].value; 163 } 164 165 if (linenumber != NULL) 166 { 167 *linenumber = state->lookahead[state->lookaheadPosition].line; 168 } 169 170 if (comment != NULL) 171 { 172 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 173 } 174 175 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 176 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 177 ustr_setlen(&state->lookahead[i].comment, 0, status); 178 ustr_setlen(&state->lookahead[i].value, 0, status); 179 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 180 181 /* printf("getToken, returning %s\n", tokenNames[result]); */ 182 183 return result; 184 } 185 186 static enum ETokenType 187 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) 188 { 189 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 190 191 if (U_FAILURE(*status)) 192 { 193 return TOK_ERROR; 194 } 195 196 if (lookaheadCount >= MAX_LOOKAHEAD) 197 { 198 *status = U_INTERNAL_PROGRAM_ERROR; 199 return TOK_ERROR; 200 } 201 202 if (tokenValue != NULL) 203 { 204 *tokenValue = &state->lookahead[i].value; 205 } 206 207 if (linenumber != NULL) 208 { 209 *linenumber = state->lookahead[i].line; 210 } 211 212 if(comment != NULL){ 213 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 214 } 215 216 return state->lookahead[i].type; 217 } 218 219 static void 220 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 221 { 222 uint32_t line; 223 224 enum ETokenType token = getToken(state, tokenValue, comment, &line, status); 225 226 if (linenumber != NULL) 227 { 228 *linenumber = line; 229 } 230 231 if (U_FAILURE(*status)) 232 { 233 return; 234 } 235 236 if (token != expectedToken) 237 { 238 *status = U_INVALID_FORMAT_ERROR; 239 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 240 } 241 else 242 { 243 *status = U_ZERO_ERROR; 244 } 245 } 246 247 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) 248 { 249 struct UString *tokenValue; 250 char *result; 251 uint32_t count; 252 253 expect(state, TOK_STRING, &tokenValue, comment, line, status); 254 255 if (U_FAILURE(*status)) 256 { 257 return NULL; 258 } 259 260 count = u_strlen(tokenValue->fChars); 261 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 262 *status = U_INVALID_FORMAT_ERROR; 263 error(*line, "invariant characters required for table keys, binary data, etc."); 264 return NULL; 265 } 266 267 result = uprv_malloc(count+1); 268 269 if (result == NULL) 270 { 271 *status = U_MEMORY_ALLOCATION_ERROR; 272 return NULL; 273 } 274 275 u_UCharsToChars(tokenValue->fChars, result, count+1); 276 return result; 277 } 278 279 static struct SResource * 280 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 281 { 282 struct SResource *result = NULL; 283 struct UString *tokenValue; 284 FileStream *file = NULL; 285 char filename[256] = { '\0' }; 286 char cs[128] = { '\0' }; 287 uint32_t line; 288 int len=0; 289 UBool quoted = FALSE; 290 UCHARBUF *ucbuf=NULL; 291 UChar32 c = 0; 292 const char* cp = NULL; 293 UChar *pTarget = NULL; 294 UChar *target = NULL; 295 UChar *targetLimit = NULL; 296 int32_t size = 0; 297 298 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 299 300 if(isVerbose()){ 301 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 302 } 303 304 if (U_FAILURE(*status)) 305 { 306 return NULL; 307 } 308 /* make the filename including the directory */ 309 if (state->inputdir != NULL) 310 { 311 uprv_strcat(filename, state->inputdir); 312 313 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 314 { 315 uprv_strcat(filename, U_FILE_SEP_STRING); 316 } 317 } 318 319 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 320 321 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 322 323 if (U_FAILURE(*status)) 324 { 325 return NULL; 326 } 327 uprv_strcat(filename, cs); 328 329 if(gOmitCollationRules) { 330 return res_none(); 331 } 332 333 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 334 335 if (U_FAILURE(*status)) { 336 error(line, "An error occured while opening the input file %s\n", filename); 337 return NULL; 338 } 339 340 /* We allocate more space than actually required 341 * since the actual size needed for storing UChars 342 * is not known in UTF-8 byte stream 343 */ 344 size = ucbuf_size(ucbuf) + 1; 345 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 346 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 347 target = pTarget; 348 targetLimit = pTarget+size; 349 350 /* read the rules into the buffer */ 351 while (target < targetLimit) 352 { 353 c = ucbuf_getc(ucbuf, status); 354 if(c == QUOTE) { 355 quoted = (UBool)!quoted; 356 } 357 /* weiv (06/26/2002): adding the following: 358 * - preserving spaces in commands [...] 359 * - # comments until the end of line 360 */ 361 if (c == STARTCOMMAND && !quoted) 362 { 363 /* preserve commands 364 * closing bracket will be handled by the 365 * append at the end of the loop 366 */ 367 while(c != ENDCOMMAND) { 368 U_APPEND_CHAR32(c, target,len); 369 c = ucbuf_getc(ucbuf, status); 370 } 371 } 372 else if (c == HASH && !quoted) { 373 /* skip comments */ 374 while(c != CR && c != LF) { 375 c = ucbuf_getc(ucbuf, status); 376 } 377 continue; 378 } 379 else if (c == ESCAPE) 380 { 381 c = unescape(ucbuf, status); 382 383 if (c == U_ERR) 384 { 385 uprv_free(pTarget); 386 T_FileStream_close(file); 387 return NULL; 388 } 389 } 390 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 391 { 392 /* ignore spaces carriage returns 393 * and line feed unless in the form \uXXXX 394 */ 395 continue; 396 } 397 398 /* Append UChar * after dissembling if c > 0xffff*/ 399 if (c != U_EOF) 400 { 401 U_APPEND_CHAR32(c, target,len); 402 } 403 else 404 { 405 break; 406 } 407 } 408 409 /* terminate the string */ 410 if(target < targetLimit){ 411 *target = 0x0000; 412 } 413 414 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 415 416 417 ucbuf_close(ucbuf); 418 uprv_free(pTarget); 419 T_FileStream_close(file); 420 421 return result; 422 } 423 424 static struct SResource * 425 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 426 { 427 struct SResource *result = NULL; 428 struct UString *tokenValue; 429 FileStream *file = NULL; 430 char filename[256] = { '\0' }; 431 char cs[128] = { '\0' }; 432 uint32_t line; 433 UCHARBUF *ucbuf=NULL; 434 const char* cp = NULL; 435 UChar *pTarget = NULL; 436 const UChar *pSource = NULL; 437 int32_t size = 0; 438 439 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 440 441 if(isVerbose()){ 442 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 443 } 444 445 if (U_FAILURE(*status)) 446 { 447 return NULL; 448 } 449 /* make the filename including the directory */ 450 if (state->inputdir != NULL) 451 { 452 uprv_strcat(filename, state->inputdir); 453 454 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 455 { 456 uprv_strcat(filename, U_FILE_SEP_STRING); 457 } 458 } 459 460 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 461 462 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 463 464 if (U_FAILURE(*status)) 465 { 466 return NULL; 467 } 468 uprv_strcat(filename, cs); 469 470 471 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 472 473 if (U_FAILURE(*status)) { 474 error(line, "An error occured while opening the input file %s\n", filename); 475 return NULL; 476 } 477 478 /* We allocate more space than actually required 479 * since the actual size needed for storing UChars 480 * is not known in UTF-8 byte stream 481 */ 482 pSource = ucbuf_getBuffer(ucbuf, &size, status); 483 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 484 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 485 486 #if !UCONFIG_NO_TRANSLITERATION 487 size = utrans_stripRules(pSource, size, pTarget, status); 488 #else 489 size = 0; 490 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 491 #endif 492 result = string_open(state->bundle, tag, pTarget, size, NULL, status); 493 494 ucbuf_close(ucbuf); 495 uprv_free(pTarget); 496 T_FileStream_close(file); 497 498 return result; 499 } 500 static struct SResource* dependencyArray = NULL; 501 502 static struct SResource * 503 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 504 { 505 struct SResource *result = NULL; 506 struct SResource *elem = NULL; 507 struct UString *tokenValue; 508 uint32_t line; 509 char filename[256] = { '\0' }; 510 char cs[128] = { '\0' }; 511 512 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 513 514 if(isVerbose()){ 515 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 516 } 517 518 if (U_FAILURE(*status)) 519 { 520 return NULL; 521 } 522 /* make the filename including the directory */ 523 if (state->outputdir != NULL) 524 { 525 uprv_strcat(filename, state->outputdir); 526 527 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) 528 { 529 uprv_strcat(filename, U_FILE_SEP_STRING); 530 } 531 } 532 533 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 534 535 if (U_FAILURE(*status)) 536 { 537 return NULL; 538 } 539 uprv_strcat(filename, cs); 540 if(!T_FileStream_file_exists(filename)){ 541 if(isStrict()){ 542 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 543 }else{ 544 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 545 } 546 } 547 if(dependencyArray==NULL){ 548 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); 549 } 550 if(tag!=NULL){ 551 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 552 } 553 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 554 555 array_add(dependencyArray, elem, status); 556 557 if (U_FAILURE(*status)) 558 { 559 return NULL; 560 } 561 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 562 return result; 563 } 564 static struct SResource * 565 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 566 { 567 struct UString *tokenValue; 568 struct SResource *result = NULL; 569 570 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 571 { 572 return parseUCARules(tag, startline, status); 573 }*/ 574 if(isVerbose()){ 575 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 576 } 577 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 578 579 if (U_SUCCESS(*status)) 580 { 581 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 582 doesn't survive expect either) */ 583 584 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 585 if(U_SUCCESS(*status) && result) { 586 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 587 588 if (U_FAILURE(*status)) 589 { 590 res_close(result); 591 return NULL; 592 } 593 } 594 } 595 596 return result; 597 } 598 599 static struct SResource * 600 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 601 { 602 struct UString *tokenValue; 603 struct SResource *result = NULL; 604 605 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 606 607 if(isVerbose()){ 608 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 609 } 610 611 if (U_SUCCESS(*status)) 612 { 613 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 614 doesn't survive expect either) */ 615 616 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 617 618 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 619 620 if (U_FAILURE(*status)) 621 { 622 res_close(result); 623 return NULL; 624 } 625 } 626 627 return result; 628 } 629 630 typedef struct{ 631 const char* inputDir; 632 const char* outputDir; 633 } GenrbData; 634 635 static struct SResource* resLookup(struct SResource* res, const char* key){ 636 struct SResource *current = NULL; 637 struct SResTable *list; 638 if (res == res_none()) { 639 return NULL; 640 } 641 642 list = &(res->u.fTable); 643 644 current = list->fFirst; 645 while (current != NULL) { 646 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { 647 return current; 648 } 649 current = current->fNext; 650 } 651 return NULL; 652 } 653 654 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ 655 struct SRBRoot *data = NULL; 656 UCHARBUF *ucbuf = NULL; 657 GenrbData* genrbdata = (GenrbData*) context; 658 int localeLength = strlen(locale); 659 char* filename = (char*)uprv_malloc(localeLength+5); 660 char *inputDirBuf = NULL; 661 char *openFileName = NULL; 662 const char* cp = ""; 663 UChar* urules = NULL; 664 int32_t urulesLength = 0; 665 int32_t i = 0; 666 int32_t dirlen = 0; 667 int32_t filelen = 0; 668 struct SResource* root; 669 struct SResource* collations; 670 struct SResource* collation; 671 struct SResource* sequence; 672 673 memcpy(filename, locale, localeLength); 674 for(i = 0; i < localeLength; i++){ 675 if(filename[i] == '-'){ 676 filename[i] = '_'; 677 } 678 } 679 filename[localeLength] = '.'; 680 filename[localeLength+1] = 't'; 681 filename[localeLength+2] = 'x'; 682 filename[localeLength+3] = 't'; 683 filename[localeLength+4] = 0; 684 685 686 if (status==NULL || U_FAILURE(*status)) { 687 return NULL; 688 } 689 if(filename==NULL){ 690 *status=U_ILLEGAL_ARGUMENT_ERROR; 691 return NULL; 692 }else{ 693 filelen = (int32_t)uprv_strlen(filename); 694 } 695 if(genrbdata->inputDir == NULL) { 696 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); 697 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 698 openFileName[0] = '\0'; 699 if (filenameBegin != NULL) { 700 /* 701 * When a filename ../../../data/root.txt is specified, 702 * we presume that the input directory is ../../../data 703 * This is very important when the resource file includes 704 * another file, like UCARules.txt or thaidict.brk. 705 */ 706 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); 707 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); 708 709 /* test for NULL */ 710 if(inputDirBuf == NULL) { 711 *status = U_MEMORY_ALLOCATION_ERROR; 712 goto finish; 713 } 714 715 inputDirBuf[filenameSize - 1] = 0; 716 genrbdata->inputDir = inputDirBuf; 717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 718 } 719 }else{ 720 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 721 722 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 723 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 724 725 /* test for NULL */ 726 if(openFileName == NULL) { 727 *status = U_MEMORY_ALLOCATION_ERROR; 728 goto finish; 729 } 730 731 openFileName[0] = '\0'; 732 /* 733 * append the input dir to openFileName if the first char in 734 * filename is not file seperation char and the last char input directory is not '.'. 735 * This is to support : 736 * genrb -s. /home/icu/data 737 * genrb -s. icu/data 738 * The user cannot mix notations like 739 * genrb -s. /icu/data --- the absolute path specified. -s redundant 740 * user should use 741 * genrb -s. icu/data --- start from CWD and look in icu/data dir 742 */ 743 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){ 744 uprv_strcpy(openFileName, genrbdata->inputDir); 745 openFileName[dirlen] = U_FILE_SEP_CHAR; 746 } 747 openFileName[dirlen + 1] = '\0'; 748 } else { 749 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); 750 751 /* test for NULL */ 752 if(openFileName == NULL) { 753 *status = U_MEMORY_ALLOCATION_ERROR; 754 goto finish; 755 } 756 757 uprv_strcpy(openFileName, genrbdata->inputDir); 758 759 } 760 } 761 uprv_strcat(openFileName, filename); 762 /* printf("%s\n", openFileName); */ 763 *status = U_ZERO_ERROR; 764 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); 765 766 if(*status == U_FILE_ACCESS_ERROR) { 767 768 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); 769 goto finish; 770 } 771 if (ucbuf == NULL || U_FAILURE(*status)) { 772 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status)); 773 goto finish; 774 } 775 776 /* Parse the data into an SRBRoot */ 777 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, status); 778 779 root = data->fRoot; 780 collations = resLookup(root, "collations"); 781 collation = resLookup(collations, type); 782 sequence = resLookup(collation, "Sequence"); 783 urules = sequence->u.fString.fChars; 784 urulesLength = sequence->u.fString.fLength; 785 *pLength = urulesLength; 786 787 finish: 788 789 if (inputDirBuf != NULL) { 790 uprv_free(inputDirBuf); 791 } 792 793 if (openFileName != NULL) { 794 uprv_free(openFileName); 795 } 796 797 if(ucbuf) { 798 ucbuf_close(ucbuf); 799 } 800 801 return urules; 802 } 803 804 static struct SResource * 805 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status) 806 { 807 struct SResource *member = NULL; 808 struct UString *tokenValue; 809 struct UString comment; 810 enum ETokenType token; 811 char subtag[1024]; 812 UVersionInfo version; 813 UBool override = FALSE; 814 uint32_t line; 815 GenrbData genrbdata; 816 /* '{' . (name resource)* '}' */ 817 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 818 819 for (;;) 820 { 821 ustr_init(&comment); 822 token = getToken(state, &tokenValue, &comment, &line, status); 823 824 if (token == TOK_CLOSE_BRACE) 825 { 826 return result; 827 } 828 829 if (token != TOK_STRING) 830 { 831 res_close(result); 832 *status = U_INVALID_FORMAT_ERROR; 833 834 if (token == TOK_EOF) 835 { 836 error(startline, "unterminated table"); 837 } 838 else 839 { 840 error(line, "Unexpected token %s", tokenNames[token]); 841 } 842 843 return NULL; 844 } 845 846 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 847 848 if (U_FAILURE(*status)) 849 { 850 res_close(result); 851 return NULL; 852 } 853 854 member = parseResource(state, subtag, NULL, status); 855 856 if (U_FAILURE(*status)) 857 { 858 res_close(result); 859 return NULL; 860 } 861 862 if (uprv_strcmp(subtag, "Version") == 0) 863 { 864 char ver[40]; 865 int32_t length = member->u.fString.fLength; 866 867 if (length >= (int32_t) sizeof(ver)) 868 { 869 length = (int32_t) sizeof(ver) - 1; 870 } 871 872 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ 873 u_versionFromString(version, ver); 874 875 table_add(result, member, line, status); 876 877 } 878 else if (uprv_strcmp(subtag, "Override") == 0) 879 { 880 override = FALSE; 881 882 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0) 883 { 884 override = TRUE; 885 } 886 table_add(result, member, line, status); 887 888 } 889 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 890 { 891 /* discard duplicate %%CollationBin if any*/ 892 } 893 else if (uprv_strcmp(subtag, "Sequence") == 0) 894 { 895 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 896 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 897 #else 898 if(gMakeBinaryCollation) { 899 UErrorCode intStatus = U_ZERO_ERROR; 900 901 /* do the collation elements */ 902 int32_t len = 0; 903 uint8_t *data = NULL; 904 UCollator *coll = NULL; 905 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)]; 906 uint32_t reorderCodeCount; 907 int32_t reorderCodeIndex; 908 UParseError parseError; 909 910 genrbdata.inputDir = state->inputdir; 911 genrbdata.outputDir = state->outputdir; 912 913 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength, 914 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus); 915 916 if (U_SUCCESS(intStatus) && coll != NULL) 917 { 918 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); 919 data = (uint8_t *)uprv_malloc(len); 920 intStatus = U_ZERO_ERROR; 921 len = ucol_cloneBinary(coll, data, len, &intStatus); 922 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/ 923 924 /* tailoring rules version */ 925 /* This is wrong! */ 926 /*coll->dataInfo.dataVersion[1] = version[0];*/ 927 /* Copy tailoring version. Builder version already */ 928 /* set in ucol_openRules */ 929 ((UCATableHeader *)data)->version[1] = version[0]; 930 ((UCATableHeader *)data)->version[2] = version[1]; 931 ((UCATableHeader *)data)->version[3] = version[2]; 932 933 if (U_SUCCESS(intStatus) && data != NULL) 934 { 935 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status); 936 table_add(result, collationBin, line, status); 937 uprv_free(data); 938 939 reorderCodeCount = ucol_getReorderCodes( 940 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); 941 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { 942 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status); 943 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) { 944 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status); 945 } 946 table_add(result, reorderCodeRes, line, status); 947 } 948 } 949 else 950 { 951 warning(line, "could not obtain rules from collator"); 952 if(isStrict()){ 953 *status = U_INVALID_FORMAT_ERROR; 954 return NULL; 955 } 956 } 957 958 ucol_close(coll); 959 } 960 else 961 { 962 if(intStatus == U_FILE_ACCESS_ERROR) { 963 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); 964 *status = intStatus; 965 return NULL; 966 } 967 warning(line, "%%Collation could not be constructed from CollationElements - check context!"); 968 if(isStrict()){ 969 *status = intStatus; 970 return NULL; 971 } 972 } 973 } else { 974 if(isVerbose()) { 975 printf("Not building Collation binary\n"); 976 } 977 } 978 #endif 979 /* in order to achieve smaller data files, we can direct genrb */ 980 /* to omit collation rules */ 981 if(gOmitCollationRules) { 982 bundle_closeString(state->bundle, member); 983 } else { 984 table_add(result, member, line, status); 985 } 986 } 987 988 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 989 990 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 991 992 if (U_FAILURE(*status)) 993 { 994 res_close(result); 995 return NULL; 996 } 997 } 998 999 /* not reached */ 1000 /* A compiler warning will appear if all paths don't contain a return statement. */ 1001 /* *status = U_INTERNAL_PROGRAM_ERROR; 1002 return NULL;*/ 1003 } 1004 1005 static struct SResource * 1006 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 1007 { 1008 struct SResource *result = NULL; 1009 struct SResource *member = NULL; 1010 struct SResource *collationRes = NULL; 1011 struct UString *tokenValue; 1012 struct UString comment; 1013 enum ETokenType token; 1014 char subtag[1024], typeKeyword[1024]; 1015 uint32_t line; 1016 1017 result = table_open(state->bundle, tag, NULL, status); 1018 1019 if (result == NULL || U_FAILURE(*status)) 1020 { 1021 return NULL; 1022 } 1023 if(isVerbose()){ 1024 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1025 } 1026 if(!newCollation) { 1027 return addCollation(state, result, startline, status); 1028 } 1029 else { 1030 for(;;) { 1031 ustr_init(&comment); 1032 token = getToken(state, &tokenValue, &comment, &line, status); 1033 1034 if (token == TOK_CLOSE_BRACE) 1035 { 1036 return result; 1037 } 1038 1039 if (token != TOK_STRING) 1040 { 1041 res_close(result); 1042 *status = U_INVALID_FORMAT_ERROR; 1043 1044 if (token == TOK_EOF) 1045 { 1046 error(startline, "unterminated table"); 1047 } 1048 else 1049 { 1050 error(line, "Unexpected token %s", tokenNames[token]); 1051 } 1052 1053 return NULL; 1054 } 1055 1056 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1057 1058 if (U_FAILURE(*status)) 1059 { 1060 res_close(result); 1061 return NULL; 1062 } 1063 1064 if (uprv_strcmp(subtag, "default") == 0) 1065 { 1066 member = parseResource(state, subtag, NULL, status); 1067 1068 if (U_FAILURE(*status)) 1069 { 1070 res_close(result); 1071 return NULL; 1072 } 1073 1074 table_add(result, member, line, status); 1075 } 1076 else 1077 { 1078 token = peekToken(state, 0, &tokenValue, &line, &comment, status); 1079 /* this probably needs to be refactored or recursively use the parser */ 1080 /* first we assume that our collation table won't have the explicit type */ 1081 /* then, we cannot handle aliases */ 1082 if(token == TOK_OPEN_BRACE) { 1083 token = getToken(state, &tokenValue, &comment, &line, status); 1084 collationRes = table_open(state->bundle, subtag, NULL, status); 1085 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ 1086 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) { 1087 table_add(result, collationRes, startline, status); 1088 } 1089 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 1090 /* we could have a table too */ 1091 token = peekToken(state, 1, &tokenValue, &line, &comment, status); 1092 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 1093 if(uprv_strcmp(typeKeyword, "alias") == 0) { 1094 member = parseResource(state, subtag, NULL, status); 1095 1096 if (U_FAILURE(*status)) 1097 { 1098 res_close(result); 1099 return NULL; 1100 } 1101 1102 table_add(result, member, line, status); 1103 } else { 1104 res_close(result); 1105 *status = U_INVALID_FORMAT_ERROR; 1106 return NULL; 1107 } 1108 } else { 1109 res_close(result); 1110 *status = U_INVALID_FORMAT_ERROR; 1111 return NULL; 1112 } 1113 } 1114 1115 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 1116 1117 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 1118 1119 if (U_FAILURE(*status)) 1120 { 1121 res_close(result); 1122 return NULL; 1123 } 1124 } 1125 } 1126 } 1127 1128 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 1129 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 1130 static struct SResource * 1131 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) 1132 { 1133 struct SResource *member = NULL; 1134 struct UString *tokenValue=NULL; 1135 struct UString comment; 1136 enum ETokenType token; 1137 char subtag[1024]; 1138 uint32_t line; 1139 UBool readToken = FALSE; 1140 1141 /* '{' . (name resource)* '}' */ 1142 if(isVerbose()){ 1143 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1144 } 1145 for (;;) 1146 { 1147 ustr_init(&comment); 1148 token = getToken(state, &tokenValue, &comment, &line, status); 1149 1150 if (token == TOK_CLOSE_BRACE) 1151 { 1152 if (!readToken) { 1153 warning(startline, "Encountered empty table"); 1154 } 1155 return table; 1156 } 1157 1158 if (token != TOK_STRING) 1159 { 1160 *status = U_INVALID_FORMAT_ERROR; 1161 1162 if (token == TOK_EOF) 1163 { 1164 error(startline, "unterminated table"); 1165 } 1166 else 1167 { 1168 error(line, "unexpected token %s", tokenNames[token]); 1169 } 1170 1171 return NULL; 1172 } 1173 1174 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 1175 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1176 } else { 1177 *status = U_INVALID_FORMAT_ERROR; 1178 error(line, "invariant characters required for table keys"); 1179 return NULL; 1180 } 1181 1182 if (U_FAILURE(*status)) 1183 { 1184 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 1185 return NULL; 1186 } 1187 1188 member = parseResource(state, subtag, &comment, status); 1189 1190 if (member == NULL || U_FAILURE(*status)) 1191 { 1192 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 1193 return NULL; 1194 } 1195 1196 table_add(table, member, line, status); 1197 1198 if (U_FAILURE(*status)) 1199 { 1200 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 1201 return NULL; 1202 } 1203 readToken = TRUE; 1204 ustr_deinit(&comment); 1205 } 1206 1207 /* not reached */ 1208 /* A compiler warning will appear if all paths don't contain a return statement. */ 1209 /* *status = U_INTERNAL_PROGRAM_ERROR; 1210 return NULL;*/ 1211 } 1212 1213 static struct SResource * 1214 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1215 { 1216 struct SResource *result; 1217 1218 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1219 { 1220 return parseCollationElements(state, tag, startline, FALSE, status); 1221 } 1222 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1223 { 1224 return parseCollationElements(state, tag, startline, TRUE, status); 1225 } 1226 if(isVerbose()){ 1227 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1228 } 1229 1230 result = table_open(state->bundle, tag, comment, status); 1231 1232 if (result == NULL || U_FAILURE(*status)) 1233 { 1234 return NULL; 1235 } 1236 1237 return realParseTable(state, result, tag, startline, status); 1238 } 1239 1240 static struct SResource * 1241 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1242 { 1243 struct SResource *result = NULL; 1244 struct SResource *member = NULL; 1245 struct UString *tokenValue; 1246 struct UString memberComments; 1247 enum ETokenType token; 1248 UBool readToken = FALSE; 1249 1250 result = array_open(state->bundle, tag, comment, status); 1251 1252 if (result == NULL || U_FAILURE(*status)) 1253 { 1254 return NULL; 1255 } 1256 if(isVerbose()){ 1257 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1258 } 1259 1260 ustr_init(&memberComments); 1261 1262 /* '{' . resource [','] '}' */ 1263 for (;;) 1264 { 1265 /* reset length */ 1266 ustr_setlen(&memberComments, 0, status); 1267 1268 /* check for end of array, but don't consume next token unless it really is the end */ 1269 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); 1270 1271 1272 if (token == TOK_CLOSE_BRACE) 1273 { 1274 getToken(state, NULL, NULL, NULL, status); 1275 if (!readToken) { 1276 warning(startline, "Encountered empty array"); 1277 } 1278 break; 1279 } 1280 1281 if (token == TOK_EOF) 1282 { 1283 res_close(result); 1284 *status = U_INVALID_FORMAT_ERROR; 1285 error(startline, "unterminated array"); 1286 return NULL; 1287 } 1288 1289 /* string arrays are a special case */ 1290 if (token == TOK_STRING) 1291 { 1292 getToken(state, &tokenValue, &memberComments, NULL, status); 1293 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1294 } 1295 else 1296 { 1297 member = parseResource(state, NULL, &memberComments, status); 1298 } 1299 1300 if (member == NULL || U_FAILURE(*status)) 1301 { 1302 res_close(result); 1303 return NULL; 1304 } 1305 1306 array_add(result, member, status); 1307 1308 if (U_FAILURE(*status)) 1309 { 1310 res_close(result); 1311 return NULL; 1312 } 1313 1314 /* eat optional comma if present */ 1315 token = peekToken(state, 0, NULL, NULL, NULL, status); 1316 1317 if (token == TOK_COMMA) 1318 { 1319 getToken(state, NULL, NULL, NULL, status); 1320 } 1321 1322 if (U_FAILURE(*status)) 1323 { 1324 res_close(result); 1325 return NULL; 1326 } 1327 readToken = TRUE; 1328 } 1329 1330 ustr_deinit(&memberComments); 1331 return result; 1332 } 1333 1334 static struct SResource * 1335 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1336 { 1337 struct SResource *result = NULL; 1338 enum ETokenType token; 1339 char *string; 1340 int32_t value; 1341 UBool readToken = FALSE; 1342 char *stopstring; 1343 uint32_t len; 1344 struct UString memberComments; 1345 1346 result = intvector_open(state->bundle, tag, comment, status); 1347 1348 if (result == NULL || U_FAILURE(*status)) 1349 { 1350 return NULL; 1351 } 1352 1353 if(isVerbose()){ 1354 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1355 } 1356 ustr_init(&memberComments); 1357 /* '{' . string [','] '}' */ 1358 for (;;) 1359 { 1360 ustr_setlen(&memberComments, 0, status); 1361 1362 /* check for end of array, but don't consume next token unless it really is the end */ 1363 token = peekToken(state, 0, NULL, NULL,&memberComments, status); 1364 1365 if (token == TOK_CLOSE_BRACE) 1366 { 1367 /* it's the end, consume the close brace */ 1368 getToken(state, NULL, NULL, NULL, status); 1369 if (!readToken) { 1370 warning(startline, "Encountered empty int vector"); 1371 } 1372 ustr_deinit(&memberComments); 1373 return result; 1374 } 1375 1376 string = getInvariantString(state, NULL, NULL, status); 1377 1378 if (U_FAILURE(*status)) 1379 { 1380 res_close(result); 1381 return NULL; 1382 } 1383 1384 /* For handling illegal char in the Intvector */ 1385 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1386 len=(uint32_t)(stopstring-string); 1387 1388 if(len==uprv_strlen(string)) 1389 { 1390 intvector_add(result, value, status); 1391 uprv_free(string); 1392 token = peekToken(state, 0, NULL, NULL, NULL, status); 1393 } 1394 else 1395 { 1396 uprv_free(string); 1397 *status=U_INVALID_CHAR_FOUND; 1398 } 1399 1400 if (U_FAILURE(*status)) 1401 { 1402 res_close(result); 1403 return NULL; 1404 } 1405 1406 /* the comma is optional (even though it is required to prevent the reader from concatenating 1407 consecutive entries) so that a missing comma on the last entry isn't an error */ 1408 if (token == TOK_COMMA) 1409 { 1410 getToken(state, NULL, NULL, NULL, status); 1411 } 1412 readToken = TRUE; 1413 } 1414 1415 /* not reached */ 1416 /* A compiler warning will appear if all paths don't contain a return statement. */ 1417 /* intvector_close(result, status); 1418 *status = U_INTERNAL_PROGRAM_ERROR; 1419 return NULL;*/ 1420 } 1421 1422 static struct SResource * 1423 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1424 { 1425 struct SResource *result = NULL; 1426 uint8_t *value; 1427 char *string; 1428 char toConv[3] = {'\0', '\0', '\0'}; 1429 uint32_t count; 1430 uint32_t i; 1431 uint32_t line; 1432 char *stopstring; 1433 uint32_t len; 1434 1435 string = getInvariantString(state, &line, NULL, status); 1436 1437 if (string == NULL || U_FAILURE(*status)) 1438 { 1439 return NULL; 1440 } 1441 1442 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1443 1444 if (U_FAILURE(*status)) 1445 { 1446 uprv_free(string); 1447 return NULL; 1448 } 1449 1450 if(isVerbose()){ 1451 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1452 } 1453 1454 count = (uint32_t)uprv_strlen(string); 1455 if (count > 0){ 1456 if((count % 2)==0){ 1457 value = uprv_malloc(sizeof(uint8_t) * count); 1458 1459 if (value == NULL) 1460 { 1461 uprv_free(string); 1462 *status = U_MEMORY_ALLOCATION_ERROR; 1463 return NULL; 1464 } 1465 1466 for (i = 0; i < count; i += 2) 1467 { 1468 toConv[0] = string[i]; 1469 toConv[1] = string[i + 1]; 1470 1471 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1472 len=(uint32_t)(stopstring-toConv); 1473 1474 if(len!=uprv_strlen(toConv)) 1475 { 1476 uprv_free(string); 1477 *status=U_INVALID_CHAR_FOUND; 1478 return NULL; 1479 } 1480 } 1481 1482 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status); 1483 1484 uprv_free(value); 1485 } 1486 else 1487 { 1488 *status = U_INVALID_CHAR_FOUND; 1489 uprv_free(string); 1490 error(line, "Encountered invalid binary string"); 1491 return NULL; 1492 } 1493 } 1494 else 1495 { 1496 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status); 1497 warning(startline, "Encountered empty binary tag"); 1498 } 1499 uprv_free(string); 1500 1501 return result; 1502 } 1503 1504 static struct SResource * 1505 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1506 { 1507 struct SResource *result = NULL; 1508 int32_t value; 1509 char *string; 1510 char *stopstring; 1511 uint32_t len; 1512 1513 string = getInvariantString(state, NULL, NULL, status); 1514 1515 if (string == NULL || U_FAILURE(*status)) 1516 { 1517 return NULL; 1518 } 1519 1520 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1521 1522 if (U_FAILURE(*status)) 1523 { 1524 uprv_free(string); 1525 return NULL; 1526 } 1527 1528 if(isVerbose()){ 1529 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1530 } 1531 1532 if (uprv_strlen(string) <= 0) 1533 { 1534 warning(startline, "Encountered empty integer. Default value is 0."); 1535 } 1536 1537 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1538 /* and handle illegal char in the integer*/ 1539 value = uprv_strtoul(string, &stopstring, 0); 1540 len=(uint32_t)(stopstring-string); 1541 if(len==uprv_strlen(string)) 1542 { 1543 result = int_open(state->bundle, tag, value, comment, status); 1544 } 1545 else 1546 { 1547 *status=U_INVALID_CHAR_FOUND; 1548 } 1549 uprv_free(string); 1550 1551 return result; 1552 } 1553 1554 static struct SResource * 1555 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1556 { 1557 struct SResource *result; 1558 FileStream *file; 1559 int32_t len; 1560 uint8_t *data; 1561 char *filename; 1562 uint32_t line; 1563 char *fullname = NULL; 1564 int32_t numRead = 0; 1565 filename = getInvariantString(state, &line, NULL, status); 1566 1567 if (U_FAILURE(*status)) 1568 { 1569 return NULL; 1570 } 1571 1572 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1573 1574 if (U_FAILURE(*status)) 1575 { 1576 uprv_free(filename); 1577 return NULL; 1578 } 1579 1580 if(isVerbose()){ 1581 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1582 } 1583 1584 /* Open the input file for reading */ 1585 if (state->inputdir == NULL) 1586 { 1587 #if 1 1588 /* 1589 * Always save file file name, even if there's 1590 * no input directory specified. MIGHT BREAK SOMETHING 1591 */ 1592 int32_t filenameLength = uprv_strlen(filename); 1593 1594 fullname = (char *) uprv_malloc(filenameLength + 1); 1595 uprv_strcpy(fullname, filename); 1596 #endif 1597 1598 file = T_FileStream_open(filename, "rb"); 1599 } 1600 else 1601 { 1602 1603 int32_t count = (int32_t)uprv_strlen(filename); 1604 1605 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1606 { 1607 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1608 1609 /* test for NULL */ 1610 if(fullname == NULL) 1611 { 1612 *status = U_MEMORY_ALLOCATION_ERROR; 1613 return NULL; 1614 } 1615 1616 uprv_strcpy(fullname, state->inputdir); 1617 1618 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1619 fullname[state->inputdirLength + 1] = '\0'; 1620 1621 uprv_strcat(fullname, filename); 1622 } 1623 else 1624 { 1625 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1); 1626 1627 /* test for NULL */ 1628 if(fullname == NULL) 1629 { 1630 *status = U_MEMORY_ALLOCATION_ERROR; 1631 return NULL; 1632 } 1633 1634 uprv_strcpy(fullname, state->inputdir); 1635 uprv_strcat(fullname, filename); 1636 } 1637 1638 file = T_FileStream_open(fullname, "rb"); 1639 1640 } 1641 1642 if (file == NULL) 1643 { 1644 error(line, "couldn't open input file %s", filename); 1645 *status = U_FILE_ACCESS_ERROR; 1646 return NULL; 1647 } 1648 1649 len = T_FileStream_size(file); 1650 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); 1651 /* test for NULL */ 1652 if(data == NULL) 1653 { 1654 *status = U_MEMORY_ALLOCATION_ERROR; 1655 T_FileStream_close (file); 1656 return NULL; 1657 } 1658 1659 numRead = T_FileStream_read (file, data, len); 1660 T_FileStream_close (file); 1661 1662 result = bin_open(state->bundle, tag, len, data, fullname, comment, status); 1663 1664 uprv_free(data); 1665 uprv_free(filename); 1666 uprv_free(fullname); 1667 1668 return result; 1669 } 1670 1671 static struct SResource * 1672 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1673 { 1674 struct SResource *result; 1675 int32_t len=0; 1676 char *filename; 1677 uint32_t line; 1678 UChar *pTarget = NULL; 1679 1680 UCHARBUF *ucbuf; 1681 char *fullname = NULL; 1682 int32_t count = 0; 1683 const char* cp = NULL; 1684 const UChar* uBuffer = NULL; 1685 1686 filename = getInvariantString(state, &line, NULL, status); 1687 count = (int32_t)uprv_strlen(filename); 1688 1689 if (U_FAILURE(*status)) 1690 { 1691 return NULL; 1692 } 1693 1694 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1695 1696 if (U_FAILURE(*status)) 1697 { 1698 uprv_free(filename); 1699 return NULL; 1700 } 1701 1702 if(isVerbose()){ 1703 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1704 } 1705 1706 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1707 /* test for NULL */ 1708 if(fullname == NULL) 1709 { 1710 *status = U_MEMORY_ALLOCATION_ERROR; 1711 uprv_free(filename); 1712 return NULL; 1713 } 1714 1715 if(state->inputdir!=NULL){ 1716 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1717 { 1718 1719 uprv_strcpy(fullname, state->inputdir); 1720 1721 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1722 fullname[state->inputdirLength + 1] = '\0'; 1723 1724 uprv_strcat(fullname, filename); 1725 } 1726 else 1727 { 1728 uprv_strcpy(fullname, state->inputdir); 1729 uprv_strcat(fullname, filename); 1730 } 1731 }else{ 1732 uprv_strcpy(fullname,filename); 1733 } 1734 1735 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1736 1737 if (U_FAILURE(*status)) { 1738 error(line, "couldn't open input file %s\n", filename); 1739 return NULL; 1740 } 1741 1742 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1743 result = string_open(state->bundle, tag, uBuffer, len, comment, status); 1744 1745 uprv_free(pTarget); 1746 1747 uprv_free(filename); 1748 uprv_free(fullname); 1749 1750 return result; 1751 } 1752 1753 1754 1755 1756 1757 U_STRING_DECL(k_type_string, "string", 6); 1758 U_STRING_DECL(k_type_binary, "binary", 6); 1759 U_STRING_DECL(k_type_bin, "bin", 3); 1760 U_STRING_DECL(k_type_table, "table", 5); 1761 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1762 U_STRING_DECL(k_type_int, "int", 3); 1763 U_STRING_DECL(k_type_integer, "integer", 7); 1764 U_STRING_DECL(k_type_array, "array", 5); 1765 U_STRING_DECL(k_type_alias, "alias", 5); 1766 U_STRING_DECL(k_type_intvector, "intvector", 9); 1767 U_STRING_DECL(k_type_import, "import", 6); 1768 U_STRING_DECL(k_type_include, "include", 7); 1769 U_STRING_DECL(k_type_reserved, "reserved", 8); 1770 1771 /* Various non-standard processing plugins that create one or more special resources. */ 1772 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1773 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1774 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1775 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1776 1777 typedef enum EResourceType 1778 { 1779 RT_UNKNOWN, 1780 RT_STRING, 1781 RT_BINARY, 1782 RT_TABLE, 1783 RT_TABLE_NO_FALLBACK, 1784 RT_INTEGER, 1785 RT_ARRAY, 1786 RT_ALIAS, 1787 RT_INTVECTOR, 1788 RT_IMPORT, 1789 RT_INCLUDE, 1790 RT_PROCESS_UCA_RULES, 1791 RT_PROCESS_COLLATION, 1792 RT_PROCESS_TRANSLITERATOR, 1793 RT_PROCESS_DEPENDENCY, 1794 RT_RESERVED 1795 } EResourceType; 1796 1797 static struct { 1798 const char *nameChars; /* only used for debugging */ 1799 const UChar *nameUChars; 1800 ParseResourceFunction *parseFunction; 1801 } gResourceTypes[] = { 1802 {"Unknown", NULL, NULL}, 1803 {"string", k_type_string, parseString}, 1804 {"binary", k_type_binary, parseBinary}, 1805 {"table", k_type_table, parseTable}, 1806 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1807 {"integer", k_type_integer, parseInteger}, 1808 {"array", k_type_array, parseArray}, 1809 {"alias", k_type_alias, parseAlias}, 1810 {"intvector", k_type_intvector, parseIntVector}, 1811 {"import", k_type_import, parseImport}, 1812 {"include", k_type_include, parseInclude}, 1813 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1814 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1815 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1816 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1817 {"reserved", NULL, NULL} 1818 }; 1819 1820 void initParser(UBool omitBinaryCollation, UBool omitCollationRules) 1821 { 1822 U_STRING_INIT(k_type_string, "string", 6); 1823 U_STRING_INIT(k_type_binary, "binary", 6); 1824 U_STRING_INIT(k_type_bin, "bin", 3); 1825 U_STRING_INIT(k_type_table, "table", 5); 1826 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1827 U_STRING_INIT(k_type_int, "int", 3); 1828 U_STRING_INIT(k_type_integer, "integer", 7); 1829 U_STRING_INIT(k_type_array, "array", 5); 1830 U_STRING_INIT(k_type_alias, "alias", 5); 1831 U_STRING_INIT(k_type_intvector, "intvector", 9); 1832 U_STRING_INIT(k_type_import, "import", 6); 1833 U_STRING_INIT(k_type_reserved, "reserved", 8); 1834 U_STRING_INIT(k_type_include, "include", 7); 1835 1836 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1837 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1838 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1839 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1840 1841 gMakeBinaryCollation = !omitBinaryCollation; 1842 gOmitCollationRules = omitCollationRules; 1843 } 1844 1845 static U_INLINE UBool isTable(enum EResourceType type) { 1846 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); 1847 } 1848 1849 static enum EResourceType 1850 parseResourceType(ParseState* state, UErrorCode *status) 1851 { 1852 struct UString *tokenValue; 1853 struct UString comment; 1854 enum EResourceType result = RT_UNKNOWN; 1855 uint32_t line=0; 1856 ustr_init(&comment); 1857 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); 1858 1859 if (U_FAILURE(*status)) 1860 { 1861 return RT_UNKNOWN; 1862 } 1863 1864 *status = U_ZERO_ERROR; 1865 1866 /* Search for normal types */ 1867 result=RT_UNKNOWN; 1868 while (++result < RT_RESERVED) { 1869 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1870 break; 1871 } 1872 } 1873 /* Now search for the aliases */ 1874 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1875 result = RT_INTEGER; 1876 } 1877 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1878 result = RT_BINARY; 1879 } 1880 else if (result == RT_RESERVED) { 1881 char tokenBuffer[1024]; 1882 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1883 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1884 *status = U_INVALID_FORMAT_ERROR; 1885 error(line, "unknown resource type '%s'", tokenBuffer); 1886 } 1887 1888 return result; 1889 } 1890 1891 /* parse a non-top-level resource */ 1892 static struct SResource * 1893 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) 1894 { 1895 enum ETokenType token; 1896 enum EResourceType resType = RT_UNKNOWN; 1897 ParseResourceFunction *parseFunction = NULL; 1898 struct UString *tokenValue; 1899 uint32_t startline; 1900 uint32_t line; 1901 1902 token = getToken(state, &tokenValue, NULL, &startline, status); 1903 1904 if(isVerbose()){ 1905 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1906 } 1907 1908 /* name . [ ':' type ] '{' resource '}' */ 1909 /* This function parses from the colon onwards. If the colon is present, parse the 1910 type then try to parse a resource of that type. If there is no explicit type, 1911 work it out using the lookahead tokens. */ 1912 switch (token) 1913 { 1914 case TOK_EOF: 1915 *status = U_INVALID_FORMAT_ERROR; 1916 error(startline, "Unexpected EOF encountered"); 1917 return NULL; 1918 1919 case TOK_ERROR: 1920 *status = U_INVALID_FORMAT_ERROR; 1921 return NULL; 1922 1923 case TOK_COLON: 1924 resType = parseResourceType(state, status); 1925 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1926 1927 if (U_FAILURE(*status)) 1928 { 1929 return NULL; 1930 } 1931 1932 break; 1933 1934 case TOK_OPEN_BRACE: 1935 break; 1936 1937 default: 1938 *status = U_INVALID_FORMAT_ERROR; 1939 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1940 return NULL; 1941 } 1942 1943 if (resType == RT_UNKNOWN) 1944 { 1945 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1946 We could have any of the following: 1947 { { => array (nested) 1948 { :/} => array 1949 { string , => string array 1950 1951 { string { => table 1952 1953 { string :/{ => table 1954 { string } => string 1955 */ 1956 1957 token = peekToken(state, 0, NULL, &line, NULL,status); 1958 1959 if (U_FAILURE(*status)) 1960 { 1961 return NULL; 1962 } 1963 1964 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1965 { 1966 resType = RT_ARRAY; 1967 } 1968 else if (token == TOK_STRING) 1969 { 1970 token = peekToken(state, 1, NULL, &line, NULL, status); 1971 1972 if (U_FAILURE(*status)) 1973 { 1974 return NULL; 1975 } 1976 1977 switch (token) 1978 { 1979 case TOK_COMMA: resType = RT_ARRAY; break; 1980 case TOK_OPEN_BRACE: resType = RT_TABLE; break; 1981 case TOK_CLOSE_BRACE: resType = RT_STRING; break; 1982 case TOK_COLON: resType = RT_TABLE; break; 1983 default: 1984 *status = U_INVALID_FORMAT_ERROR; 1985 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 1986 return NULL; 1987 } 1988 } 1989 else 1990 { 1991 *status = U_INVALID_FORMAT_ERROR; 1992 error(line, "Unexpected token after '{'"); 1993 return NULL; 1994 } 1995 1996 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 1997 } else if(resType == RT_TABLE_NO_FALLBACK) { 1998 *status = U_INVALID_FORMAT_ERROR; 1999 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 2000 return NULL; 2001 } 2002 2003 /* We should now know what we need to parse next, so call the appropriate parser 2004 function and return. */ 2005 parseFunction = gResourceTypes[resType].parseFunction; 2006 if (parseFunction != NULL) { 2007 return parseFunction(state, tag, startline, comment, status); 2008 } 2009 else { 2010 *status = U_INTERNAL_PROGRAM_ERROR; 2011 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 2012 } 2013 2014 return NULL; 2015 } 2016 2017 /* parse the top-level resource */ 2018 struct SRBRoot * 2019 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status) 2020 { 2021 struct UString *tokenValue; 2022 struct UString comment; 2023 uint32_t line; 2024 enum EResourceType bundleType; 2025 enum ETokenType token; 2026 ParseState state; 2027 uint32_t i; 2028 int encLength; 2029 char* enc; 2030 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 2031 { 2032 ustr_init(&state.lookahead[i].value); 2033 ustr_init(&state.lookahead[i].comment); 2034 } 2035 2036 initLookahead(&state, buf, status); 2037 2038 state.inputdir = inputDir; 2039 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; 2040 state.outputdir = outputDir; 2041 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; 2042 2043 ustr_init(&comment); 2044 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); 2045 2046 state.bundle = bundle_open(&comment, FALSE, status); 2047 2048 if (state.bundle == NULL || U_FAILURE(*status)) 2049 { 2050 return NULL; 2051 } 2052 2053 2054 bundle_setlocale(state.bundle, tokenValue->fChars, status); 2055 2056 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 2057 token = getToken(&state, NULL, NULL, &line, status); 2058 if(token==TOK_COLON) { 2059 *status=U_ZERO_ERROR; 2060 bundleType=parseResourceType(&state, status); 2061 2062 if(isTable(bundleType)) 2063 { 2064 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); 2065 } 2066 else 2067 { 2068 *status=U_PARSE_ERROR; 2069 /* printf("asdsdweqdasdad\n"); */ 2070 2071 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 2072 } 2073 } 2074 else 2075 { 2076 /* not a colon */ 2077 if(token==TOK_OPEN_BRACE) 2078 { 2079 *status=U_ZERO_ERROR; 2080 bundleType=RT_TABLE; 2081 } 2082 else 2083 { 2084 /* neither colon nor open brace */ 2085 *status=U_PARSE_ERROR; 2086 bundleType=RT_UNKNOWN; 2087 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 2088 } 2089 } 2090 2091 if (U_FAILURE(*status)) 2092 { 2093 bundle_close(state.bundle, status); 2094 return NULL; 2095 } 2096 2097 if(bundleType==RT_TABLE_NO_FALLBACK) { 2098 /* 2099 * Parse a top-level table with the table(nofallback) declaration. 2100 * This is the same as a regular table, but also sets the 2101 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 2102 */ 2103 state.bundle->noFallback=TRUE; 2104 } 2105 /* top-level tables need not handle special table names like "collations" */ 2106 realParseTable(&state, state.bundle->fRoot, NULL, line, status); 2107 2108 if(dependencyArray!=NULL){ 2109 table_add(state.bundle->fRoot, dependencyArray, 0, status); 2110 dependencyArray = NULL; 2111 } 2112 if (U_FAILURE(*status)) 2113 { 2114 bundle_close(state.bundle, status); 2115 res_close(dependencyArray); 2116 return NULL; 2117 } 2118 2119 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) 2120 { 2121 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 2122 if(isStrict()){ 2123 *status = U_INVALID_FORMAT_ERROR; 2124 return NULL; 2125 } 2126 } 2127 2128 cleanupLookahead(&state); 2129 ustr_deinit(&comment); 2130 return state.bundle; 2131 } 2132