1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File parse.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/26/99 stephen Creation. 15 * 02/25/00 weiv Overhaul to write udata 16 * 5/10/01 Ram removed ustdio dependency 17 * 06/10/2001 Dominic Ludlam <dom (at) recoil.org> Rewritten 18 ******************************************************************************* 19 */ 20 21 #include "ucol_imp.h" 22 #include "parse.h" 23 #include "errmsg.h" 24 #include "uhash.h" 25 #include "cmemory.h" 26 #include "cstring.h" 27 #include "uinvchar.h" 28 #include "read.h" 29 #include "ustr.h" 30 #include "reslist.h" 31 #include "rbt_pars.h" 32 #include "unicode/ustring.h" 33 #include "unicode/putil.h" 34 #include <stdio.h> 35 36 /* Number of tokens to read ahead of the current stream position */ 37 #define MAX_LOOKAHEAD 3 38 39 #define CR 0x000D 40 #define LF 0x000A 41 #define SPACE 0x0020 42 #define TAB 0x0009 43 #define ESCAPE 0x005C 44 #define HASH 0x0023 45 #define QUOTE 0x0027 46 #define ZERO 0x0030 47 #define STARTCOMMAND 0x005B 48 #define ENDCOMMAND 0x005D 49 #define OPENSQBRACKET 0x005B 50 #define CLOSESQBRACKET 0x005D 51 52 typedef struct SResource * 53 ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); 54 55 struct Lookahead 56 { 57 enum ETokenType type; 58 struct UString value; 59 struct UString comment; 60 uint32_t line; 61 }; 62 63 /* keep in sync with token defines in read.h */ 64 const char *tokenNames[TOK_TOKEN_COUNT] = 65 { 66 "string", /* A string token, such as "MonthNames" */ 67 "'{'", /* An opening brace character */ 68 "'}'", /* A closing brace character */ 69 "','", /* A comma */ 70 "':'", /* A colon */ 71 72 "<end of file>", /* End of the file has been reached successfully */ 73 "<end of line>" 74 }; 75 76 /* Just to store "TRUE" */ 77 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 78 79 static struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 80 static uint32_t lookaheadPosition; 81 static UCHARBUF *buffer; 82 83 static struct SRBRoot *bundle; 84 static const char *inputdir; 85 static uint32_t inputdirLength; 86 static const char *outputdir; 87 static uint32_t outputdirLength; 88 89 static UBool gMakeBinaryCollation = TRUE; 90 static UBool gOmitCollationRules = FALSE; 91 92 static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status); 93 94 /* The nature of the lookahead buffer: 95 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 96 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 97 When getToken is called, the current pointer is moved to the next slot and the 98 old slot is filled with the next token from the reader by calling getNextToken. 99 The token values are stored in the slot, which means that token values don't 100 survive a call to getToken, ie. 101 102 UString *value; 103 104 getToken(&value, NULL, status); 105 getToken(NULL, NULL, status); bad - value is now a different string 106 */ 107 static void 108 initLookahead(UCHARBUF *buf, UErrorCode *status) 109 { 110 static uint32_t initTypeStrings = 0; 111 uint32_t i; 112 113 if (!initTypeStrings) 114 { 115 initTypeStrings = 1; 116 } 117 118 lookaheadPosition = 0; 119 buffer = buf; 120 121 resetLineNumber(); 122 123 for (i = 0; i < MAX_LOOKAHEAD; i++) 124 { 125 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status); 126 if (U_FAILURE(*status)) 127 { 128 return; 129 } 130 } 131 132 *status = U_ZERO_ERROR; 133 } 134 135 static void 136 cleanupLookahead() 137 { 138 uint32_t i; 139 for (i = 0; i < MAX_LOOKAHEAD; i++) 140 { 141 ustr_deinit(&lookahead[i].value); 142 ustr_deinit(&lookahead[i].comment); 143 } 144 145 } 146 147 static enum ETokenType 148 getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 149 { 150 enum ETokenType result; 151 uint32_t i; 152 153 result = lookahead[lookaheadPosition].type; 154 155 if (tokenValue != NULL) 156 { 157 *tokenValue = &lookahead[lookaheadPosition].value; 158 } 159 160 if (linenumber != NULL) 161 { 162 *linenumber = lookahead[lookaheadPosition].line; 163 } 164 165 if (comment != NULL) 166 { 167 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status); 168 } 169 170 i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 171 lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 172 ustr_setlen(&lookahead[i].comment, 0, status); 173 ustr_setlen(&lookahead[i].value, 0, status); 174 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status); 175 176 /* printf("getToken, returning %s\n", tokenNames[result]); */ 177 178 return result; 179 } 180 181 static enum ETokenType 182 peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) 183 { 184 uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 185 186 if (U_FAILURE(*status)) 187 { 188 return TOK_ERROR; 189 } 190 191 if (lookaheadCount >= MAX_LOOKAHEAD) 192 { 193 *status = U_INTERNAL_PROGRAM_ERROR; 194 return TOK_ERROR; 195 } 196 197 if (tokenValue != NULL) 198 { 199 *tokenValue = &lookahead[i].value; 200 } 201 202 if (linenumber != NULL) 203 { 204 *linenumber = lookahead[i].line; 205 } 206 207 if(comment != NULL){ 208 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status); 209 } 210 211 return lookahead[i].type; 212 } 213 214 static void 215 expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 216 { 217 uint32_t line; 218 219 enum ETokenType token = getToken(tokenValue, comment, &line, status); 220 221 if (linenumber != NULL) 222 { 223 *linenumber = line; 224 } 225 226 if (U_FAILURE(*status)) 227 { 228 return; 229 } 230 231 if (token != expectedToken) 232 { 233 *status = U_INVALID_FORMAT_ERROR; 234 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 235 } 236 else 237 { 238 *status = U_ZERO_ERROR; 239 } 240 } 241 242 static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status) 243 { 244 struct UString *tokenValue; 245 char *result; 246 uint32_t count; 247 248 expect(TOK_STRING, &tokenValue, comment, line, status); 249 250 if (U_FAILURE(*status)) 251 { 252 return NULL; 253 } 254 255 count = u_strlen(tokenValue->fChars); 256 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 257 *status = U_INVALID_FORMAT_ERROR; 258 error(*line, "invariant characters required for table keys, binary data, etc."); 259 return NULL; 260 } 261 262 result = uprv_malloc(count+1); 263 264 if (result == NULL) 265 { 266 *status = U_MEMORY_ALLOCATION_ERROR; 267 return NULL; 268 } 269 270 u_UCharsToChars(tokenValue->fChars, result, count+1); 271 return result; 272 } 273 274 static struct SResource * 275 parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 276 { 277 struct SResource *result = NULL; 278 struct UString *tokenValue; 279 FileStream *file = NULL; 280 char filename[256] = { '\0' }; 281 char cs[128] = { '\0' }; 282 uint32_t line; 283 int len=0; 284 UBool quoted = FALSE; 285 UCHARBUF *ucbuf=NULL; 286 UChar32 c = 0; 287 const char* cp = NULL; 288 UChar *pTarget = NULL; 289 UChar *target = NULL; 290 UChar *targetLimit = NULL; 291 int32_t size = 0; 292 293 expect(TOK_STRING, &tokenValue, NULL, &line, status); 294 295 if(isVerbose()){ 296 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 297 } 298 299 if (U_FAILURE(*status)) 300 { 301 return NULL; 302 } 303 /* make the filename including the directory */ 304 if (inputdir != NULL) 305 { 306 uprv_strcat(filename, inputdir); 307 308 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 309 { 310 uprv_strcat(filename, U_FILE_SEP_STRING); 311 } 312 } 313 314 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 315 316 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 317 318 if (U_FAILURE(*status)) 319 { 320 return NULL; 321 } 322 uprv_strcat(filename, cs); 323 324 if(gOmitCollationRules) { 325 return res_none(); 326 } 327 328 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 329 330 if (U_FAILURE(*status)) { 331 error(line, "An error occured while opening the input file %s\n", filename); 332 return NULL; 333 } 334 335 /* We allocate more space than actually required 336 * since the actual size needed for storing UChars 337 * is not known in UTF-8 byte stream 338 */ 339 size = ucbuf_size(ucbuf) + 1; 340 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 341 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 342 target = pTarget; 343 targetLimit = pTarget+size; 344 345 /* read the rules into the buffer */ 346 while (target < targetLimit) 347 { 348 c = ucbuf_getc(ucbuf, status); 349 if(c == QUOTE) { 350 quoted = (UBool)!quoted; 351 } 352 /* weiv (06/26/2002): adding the following: 353 * - preserving spaces in commands [...] 354 * - # comments until the end of line 355 */ 356 if (c == STARTCOMMAND && !quoted) 357 { 358 /* preserve commands 359 * closing bracket will be handled by the 360 * append at the end of the loop 361 */ 362 while(c != ENDCOMMAND) { 363 U_APPEND_CHAR32(c, target,len); 364 c = ucbuf_getc(ucbuf, status); 365 } 366 } 367 else if (c == HASH && !quoted) { 368 /* skip comments */ 369 while(c != CR && c != LF) { 370 c = ucbuf_getc(ucbuf, status); 371 } 372 continue; 373 } 374 else if (c == ESCAPE) 375 { 376 c = unescape(ucbuf, status); 377 378 if (c == U_ERR) 379 { 380 uprv_free(pTarget); 381 T_FileStream_close(file); 382 return NULL; 383 } 384 } 385 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 386 { 387 /* ignore spaces carriage returns 388 * and line feed unless in the form \uXXXX 389 */ 390 continue; 391 } 392 393 /* Append UChar * after dissembling if c > 0xffff*/ 394 if (c != U_EOF) 395 { 396 U_APPEND_CHAR32(c, target,len); 397 } 398 else 399 { 400 break; 401 } 402 } 403 404 /* terminate the string */ 405 if(target < targetLimit){ 406 *target = 0x0000; 407 } 408 409 result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 410 411 412 ucbuf_close(ucbuf); 413 uprv_free(pTarget); 414 T_FileStream_close(file); 415 416 return result; 417 } 418 419 static struct SResource * 420 parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 421 { 422 struct SResource *result = NULL; 423 struct UString *tokenValue; 424 FileStream *file = NULL; 425 char filename[256] = { '\0' }; 426 char cs[128] = { '\0' }; 427 uint32_t line; 428 UCHARBUF *ucbuf=NULL; 429 const char* cp = NULL; 430 UChar *pTarget = NULL; 431 const UChar *pSource = NULL; 432 int32_t size = 0; 433 434 expect(TOK_STRING, &tokenValue, NULL, &line, status); 435 436 if(isVerbose()){ 437 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 438 } 439 440 if (U_FAILURE(*status)) 441 { 442 return NULL; 443 } 444 /* make the filename including the directory */ 445 if (inputdir != NULL) 446 { 447 uprv_strcat(filename, inputdir); 448 449 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 450 { 451 uprv_strcat(filename, U_FILE_SEP_STRING); 452 } 453 } 454 455 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 456 457 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 458 459 if (U_FAILURE(*status)) 460 { 461 return NULL; 462 } 463 uprv_strcat(filename, cs); 464 465 466 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 467 468 if (U_FAILURE(*status)) { 469 error(line, "An error occured while opening the input file %s\n", filename); 470 return NULL; 471 } 472 473 /* We allocate more space than actually required 474 * since the actual size needed for storing UChars 475 * is not known in UTF-8 byte stream 476 */ 477 pSource = ucbuf_getBuffer(ucbuf, &size, status); 478 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 479 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 480 481 #if !UCONFIG_NO_TRANSLITERATION 482 size = utrans_stripRules(pSource, size, pTarget, status); 483 #else 484 size = 0; 485 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 486 #endif 487 result = string_open(bundle, tag, pTarget, size, NULL, status); 488 489 ucbuf_close(ucbuf); 490 uprv_free(pTarget); 491 T_FileStream_close(file); 492 493 return result; 494 } 495 static struct SResource* dependencyArray = NULL; 496 497 static struct SResource * 498 parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 499 { 500 struct SResource *result = NULL; 501 struct SResource *elem = NULL; 502 struct UString *tokenValue; 503 uint32_t line; 504 char filename[256] = { '\0' }; 505 char cs[128] = { '\0' }; 506 507 expect(TOK_STRING, &tokenValue, NULL, &line, status); 508 509 if(isVerbose()){ 510 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 511 } 512 513 if (U_FAILURE(*status)) 514 { 515 return NULL; 516 } 517 /* make the filename including the directory */ 518 if (outputdir != NULL) 519 { 520 uprv_strcat(filename, outputdir); 521 522 if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR) 523 { 524 uprv_strcat(filename, U_FILE_SEP_STRING); 525 } 526 } 527 528 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 529 530 if (U_FAILURE(*status)) 531 { 532 return NULL; 533 } 534 uprv_strcat(filename, cs); 535 if(!T_FileStream_file_exists(filename)){ 536 if(isStrict()){ 537 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 538 }else{ 539 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 540 } 541 } 542 if(dependencyArray==NULL){ 543 dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status); 544 } 545 if(tag!=NULL){ 546 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 547 } 548 elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 549 550 array_add(dependencyArray, elem, status); 551 552 if (U_FAILURE(*status)) 553 { 554 return NULL; 555 } 556 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 557 return result; 558 } 559 static struct SResource * 560 parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 561 { 562 struct UString *tokenValue; 563 struct SResource *result = NULL; 564 565 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 566 { 567 return parseUCARules(tag, startline, status); 568 }*/ 569 if(isVerbose()){ 570 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 571 } 572 expect(TOK_STRING, &tokenValue, NULL, NULL, status); 573 574 if (U_SUCCESS(*status)) 575 { 576 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 577 doesn't survive expect either) */ 578 579 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 580 if(U_SUCCESS(*status) && result) { 581 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 582 583 if (U_FAILURE(*status)) 584 { 585 res_close(result); 586 return NULL; 587 } 588 } 589 } 590 591 return result; 592 } 593 594 static struct SResource * 595 parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 596 { 597 struct UString *tokenValue; 598 struct SResource *result = NULL; 599 600 expect(TOK_STRING, &tokenValue, NULL, NULL, status); 601 602 if(isVerbose()){ 603 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 604 } 605 606 if (U_SUCCESS(*status)) 607 { 608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 609 doesn't survive expect either) */ 610 611 result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 612 613 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 614 615 if (U_FAILURE(*status)) 616 { 617 res_close(result); 618 return NULL; 619 } 620 } 621 622 return result; 623 } 624 625 static struct SResource * 626 addCollation(struct SResource *result, uint32_t startline, UErrorCode *status) 627 { 628 struct SResource *member = NULL; 629 struct UString *tokenValue; 630 struct UString comment; 631 enum ETokenType token; 632 char subtag[1024]; 633 UVersionInfo version; 634 UBool override = FALSE; 635 uint32_t line; 636 /* '{' . (name resource)* '}' */ 637 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 638 639 for (;;) 640 { 641 ustr_init(&comment); 642 token = getToken(&tokenValue, &comment, &line, status); 643 644 if (token == TOK_CLOSE_BRACE) 645 { 646 return result; 647 } 648 649 if (token != TOK_STRING) 650 { 651 res_close(result); 652 *status = U_INVALID_FORMAT_ERROR; 653 654 if (token == TOK_EOF) 655 { 656 error(startline, "unterminated table"); 657 } 658 else 659 { 660 error(line, "Unexpected token %s", tokenNames[token]); 661 } 662 663 return NULL; 664 } 665 666 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 667 668 if (U_FAILURE(*status)) 669 { 670 res_close(result); 671 return NULL; 672 } 673 674 member = parseResource(subtag, NULL, status); 675 676 if (U_FAILURE(*status)) 677 { 678 res_close(result); 679 return NULL; 680 } 681 682 if (uprv_strcmp(subtag, "Version") == 0) 683 { 684 char ver[40]; 685 int32_t length = member->u.fString.fLength; 686 687 if (length >= (int32_t) sizeof(ver)) 688 { 689 length = (int32_t) sizeof(ver) - 1; 690 } 691 692 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ 693 u_versionFromString(version, ver); 694 695 table_add(result, member, line, status); 696 697 } 698 else if (uprv_strcmp(subtag, "Override") == 0) 699 { 700 override = FALSE; 701 702 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0) 703 { 704 override = TRUE; 705 } 706 table_add(result, member, line, status); 707 708 } 709 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 710 { 711 /* discard duplicate %%CollationBin if any*/ 712 } 713 else if (uprv_strcmp(subtag, "Sequence") == 0) 714 { 715 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 716 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 717 #else 718 if(gMakeBinaryCollation) { 719 UErrorCode intStatus = U_ZERO_ERROR; 720 721 /* do the collation elements */ 722 int32_t len = 0; 723 uint8_t *data = NULL; 724 UCollator *coll = NULL; 725 UParseError parseError; 726 727 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength, 728 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus); 729 730 if (U_SUCCESS(intStatus) && coll != NULL) 731 { 732 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); 733 data = (uint8_t *)uprv_malloc(len); 734 intStatus = U_ZERO_ERROR; 735 len = ucol_cloneBinary(coll, data, len, &intStatus); 736 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/ 737 738 /* tailoring rules version */ 739 /* This is wrong! */ 740 /*coll->dataInfo.dataVersion[1] = version[0];*/ 741 /* Copy tailoring version. Builder version already */ 742 /* set in ucol_openRules */ 743 ((UCATableHeader *)data)->version[1] = version[0]; 744 ((UCATableHeader *)data)->version[2] = version[1]; 745 ((UCATableHeader *)data)->version[3] = version[2]; 746 747 if (U_SUCCESS(intStatus) && data != NULL) 748 { 749 struct SResource *collationBin = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status); 750 table_add(result, collationBin, line, status); 751 uprv_free(data); 752 } 753 else 754 { 755 warning(line, "could not obtain rules from collator"); 756 if(isStrict()){ 757 *status = U_INVALID_FORMAT_ERROR; 758 return NULL; 759 } 760 } 761 762 ucol_close(coll); 763 } 764 else 765 { 766 if(intStatus == U_FILE_ACCESS_ERROR) { 767 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); 768 *status = intStatus; 769 return NULL; 770 } 771 warning(line, "%%Collation could not be constructed from CollationElements - check context!"); 772 if(isStrict()){ 773 *status = intStatus; 774 return NULL; 775 } 776 } 777 } else { 778 if(isVerbose()) { 779 printf("Not building Collation binary\n"); 780 } 781 } 782 #endif 783 /* in order to achieve smaller data files, we can direct genrb */ 784 /* to omit collation rules */ 785 if(gOmitCollationRules) { 786 bundle_closeString(bundle, member); 787 } else { 788 table_add(result, member, line, status); 789 } 790 } 791 792 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 793 794 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 795 796 if (U_FAILURE(*status)) 797 { 798 res_close(result); 799 return NULL; 800 } 801 } 802 803 /* not reached */ 804 /* A compiler warning will appear if all paths don't contain a return statement. */ 805 /* *status = U_INTERNAL_PROGRAM_ERROR; 806 return NULL;*/ 807 } 808 809 static struct SResource * 810 parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 811 { 812 struct SResource *result = NULL; 813 struct SResource *member = NULL; 814 struct SResource *collationRes = NULL; 815 struct UString *tokenValue; 816 struct UString comment; 817 enum ETokenType token; 818 char subtag[1024], typeKeyword[1024]; 819 uint32_t line; 820 821 result = table_open(bundle, tag, NULL, status); 822 823 if (result == NULL || U_FAILURE(*status)) 824 { 825 return NULL; 826 } 827 if(isVerbose()){ 828 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 829 } 830 if(!newCollation) { 831 return addCollation(result, startline, status); 832 } 833 else { 834 for(;;) { 835 ustr_init(&comment); 836 token = getToken(&tokenValue, &comment, &line, status); 837 838 if (token == TOK_CLOSE_BRACE) 839 { 840 return result; 841 } 842 843 if (token != TOK_STRING) 844 { 845 res_close(result); 846 *status = U_INVALID_FORMAT_ERROR; 847 848 if (token == TOK_EOF) 849 { 850 error(startline, "unterminated table"); 851 } 852 else 853 { 854 error(line, "Unexpected token %s", tokenNames[token]); 855 } 856 857 return NULL; 858 } 859 860 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 861 862 if (U_FAILURE(*status)) 863 { 864 res_close(result); 865 return NULL; 866 } 867 868 if (uprv_strcmp(subtag, "default") == 0) 869 { 870 member = parseResource(subtag, NULL, status); 871 872 if (U_FAILURE(*status)) 873 { 874 res_close(result); 875 return NULL; 876 } 877 878 table_add(result, member, line, status); 879 } 880 else 881 { 882 token = peekToken(0, &tokenValue, &line, &comment, status); 883 /* this probably needs to be refactored or recursively use the parser */ 884 /* first we assume that our collation table won't have the explicit type */ 885 /* then, we cannot handle aliases */ 886 if(token == TOK_OPEN_BRACE) { 887 token = getToken(&tokenValue, &comment, &line, status); 888 collationRes = table_open(bundle, subtag, NULL, status); 889 table_add(result, addCollation(collationRes, startline, status), startline, status); 890 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 891 /* we could have a table too */ 892 token = peekToken(1, &tokenValue, &line, &comment, status); 893 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 894 if(uprv_strcmp(typeKeyword, "alias") == 0) { 895 member = parseResource(subtag, NULL, status); 896 897 if (U_FAILURE(*status)) 898 { 899 res_close(result); 900 return NULL; 901 } 902 903 table_add(result, member, line, status); 904 } else { 905 res_close(result); 906 *status = U_INVALID_FORMAT_ERROR; 907 return NULL; 908 } 909 } else { 910 res_close(result); 911 *status = U_INVALID_FORMAT_ERROR; 912 return NULL; 913 } 914 } 915 916 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 917 918 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 919 920 if (U_FAILURE(*status)) 921 { 922 res_close(result); 923 return NULL; 924 } 925 } 926 } 927 } 928 929 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 930 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 931 static struct SResource * 932 realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) 933 { 934 struct SResource *member = NULL; 935 struct UString *tokenValue=NULL; 936 struct UString comment; 937 enum ETokenType token; 938 char subtag[1024]; 939 uint32_t line; 940 UBool readToken = FALSE; 941 942 /* '{' . (name resource)* '}' */ 943 if(isVerbose()){ 944 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 945 } 946 for (;;) 947 { 948 ustr_init(&comment); 949 token = getToken(&tokenValue, &comment, &line, status); 950 951 if (token == TOK_CLOSE_BRACE) 952 { 953 if (!readToken) { 954 warning(startline, "Encountered empty table"); 955 } 956 return table; 957 } 958 959 if (token != TOK_STRING) 960 { 961 *status = U_INVALID_FORMAT_ERROR; 962 963 if (token == TOK_EOF) 964 { 965 error(startline, "unterminated table"); 966 } 967 else 968 { 969 error(line, "unexpected token %s", tokenNames[token]); 970 } 971 972 return NULL; 973 } 974 975 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 976 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 977 } else { 978 *status = U_INVALID_FORMAT_ERROR; 979 error(line, "invariant characters required for table keys"); 980 return NULL; 981 } 982 983 if (U_FAILURE(*status)) 984 { 985 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 986 return NULL; 987 } 988 989 member = parseResource(subtag, &comment, status); 990 991 if (member == NULL || U_FAILURE(*status)) 992 { 993 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 994 return NULL; 995 } 996 997 table_add(table, member, line, status); 998 999 if (U_FAILURE(*status)) 1000 { 1001 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 1002 return NULL; 1003 } 1004 readToken = TRUE; 1005 ustr_deinit(&comment); 1006 } 1007 1008 /* not reached */ 1009 /* A compiler warning will appear if all paths don't contain a return statement. */ 1010 /* *status = U_INTERNAL_PROGRAM_ERROR; 1011 return NULL;*/ 1012 } 1013 1014 static struct SResource * 1015 parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1016 { 1017 struct SResource *result; 1018 1019 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1020 { 1021 return parseCollationElements(tag, startline, FALSE, status); 1022 } 1023 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1024 { 1025 return parseCollationElements(tag, startline, TRUE, status); 1026 } 1027 if(isVerbose()){ 1028 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1029 } 1030 1031 result = table_open(bundle, tag, comment, status); 1032 1033 if (result == NULL || U_FAILURE(*status)) 1034 { 1035 return NULL; 1036 } 1037 1038 return realParseTable(result, tag, startline, status); 1039 } 1040 1041 static struct SResource * 1042 parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1043 { 1044 struct SResource *result = NULL; 1045 struct SResource *member = NULL; 1046 struct UString *tokenValue; 1047 struct UString memberComments; 1048 enum ETokenType token; 1049 UBool readToken = FALSE; 1050 1051 result = array_open(bundle, tag, comment, status); 1052 1053 if (result == NULL || U_FAILURE(*status)) 1054 { 1055 return NULL; 1056 } 1057 if(isVerbose()){ 1058 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1059 } 1060 1061 ustr_init(&memberComments); 1062 1063 /* '{' . resource [','] '}' */ 1064 for (;;) 1065 { 1066 /* reset length */ 1067 ustr_setlen(&memberComments, 0, status); 1068 1069 /* check for end of array, but don't consume next token unless it really is the end */ 1070 token = peekToken(0, &tokenValue, NULL, &memberComments, status); 1071 1072 1073 if (token == TOK_CLOSE_BRACE) 1074 { 1075 getToken(NULL, NULL, NULL, status); 1076 if (!readToken) { 1077 warning(startline, "Encountered empty array"); 1078 } 1079 break; 1080 } 1081 1082 if (token == TOK_EOF) 1083 { 1084 res_close(result); 1085 *status = U_INVALID_FORMAT_ERROR; 1086 error(startline, "unterminated array"); 1087 return NULL; 1088 } 1089 1090 /* string arrays are a special case */ 1091 if (token == TOK_STRING) 1092 { 1093 getToken(&tokenValue, &memberComments, NULL, status); 1094 member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1095 } 1096 else 1097 { 1098 member = parseResource(NULL, &memberComments, status); 1099 } 1100 1101 if (member == NULL || U_FAILURE(*status)) 1102 { 1103 res_close(result); 1104 return NULL; 1105 } 1106 1107 array_add(result, member, status); 1108 1109 if (U_FAILURE(*status)) 1110 { 1111 res_close(result); 1112 return NULL; 1113 } 1114 1115 /* eat optional comma if present */ 1116 token = peekToken(0, NULL, NULL, NULL, status); 1117 1118 if (token == TOK_COMMA) 1119 { 1120 getToken(NULL, NULL, NULL, status); 1121 } 1122 1123 if (U_FAILURE(*status)) 1124 { 1125 res_close(result); 1126 return NULL; 1127 } 1128 readToken = TRUE; 1129 } 1130 1131 ustr_deinit(&memberComments); 1132 return result; 1133 } 1134 1135 static struct SResource * 1136 parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1137 { 1138 struct SResource *result = NULL; 1139 enum ETokenType token; 1140 char *string; 1141 int32_t value; 1142 UBool readToken = FALSE; 1143 char *stopstring; 1144 uint32_t len; 1145 struct UString memberComments; 1146 1147 result = intvector_open(bundle, tag, comment, status); 1148 1149 if (result == NULL || U_FAILURE(*status)) 1150 { 1151 return NULL; 1152 } 1153 1154 if(isVerbose()){ 1155 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1156 } 1157 ustr_init(&memberComments); 1158 /* '{' . string [','] '}' */ 1159 for (;;) 1160 { 1161 ustr_setlen(&memberComments, 0, status); 1162 1163 /* check for end of array, but don't consume next token unless it really is the end */ 1164 token = peekToken(0, NULL, NULL,&memberComments, status); 1165 1166 if (token == TOK_CLOSE_BRACE) 1167 { 1168 /* it's the end, consume the close brace */ 1169 getToken(NULL, NULL, NULL, status); 1170 if (!readToken) { 1171 warning(startline, "Encountered empty int vector"); 1172 } 1173 ustr_deinit(&memberComments); 1174 return result; 1175 } 1176 1177 string = getInvariantString(NULL, NULL, status); 1178 1179 if (U_FAILURE(*status)) 1180 { 1181 res_close(result); 1182 return NULL; 1183 } 1184 1185 /* For handling illegal char in the Intvector */ 1186 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1187 len=(uint32_t)(stopstring-string); 1188 1189 if(len==uprv_strlen(string)) 1190 { 1191 intvector_add(result, value, status); 1192 uprv_free(string); 1193 token = peekToken(0, NULL, NULL, NULL, status); 1194 } 1195 else 1196 { 1197 uprv_free(string); 1198 *status=U_INVALID_CHAR_FOUND; 1199 } 1200 1201 if (U_FAILURE(*status)) 1202 { 1203 res_close(result); 1204 return NULL; 1205 } 1206 1207 /* the comma is optional (even though it is required to prevent the reader from concatenating 1208 consecutive entries) so that a missing comma on the last entry isn't an error */ 1209 if (token == TOK_COMMA) 1210 { 1211 getToken(NULL, NULL, NULL, status); 1212 } 1213 readToken = TRUE; 1214 } 1215 1216 /* not reached */ 1217 /* A compiler warning will appear if all paths don't contain a return statement. */ 1218 /* intvector_close(result, status); 1219 *status = U_INTERNAL_PROGRAM_ERROR; 1220 return NULL;*/ 1221 } 1222 1223 static struct SResource * 1224 parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1225 { 1226 struct SResource *result = NULL; 1227 uint8_t *value; 1228 char *string; 1229 char toConv[3] = {'\0', '\0', '\0'}; 1230 uint32_t count; 1231 uint32_t i; 1232 uint32_t line; 1233 char *stopstring; 1234 uint32_t len; 1235 1236 string = getInvariantString(&line, NULL, status); 1237 1238 if (string == NULL || U_FAILURE(*status)) 1239 { 1240 return NULL; 1241 } 1242 1243 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1244 1245 if (U_FAILURE(*status)) 1246 { 1247 uprv_free(string); 1248 return NULL; 1249 } 1250 1251 if(isVerbose()){ 1252 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1253 } 1254 1255 count = (uint32_t)uprv_strlen(string); 1256 if (count > 0){ 1257 if((count % 2)==0){ 1258 value = uprv_malloc(sizeof(uint8_t) * count); 1259 1260 if (value == NULL) 1261 { 1262 uprv_free(string); 1263 *status = U_MEMORY_ALLOCATION_ERROR; 1264 return NULL; 1265 } 1266 1267 for (i = 0; i < count; i += 2) 1268 { 1269 toConv[0] = string[i]; 1270 toConv[1] = string[i + 1]; 1271 1272 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1273 len=(uint32_t)(stopstring-toConv); 1274 1275 if(len!=uprv_strlen(toConv)) 1276 { 1277 uprv_free(string); 1278 *status=U_INVALID_CHAR_FOUND; 1279 return NULL; 1280 } 1281 } 1282 1283 result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status); 1284 1285 uprv_free(value); 1286 } 1287 else 1288 { 1289 *status = U_INVALID_CHAR_FOUND; 1290 uprv_free(string); 1291 error(line, "Encountered invalid binary string"); 1292 return NULL; 1293 } 1294 } 1295 else 1296 { 1297 result = bin_open(bundle, tag, 0, NULL, "",comment,status); 1298 warning(startline, "Encountered empty binary tag"); 1299 } 1300 uprv_free(string); 1301 1302 return result; 1303 } 1304 1305 static struct SResource * 1306 parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1307 { 1308 struct SResource *result = NULL; 1309 int32_t value; 1310 char *string; 1311 char *stopstring; 1312 uint32_t len; 1313 1314 string = getInvariantString(NULL, NULL, status); 1315 1316 if (string == NULL || U_FAILURE(*status)) 1317 { 1318 return NULL; 1319 } 1320 1321 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1322 1323 if (U_FAILURE(*status)) 1324 { 1325 uprv_free(string); 1326 return NULL; 1327 } 1328 1329 if(isVerbose()){ 1330 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1331 } 1332 1333 if (uprv_strlen(string) <= 0) 1334 { 1335 warning(startline, "Encountered empty integer. Default value is 0."); 1336 } 1337 1338 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1339 /* and handle illegal char in the integer*/ 1340 value = uprv_strtoul(string, &stopstring, 0); 1341 len=(uint32_t)(stopstring-string); 1342 if(len==uprv_strlen(string)) 1343 { 1344 result = int_open(bundle, tag, value, comment, status); 1345 } 1346 else 1347 { 1348 *status=U_INVALID_CHAR_FOUND; 1349 } 1350 uprv_free(string); 1351 1352 return result; 1353 } 1354 1355 static struct SResource * 1356 parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1357 { 1358 struct SResource *result; 1359 FileStream *file; 1360 int32_t len; 1361 uint8_t *data; 1362 char *filename; 1363 uint32_t line; 1364 char *fullname = NULL; 1365 int32_t numRead = 0; 1366 filename = getInvariantString(&line, NULL, status); 1367 1368 if (U_FAILURE(*status)) 1369 { 1370 return NULL; 1371 } 1372 1373 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1374 1375 if (U_FAILURE(*status)) 1376 { 1377 uprv_free(filename); 1378 return NULL; 1379 } 1380 1381 if(isVerbose()){ 1382 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1383 } 1384 1385 /* Open the input file for reading */ 1386 if (inputdir == NULL) 1387 { 1388 #if 1 1389 /* 1390 * Always save file file name, even if there's 1391 * no input directory specified. MIGHT BREAK SOMETHING 1392 */ 1393 int32_t filenameLength = uprv_strlen(filename); 1394 1395 fullname = (char *) uprv_malloc(filenameLength + 1); 1396 uprv_strcpy(fullname, filename); 1397 #endif 1398 1399 file = T_FileStream_open(filename, "rb"); 1400 } 1401 else 1402 { 1403 1404 int32_t count = (int32_t)uprv_strlen(filename); 1405 1406 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 1407 { 1408 fullname = (char *) uprv_malloc(inputdirLength + count + 2); 1409 1410 /* test for NULL */ 1411 if(fullname == NULL) 1412 { 1413 *status = U_MEMORY_ALLOCATION_ERROR; 1414 return NULL; 1415 } 1416 1417 uprv_strcpy(fullname, inputdir); 1418 1419 fullname[inputdirLength] = U_FILE_SEP_CHAR; 1420 fullname[inputdirLength + 1] = '\0'; 1421 1422 uprv_strcat(fullname, filename); 1423 } 1424 else 1425 { 1426 fullname = (char *) uprv_malloc(inputdirLength + count + 1); 1427 1428 /* test for NULL */ 1429 if(fullname == NULL) 1430 { 1431 *status = U_MEMORY_ALLOCATION_ERROR; 1432 return NULL; 1433 } 1434 1435 uprv_strcpy(fullname, inputdir); 1436 uprv_strcat(fullname, filename); 1437 } 1438 1439 file = T_FileStream_open(fullname, "rb"); 1440 1441 } 1442 1443 if (file == NULL) 1444 { 1445 error(line, "couldn't open input file %s", filename); 1446 *status = U_FILE_ACCESS_ERROR; 1447 return NULL; 1448 } 1449 1450 len = T_FileStream_size(file); 1451 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); 1452 /* test for NULL */ 1453 if(data == NULL) 1454 { 1455 *status = U_MEMORY_ALLOCATION_ERROR; 1456 T_FileStream_close (file); 1457 return NULL; 1458 } 1459 1460 numRead = T_FileStream_read (file, data, len); 1461 T_FileStream_close (file); 1462 1463 result = bin_open(bundle, tag, len, data, fullname, comment, status); 1464 1465 uprv_free(data); 1466 uprv_free(filename); 1467 uprv_free(fullname); 1468 1469 return result; 1470 } 1471 1472 static struct SResource * 1473 parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1474 { 1475 struct SResource *result; 1476 int32_t len=0; 1477 char *filename; 1478 uint32_t line; 1479 UChar *pTarget = NULL; 1480 1481 UCHARBUF *ucbuf; 1482 char *fullname = NULL; 1483 int32_t count = 0; 1484 const char* cp = NULL; 1485 const UChar* uBuffer = NULL; 1486 1487 filename = getInvariantString(&line, NULL, status); 1488 count = (int32_t)uprv_strlen(filename); 1489 1490 if (U_FAILURE(*status)) 1491 { 1492 return NULL; 1493 } 1494 1495 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1496 1497 if (U_FAILURE(*status)) 1498 { 1499 uprv_free(filename); 1500 return NULL; 1501 } 1502 1503 if(isVerbose()){ 1504 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1505 } 1506 1507 fullname = (char *) uprv_malloc(inputdirLength + count + 2); 1508 /* test for NULL */ 1509 if(fullname == NULL) 1510 { 1511 *status = U_MEMORY_ALLOCATION_ERROR; 1512 uprv_free(filename); 1513 return NULL; 1514 } 1515 1516 if(inputdir!=NULL){ 1517 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 1518 { 1519 1520 uprv_strcpy(fullname, inputdir); 1521 1522 fullname[inputdirLength] = U_FILE_SEP_CHAR; 1523 fullname[inputdirLength + 1] = '\0'; 1524 1525 uprv_strcat(fullname, filename); 1526 } 1527 else 1528 { 1529 uprv_strcpy(fullname, inputdir); 1530 uprv_strcat(fullname, filename); 1531 } 1532 }else{ 1533 uprv_strcpy(fullname,filename); 1534 } 1535 1536 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1537 1538 if (U_FAILURE(*status)) { 1539 error(line, "couldn't open input file %s\n", filename); 1540 return NULL; 1541 } 1542 1543 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1544 result = string_open(bundle, tag, uBuffer, len, comment, status); 1545 1546 uprv_free(pTarget); 1547 1548 uprv_free(filename); 1549 uprv_free(fullname); 1550 1551 return result; 1552 } 1553 1554 1555 1556 1557 1558 U_STRING_DECL(k_type_string, "string", 6); 1559 U_STRING_DECL(k_type_binary, "binary", 6); 1560 U_STRING_DECL(k_type_bin, "bin", 3); 1561 U_STRING_DECL(k_type_table, "table", 5); 1562 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1563 U_STRING_DECL(k_type_int, "int", 3); 1564 U_STRING_DECL(k_type_integer, "integer", 7); 1565 U_STRING_DECL(k_type_array, "array", 5); 1566 U_STRING_DECL(k_type_alias, "alias", 5); 1567 U_STRING_DECL(k_type_intvector, "intvector", 9); 1568 U_STRING_DECL(k_type_import, "import", 6); 1569 U_STRING_DECL(k_type_include, "include", 7); 1570 U_STRING_DECL(k_type_reserved, "reserved", 8); 1571 1572 /* Various non-standard processing plugins that create one or more special resources. */ 1573 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1574 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1575 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1576 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1577 1578 typedef enum EResourceType 1579 { 1580 RT_UNKNOWN, 1581 RT_STRING, 1582 RT_BINARY, 1583 RT_TABLE, 1584 RT_TABLE_NO_FALLBACK, 1585 RT_INTEGER, 1586 RT_ARRAY, 1587 RT_ALIAS, 1588 RT_INTVECTOR, 1589 RT_IMPORT, 1590 RT_INCLUDE, 1591 RT_PROCESS_UCA_RULES, 1592 RT_PROCESS_COLLATION, 1593 RT_PROCESS_TRANSLITERATOR, 1594 RT_PROCESS_DEPENDENCY, 1595 RT_RESERVED 1596 } EResourceType; 1597 1598 static struct { 1599 const char *nameChars; /* only used for debugging */ 1600 const UChar *nameUChars; 1601 ParseResourceFunction *parseFunction; 1602 } gResourceTypes[] = { 1603 {"Unknown", NULL, NULL}, 1604 {"string", k_type_string, parseString}, 1605 {"binary", k_type_binary, parseBinary}, 1606 {"table", k_type_table, parseTable}, 1607 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1608 {"integer", k_type_integer, parseInteger}, 1609 {"array", k_type_array, parseArray}, 1610 {"alias", k_type_alias, parseAlias}, 1611 {"intvector", k_type_intvector, parseIntVector}, 1612 {"import", k_type_import, parseImport}, 1613 {"include", k_type_include, parseInclude}, 1614 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1615 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1616 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1617 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1618 {"reserved", NULL, NULL} 1619 }; 1620 1621 void initParser(UBool omitBinaryCollation, UBool omitCollationRules) 1622 { 1623 uint32_t i; 1624 1625 U_STRING_INIT(k_type_string, "string", 6); 1626 U_STRING_INIT(k_type_binary, "binary", 6); 1627 U_STRING_INIT(k_type_bin, "bin", 3); 1628 U_STRING_INIT(k_type_table, "table", 5); 1629 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1630 U_STRING_INIT(k_type_int, "int", 3); 1631 U_STRING_INIT(k_type_integer, "integer", 7); 1632 U_STRING_INIT(k_type_array, "array", 5); 1633 U_STRING_INIT(k_type_alias, "alias", 5); 1634 U_STRING_INIT(k_type_intvector, "intvector", 9); 1635 U_STRING_INIT(k_type_import, "import", 6); 1636 U_STRING_INIT(k_type_reserved, "reserved", 8); 1637 U_STRING_INIT(k_type_include, "include", 7); 1638 1639 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1640 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1641 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1642 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1643 1644 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 1645 { 1646 ustr_init(&lookahead[i].value); 1647 } 1648 gMakeBinaryCollation = !omitBinaryCollation; 1649 gOmitCollationRules = omitCollationRules; 1650 } 1651 1652 static U_INLINE UBool isTable(enum EResourceType type) { 1653 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); 1654 } 1655 1656 static enum EResourceType 1657 parseResourceType(UErrorCode *status) 1658 { 1659 struct UString *tokenValue; 1660 struct UString comment; 1661 enum EResourceType result = RT_UNKNOWN; 1662 uint32_t line=0; 1663 ustr_init(&comment); 1664 expect(TOK_STRING, &tokenValue, &comment, &line, status); 1665 1666 if (U_FAILURE(*status)) 1667 { 1668 return RT_UNKNOWN; 1669 } 1670 1671 *status = U_ZERO_ERROR; 1672 1673 /* Search for normal types */ 1674 result=RT_UNKNOWN; 1675 while (++result < RT_RESERVED) { 1676 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1677 break; 1678 } 1679 } 1680 /* Now search for the aliases */ 1681 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1682 result = RT_INTEGER; 1683 } 1684 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1685 result = RT_BINARY; 1686 } 1687 else if (result == RT_RESERVED) { 1688 char tokenBuffer[1024]; 1689 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1690 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1691 *status = U_INVALID_FORMAT_ERROR; 1692 error(line, "unknown resource type '%s'", tokenBuffer); 1693 } 1694 1695 return result; 1696 } 1697 1698 /* parse a non-top-level resource */ 1699 static struct SResource * 1700 parseResource(char *tag, const struct UString *comment, UErrorCode *status) 1701 { 1702 enum ETokenType token; 1703 enum EResourceType resType = RT_UNKNOWN; 1704 ParseResourceFunction *parseFunction = NULL; 1705 struct UString *tokenValue; 1706 uint32_t startline; 1707 uint32_t line; 1708 1709 token = getToken(&tokenValue, NULL, &startline, status); 1710 1711 if(isVerbose()){ 1712 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1713 } 1714 1715 /* name . [ ':' type ] '{' resource '}' */ 1716 /* This function parses from the colon onwards. If the colon is present, parse the 1717 type then try to parse a resource of that type. If there is no explicit type, 1718 work it out using the lookahead tokens. */ 1719 switch (token) 1720 { 1721 case TOK_EOF: 1722 *status = U_INVALID_FORMAT_ERROR; 1723 error(startline, "Unexpected EOF encountered"); 1724 return NULL; 1725 1726 case TOK_ERROR: 1727 *status = U_INVALID_FORMAT_ERROR; 1728 return NULL; 1729 1730 case TOK_COLON: 1731 resType = parseResourceType(status); 1732 expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1733 1734 if (U_FAILURE(*status)) 1735 { 1736 return NULL; 1737 } 1738 1739 break; 1740 1741 case TOK_OPEN_BRACE: 1742 break; 1743 1744 default: 1745 *status = U_INVALID_FORMAT_ERROR; 1746 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1747 return NULL; 1748 } 1749 1750 if (resType == RT_UNKNOWN) 1751 { 1752 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1753 We could have any of the following: 1754 { { => array (nested) 1755 { :/} => array 1756 { string , => string array 1757 1758 { string { => table 1759 1760 { string :/{ => table 1761 { string } => string 1762 */ 1763 1764 token = peekToken(0, NULL, &line, NULL,status); 1765 1766 if (U_FAILURE(*status)) 1767 { 1768 return NULL; 1769 } 1770 1771 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1772 { 1773 resType = RT_ARRAY; 1774 } 1775 else if (token == TOK_STRING) 1776 { 1777 token = peekToken(1, NULL, &line, NULL, status); 1778 1779 if (U_FAILURE(*status)) 1780 { 1781 return NULL; 1782 } 1783 1784 switch (token) 1785 { 1786 case TOK_COMMA: resType = RT_ARRAY; break; 1787 case TOK_OPEN_BRACE: resType = RT_TABLE; break; 1788 case TOK_CLOSE_BRACE: resType = RT_STRING; break; 1789 case TOK_COLON: resType = RT_TABLE; break; 1790 default: 1791 *status = U_INVALID_FORMAT_ERROR; 1792 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 1793 return NULL; 1794 } 1795 } 1796 else 1797 { 1798 *status = U_INVALID_FORMAT_ERROR; 1799 error(line, "Unexpected token after '{'"); 1800 return NULL; 1801 } 1802 1803 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 1804 } else if(resType == RT_TABLE_NO_FALLBACK) { 1805 *status = U_INVALID_FORMAT_ERROR; 1806 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 1807 return NULL; 1808 } 1809 1810 /* We should now know what we need to parse next, so call the appropriate parser 1811 function and return. */ 1812 parseFunction = gResourceTypes[resType].parseFunction; 1813 if (parseFunction != NULL) { 1814 return parseFunction(tag, startline, comment, status); 1815 } 1816 else { 1817 *status = U_INTERNAL_PROGRAM_ERROR; 1818 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 1819 } 1820 1821 return NULL; 1822 } 1823 1824 /* parse the top-level resource */ 1825 struct SRBRoot * 1826 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status) 1827 { 1828 struct UString *tokenValue; 1829 struct UString comment; 1830 uint32_t line; 1831 enum EResourceType bundleType; 1832 enum ETokenType token; 1833 1834 initLookahead(buf, status); 1835 1836 inputdir = inputDir; 1837 inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0; 1838 outputdir = outputDir; 1839 outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0; 1840 1841 ustr_init(&comment); 1842 expect(TOK_STRING, &tokenValue, &comment, NULL, status); 1843 1844 bundle = bundle_open(&comment, FALSE, status); 1845 1846 if (bundle == NULL || U_FAILURE(*status)) 1847 { 1848 return NULL; 1849 } 1850 1851 1852 bundle_setlocale(bundle, tokenValue->fChars, status); 1853 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 1854 token = getToken(NULL, NULL, &line, status); 1855 if(token==TOK_COLON) { 1856 *status=U_ZERO_ERROR; 1857 bundleType=parseResourceType(status); 1858 1859 if(isTable(bundleType)) 1860 { 1861 expect(TOK_OPEN_BRACE, NULL, NULL, &line, status); 1862 } 1863 else 1864 { 1865 *status=U_PARSE_ERROR; 1866 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 1867 } 1868 } 1869 else 1870 { 1871 /* not a colon */ 1872 if(token==TOK_OPEN_BRACE) 1873 { 1874 *status=U_ZERO_ERROR; 1875 bundleType=RT_TABLE; 1876 } 1877 else 1878 { 1879 /* neither colon nor open brace */ 1880 *status=U_PARSE_ERROR; 1881 bundleType=RT_UNKNOWN; 1882 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 1883 } 1884 } 1885 1886 if (U_FAILURE(*status)) 1887 { 1888 bundle_close(bundle, status); 1889 return NULL; 1890 } 1891 1892 if(bundleType==RT_TABLE_NO_FALLBACK) { 1893 /* 1894 * Parse a top-level table with the table(nofallback) declaration. 1895 * This is the same as a regular table, but also sets the 1896 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 1897 */ 1898 bundle->noFallback=TRUE; 1899 } 1900 /* top-level tables need not handle special table names like "collations" */ 1901 realParseTable(bundle->fRoot, NULL, line, status); 1902 1903 if(dependencyArray!=NULL){ 1904 table_add(bundle->fRoot, dependencyArray, 0, status); 1905 dependencyArray = NULL; 1906 } 1907 if (U_FAILURE(*status)) 1908 { 1909 bundle_close(bundle, status); 1910 res_close(dependencyArray); 1911 return NULL; 1912 } 1913 1914 if (getToken(NULL, NULL, &line, status) != TOK_EOF) 1915 { 1916 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 1917 if(isStrict()){ 1918 *status = U_INVALID_FORMAT_ERROR; 1919 return NULL; 1920 } 1921 } 1922 1923 cleanupLookahead(); 1924 ustr_deinit(&comment); 1925 return bundle; 1926 } 1927 1928