1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File parse.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/26/99 stephen Creation. 15 * 02/25/00 weiv Overhaul to write udata 16 * 5/10/01 Ram removed ustdio dependency 17 * 06/10/2001 Dominic Ludlam <dom (at) recoil.org> Rewritten 18 ******************************************************************************* 19 */ 20 21 #include "ucol_imp.h" 22 #include "parse.h" 23 #include "errmsg.h" 24 #include "uhash.h" 25 #include "cmemory.h" 26 #include "cstring.h" 27 #include "uinvchar.h" 28 #include "read.h" 29 #include "ustr.h" 30 #include "reslist.h" 31 #include "rbt_pars.h" 32 #include "genrb.h" 33 #include "unicode/ustring.h" 34 #include "unicode/uscript.h" 35 #include "unicode/putil.h" 36 #include <stdio.h> 37 38 /* Number of tokens to read ahead of the current stream position */ 39 #define MAX_LOOKAHEAD 3 40 41 #define CR 0x000D 42 #define LF 0x000A 43 #define SPACE 0x0020 44 #define TAB 0x0009 45 #define ESCAPE 0x005C 46 #define HASH 0x0023 47 #define QUOTE 0x0027 48 #define ZERO 0x0030 49 #define STARTCOMMAND 0x005B 50 #define ENDCOMMAND 0x005D 51 #define OPENSQBRACKET 0x005B 52 #define CLOSESQBRACKET 0x005D 53 54 struct Lookahead 55 { 56 enum ETokenType type; 57 struct UString value; 58 struct UString comment; 59 uint32_t line; 60 }; 61 62 /* keep in sync with token defines in read.h */ 63 const char *tokenNames[TOK_TOKEN_COUNT] = 64 { 65 "string", /* A string token, such as "MonthNames" */ 66 "'{'", /* An opening brace character */ 67 "'}'", /* A closing brace character */ 68 "','", /* A comma */ 69 "':'", /* A colon */ 70 71 "<end of file>", /* End of the file has been reached successfully */ 72 "<end of line>" 73 }; 74 75 /* Just to store "TRUE" */ 76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 77 78 typedef struct { 79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 80 uint32_t lookaheadPosition; 81 UCHARBUF *buffer; 82 struct SRBRoot *bundle; 83 const char *inputdir; 84 uint32_t inputdirLength; 85 const char *outputdir; 86 uint32_t outputdirLength; 87 UBool makeBinaryCollation; 88 } ParseState; 89 90 static UBool gOmitCollationRules = FALSE; 91 92 typedef struct SResource * 93 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); 94 95 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); 96 97 /* The nature of the lookahead buffer: 98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 100 When getToken is called, the current pointer is moved to the next slot and the 101 old slot is filled with the next token from the reader by calling getNextToken. 102 The token values are stored in the slot, which means that token values don't 103 survive a call to getToken, ie. 104 105 UString *value; 106 107 getToken(&value, NULL, status); 108 getToken(NULL, NULL, status); bad - value is now a different string 109 */ 110 static void 111 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) 112 { 113 static uint32_t initTypeStrings = 0; 114 uint32_t i; 115 116 if (!initTypeStrings) 117 { 118 initTypeStrings = 1; 119 } 120 121 state->lookaheadPosition = 0; 122 state->buffer = buf; 123 124 resetLineNumber(); 125 126 for (i = 0; i < MAX_LOOKAHEAD; i++) 127 { 128 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 129 if (U_FAILURE(*status)) 130 { 131 return; 132 } 133 } 134 135 *status = U_ZERO_ERROR; 136 } 137 138 static void 139 cleanupLookahead(ParseState* state) 140 { 141 uint32_t i; 142 for (i = 0; i <= MAX_LOOKAHEAD; i++) 143 { 144 ustr_deinit(&state->lookahead[i].value); 145 ustr_deinit(&state->lookahead[i].comment); 146 } 147 148 } 149 150 static enum ETokenType 151 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 152 { 153 enum ETokenType result; 154 uint32_t i; 155 156 result = state->lookahead[state->lookaheadPosition].type; 157 158 if (tokenValue != NULL) 159 { 160 *tokenValue = &state->lookahead[state->lookaheadPosition].value; 161 } 162 163 if (linenumber != NULL) 164 { 165 *linenumber = state->lookahead[state->lookaheadPosition].line; 166 } 167 168 if (comment != NULL) 169 { 170 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 171 } 172 173 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 174 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 175 ustr_setlen(&state->lookahead[i].comment, 0, status); 176 ustr_setlen(&state->lookahead[i].value, 0, status); 177 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 178 179 /* printf("getToken, returning %s\n", tokenNames[result]); */ 180 181 return result; 182 } 183 184 static enum ETokenType 185 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) 186 { 187 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 188 189 if (U_FAILURE(*status)) 190 { 191 return TOK_ERROR; 192 } 193 194 if (lookaheadCount >= MAX_LOOKAHEAD) 195 { 196 *status = U_INTERNAL_PROGRAM_ERROR; 197 return TOK_ERROR; 198 } 199 200 if (tokenValue != NULL) 201 { 202 *tokenValue = &state->lookahead[i].value; 203 } 204 205 if (linenumber != NULL) 206 { 207 *linenumber = state->lookahead[i].line; 208 } 209 210 if(comment != NULL){ 211 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 212 } 213 214 return state->lookahead[i].type; 215 } 216 217 static void 218 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 219 { 220 uint32_t line; 221 222 enum ETokenType token = getToken(state, tokenValue, comment, &line, status); 223 224 if (linenumber != NULL) 225 { 226 *linenumber = line; 227 } 228 229 if (U_FAILURE(*status)) 230 { 231 return; 232 } 233 234 if (token != expectedToken) 235 { 236 *status = U_INVALID_FORMAT_ERROR; 237 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 238 } 239 else 240 { 241 *status = U_ZERO_ERROR; 242 } 243 } 244 245 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) 246 { 247 struct UString *tokenValue; 248 char *result; 249 uint32_t count; 250 251 expect(state, TOK_STRING, &tokenValue, comment, line, status); 252 253 if (U_FAILURE(*status)) 254 { 255 return NULL; 256 } 257 258 count = u_strlen(tokenValue->fChars); 259 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 260 *status = U_INVALID_FORMAT_ERROR; 261 error(*line, "invariant characters required for table keys, binary data, etc."); 262 return NULL; 263 } 264 265 result = static_cast<char *>(uprv_malloc(count+1)); 266 267 if (result == NULL) 268 { 269 *status = U_MEMORY_ALLOCATION_ERROR; 270 return NULL; 271 } 272 273 u_UCharsToChars(tokenValue->fChars, result, count+1); 274 return result; 275 } 276 277 static struct SResource * 278 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 279 { 280 struct SResource *result = NULL; 281 struct UString *tokenValue; 282 FileStream *file = NULL; 283 char filename[256] = { '\0' }; 284 char cs[128] = { '\0' }; 285 uint32_t line; 286 UBool quoted = FALSE; 287 UCHARBUF *ucbuf=NULL; 288 UChar32 c = 0; 289 const char* cp = NULL; 290 UChar *pTarget = NULL; 291 UChar *target = NULL; 292 UChar *targetLimit = NULL; 293 int32_t size = 0; 294 295 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 296 297 if(isVerbose()){ 298 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 299 } 300 301 if (U_FAILURE(*status)) 302 { 303 return NULL; 304 } 305 /* make the filename including the directory */ 306 if (state->inputdir != NULL) 307 { 308 uprv_strcat(filename, state->inputdir); 309 310 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 311 { 312 uprv_strcat(filename, U_FILE_SEP_STRING); 313 } 314 } 315 316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 317 318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 319 320 if (U_FAILURE(*status)) 321 { 322 return NULL; 323 } 324 uprv_strcat(filename, cs); 325 326 if(gOmitCollationRules) { 327 return res_none(); 328 } 329 330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 331 332 if (U_FAILURE(*status)) { 333 error(line, "An error occured while opening the input file %s\n", filename); 334 return NULL; 335 } 336 337 /* We allocate more space than actually required 338 * since the actual size needed for storing UChars 339 * is not known in UTF-8 byte stream 340 */ 341 size = ucbuf_size(ucbuf) + 1; 342 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 343 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 344 target = pTarget; 345 targetLimit = pTarget+size; 346 347 /* read the rules into the buffer */ 348 while (target < targetLimit) 349 { 350 c = ucbuf_getc(ucbuf, status); 351 if(c == QUOTE) { 352 quoted = (UBool)!quoted; 353 } 354 /* weiv (06/26/2002): adding the following: 355 * - preserving spaces in commands [...] 356 * - # comments until the end of line 357 */ 358 if (c == STARTCOMMAND && !quoted) 359 { 360 /* preserve commands 361 * closing bracket will be handled by the 362 * append at the end of the loop 363 */ 364 while(c != ENDCOMMAND) { 365 U_APPEND_CHAR32_ONLY(c, target); 366 c = ucbuf_getc(ucbuf, status); 367 } 368 } 369 else if (c == HASH && !quoted) { 370 /* skip comments */ 371 while(c != CR && c != LF) { 372 c = ucbuf_getc(ucbuf, status); 373 } 374 continue; 375 } 376 else if (c == ESCAPE) 377 { 378 c = unescape(ucbuf, status); 379 380 if (c == (UChar32)U_ERR) 381 { 382 uprv_free(pTarget); 383 T_FileStream_close(file); 384 return NULL; 385 } 386 } 387 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 388 { 389 /* ignore spaces carriage returns 390 * and line feed unless in the form \uXXXX 391 */ 392 continue; 393 } 394 395 /* Append UChar * after dissembling if c > 0xffff*/ 396 if (c != (UChar32)U_EOF) 397 { 398 U_APPEND_CHAR32_ONLY(c, target); 399 } 400 else 401 { 402 break; 403 } 404 } 405 406 /* terminate the string */ 407 if(target < targetLimit){ 408 *target = 0x0000; 409 } 410 411 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 412 413 414 ucbuf_close(ucbuf); 415 uprv_free(pTarget); 416 T_FileStream_close(file); 417 418 return result; 419 } 420 421 static struct SResource * 422 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 423 { 424 struct SResource *result = NULL; 425 struct UString *tokenValue; 426 FileStream *file = NULL; 427 char filename[256] = { '\0' }; 428 char cs[128] = { '\0' }; 429 uint32_t line; 430 UCHARBUF *ucbuf=NULL; 431 const char* cp = NULL; 432 UChar *pTarget = NULL; 433 const UChar *pSource = NULL; 434 int32_t size = 0; 435 436 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 437 438 if(isVerbose()){ 439 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 440 } 441 442 if (U_FAILURE(*status)) 443 { 444 return NULL; 445 } 446 /* make the filename including the directory */ 447 if (state->inputdir != NULL) 448 { 449 uprv_strcat(filename, state->inputdir); 450 451 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 452 { 453 uprv_strcat(filename, U_FILE_SEP_STRING); 454 } 455 } 456 457 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 458 459 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 460 461 if (U_FAILURE(*status)) 462 { 463 return NULL; 464 } 465 uprv_strcat(filename, cs); 466 467 468 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 469 470 if (U_FAILURE(*status)) { 471 error(line, "An error occured while opening the input file %s\n", filename); 472 return NULL; 473 } 474 475 /* We allocate more space than actually required 476 * since the actual size needed for storing UChars 477 * is not known in UTF-8 byte stream 478 */ 479 pSource = ucbuf_getBuffer(ucbuf, &size, status); 480 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 481 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 482 483 #if !UCONFIG_NO_TRANSLITERATION 484 size = utrans_stripRules(pSource, size, pTarget, status); 485 #else 486 size = 0; 487 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 488 #endif 489 result = string_open(state->bundle, tag, pTarget, size, NULL, status); 490 491 ucbuf_close(ucbuf); 492 uprv_free(pTarget); 493 T_FileStream_close(file); 494 495 return result; 496 } 497 static struct SResource* dependencyArray = NULL; 498 499 static struct SResource * 500 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 501 { 502 struct SResource *result = NULL; 503 struct SResource *elem = NULL; 504 struct UString *tokenValue; 505 uint32_t line; 506 char filename[256] = { '\0' }; 507 char cs[128] = { '\0' }; 508 509 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 510 511 if(isVerbose()){ 512 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 513 } 514 515 if (U_FAILURE(*status)) 516 { 517 return NULL; 518 } 519 /* make the filename including the directory */ 520 if (state->outputdir != NULL) 521 { 522 uprv_strcat(filename, state->outputdir); 523 524 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) 525 { 526 uprv_strcat(filename, U_FILE_SEP_STRING); 527 } 528 } 529 530 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 531 532 if (U_FAILURE(*status)) 533 { 534 return NULL; 535 } 536 uprv_strcat(filename, cs); 537 if(!T_FileStream_file_exists(filename)){ 538 if(isStrict()){ 539 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 540 }else{ 541 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 542 } 543 } 544 if(dependencyArray==NULL){ 545 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); 546 } 547 if(tag!=NULL){ 548 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 549 } 550 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 551 552 array_add(dependencyArray, elem, status); 553 554 if (U_FAILURE(*status)) 555 { 556 return NULL; 557 } 558 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 559 return result; 560 } 561 static struct SResource * 562 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 563 { 564 struct UString *tokenValue; 565 struct SResource *result = NULL; 566 567 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 568 { 569 return parseUCARules(tag, startline, status); 570 }*/ 571 if(isVerbose()){ 572 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 573 } 574 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 575 576 if (U_SUCCESS(*status)) 577 { 578 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 579 doesn't survive expect either) */ 580 581 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 582 if(U_SUCCESS(*status) && result) { 583 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 584 585 if (U_FAILURE(*status)) 586 { 587 res_close(result); 588 return NULL; 589 } 590 } 591 } 592 593 return result; 594 } 595 596 static struct SResource * 597 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 598 { 599 struct UString *tokenValue; 600 struct SResource *result = NULL; 601 602 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 603 604 if(isVerbose()){ 605 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 606 } 607 608 if (U_SUCCESS(*status)) 609 { 610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 611 doesn't survive expect either) */ 612 613 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 614 615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 616 617 if (U_FAILURE(*status)) 618 { 619 res_close(result); 620 return NULL; 621 } 622 } 623 624 return result; 625 } 626 627 typedef struct{ 628 const char* inputDir; 629 const char* outputDir; 630 } GenrbData; 631 632 static struct SResource* resLookup(struct SResource* res, const char* key){ 633 struct SResource *current = NULL; 634 struct SResTable *list; 635 if (res == res_none()) { 636 return NULL; 637 } 638 639 list = &(res->u.fTable); 640 641 current = list->fFirst; 642 while (current != NULL) { 643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { 644 return current; 645 } 646 current = current->fNext; 647 } 648 return NULL; 649 } 650 651 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ 652 struct SRBRoot *data = NULL; 653 UCHARBUF *ucbuf = NULL; 654 GenrbData* genrbdata = (GenrbData*) context; 655 int localeLength = strlen(locale); 656 char* filename = (char*)uprv_malloc(localeLength+5); 657 char *inputDirBuf = NULL; 658 char *openFileName = NULL; 659 const char* cp = ""; 660 UChar* urules = NULL; 661 int32_t urulesLength = 0; 662 int32_t i = 0; 663 int32_t dirlen = 0; 664 int32_t filelen = 0; 665 struct SResource* root; 666 struct SResource* collations; 667 struct SResource* collation; 668 struct SResource* sequence; 669 670 memcpy(filename, locale, localeLength); 671 for(i = 0; i < localeLength; i++){ 672 if(filename[i] == '-'){ 673 filename[i] = '_'; 674 } 675 } 676 filename[localeLength] = '.'; 677 filename[localeLength+1] = 't'; 678 filename[localeLength+2] = 'x'; 679 filename[localeLength+3] = 't'; 680 filename[localeLength+4] = 0; 681 682 683 if (status==NULL || U_FAILURE(*status)) { 684 return NULL; 685 } 686 if(filename==NULL){ 687 *status=U_ILLEGAL_ARGUMENT_ERROR; 688 return NULL; 689 }else{ 690 filelen = (int32_t)uprv_strlen(filename); 691 } 692 if(genrbdata->inputDir == NULL) { 693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); 694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 695 openFileName[0] = '\0'; 696 if (filenameBegin != NULL) { 697 /* 698 * When a filename ../../../data/root.txt is specified, 699 * we presume that the input directory is ../../../data 700 * This is very important when the resource file includes 701 * another file, like UCARules.txt or thaidict.brk. 702 */ 703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); 704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); 705 706 /* test for NULL */ 707 if(inputDirBuf == NULL) { 708 *status = U_MEMORY_ALLOCATION_ERROR; 709 goto finish; 710 } 711 712 inputDirBuf[filenameSize - 1] = 0; 713 genrbdata->inputDir = inputDirBuf; 714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 715 } 716 }else{ 717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 718 719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 721 722 /* test for NULL */ 723 if(openFileName == NULL) { 724 *status = U_MEMORY_ALLOCATION_ERROR; 725 goto finish; 726 } 727 728 openFileName[0] = '\0'; 729 /* 730 * append the input dir to openFileName if the first char in 731 * filename is not file seperation char and the last char input directory is not '.'. 732 * This is to support : 733 * genrb -s. /home/icu/data 734 * genrb -s. icu/data 735 * The user cannot mix notations like 736 * genrb -s. /icu/data --- the absolute path specified. -s redundant 737 * user should use 738 * genrb -s. icu/data --- start from CWD and look in icu/data dir 739 */ 740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){ 741 uprv_strcpy(openFileName, genrbdata->inputDir); 742 openFileName[dirlen] = U_FILE_SEP_CHAR; 743 } 744 openFileName[dirlen + 1] = '\0'; 745 } else { 746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); 747 748 /* test for NULL */ 749 if(openFileName == NULL) { 750 *status = U_MEMORY_ALLOCATION_ERROR; 751 goto finish; 752 } 753 754 uprv_strcpy(openFileName, genrbdata->inputDir); 755 756 } 757 } 758 uprv_strcat(openFileName, filename); 759 /* printf("%s\n", openFileName); */ 760 *status = U_ZERO_ERROR; 761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); 762 763 if(*status == U_FILE_ACCESS_ERROR) { 764 765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); 766 goto finish; 767 } 768 if (ucbuf == NULL || U_FAILURE(*status)) { 769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status)); 770 goto finish; 771 } 772 773 /* Parse the data into an SRBRoot */ 774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status); 775 776 root = data->fRoot; 777 collations = resLookup(root, "collations"); 778 if (collations != NULL) { 779 collation = resLookup(collations, type); 780 if (collation != NULL) { 781 sequence = resLookup(collation, "Sequence"); 782 if (sequence != NULL) { 783 urules = sequence->u.fString.fChars; 784 urulesLength = sequence->u.fString.fLength; 785 *pLength = urulesLength; 786 } 787 } 788 } 789 790 finish: 791 if (inputDirBuf != NULL) { 792 uprv_free(inputDirBuf); 793 } 794 795 if (openFileName != NULL) { 796 uprv_free(openFileName); 797 } 798 799 if(ucbuf) { 800 ucbuf_close(ucbuf); 801 } 802 803 return urules; 804 } 805 806 // Quick-and-dirty escaping function. 807 // Assumes that we are on an ASCII-based platform. 808 static void 809 escape(const UChar *s, char *buffer) { 810 int32_t length = u_strlen(s); 811 int32_t i = 0; 812 for (;;) { 813 UChar32 c; 814 U16_NEXT(s, i, length, c); 815 if (c == 0) { 816 *buffer = 0; 817 return; 818 } else if (0x20 <= c && c <= 0x7e) { 819 // printable ASCII 820 *buffer++ = (char)c; // assumes ASCII-based platform 821 } else { 822 buffer += sprintf(buffer, "\\u%04X", (int)c); 823 } 824 } 825 } 826 827 static struct SResource * 828 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status) 829 { 830 struct SResource *member = NULL; 831 struct UString *tokenValue; 832 struct UString comment; 833 enum ETokenType token; 834 char subtag[1024]; 835 UVersionInfo version; 836 uint32_t line; 837 GenrbData genrbdata; 838 /* '{' . (name resource)* '}' */ 839 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 840 841 for (;;) 842 { 843 ustr_init(&comment); 844 token = getToken(state, &tokenValue, &comment, &line, status); 845 846 if (token == TOK_CLOSE_BRACE) 847 { 848 return result; 849 } 850 851 if (token != TOK_STRING) 852 { 853 res_close(result); 854 *status = U_INVALID_FORMAT_ERROR; 855 856 if (token == TOK_EOF) 857 { 858 error(startline, "unterminated table"); 859 } 860 else 861 { 862 error(line, "Unexpected token %s", tokenNames[token]); 863 } 864 865 return NULL; 866 } 867 868 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 869 870 if (U_FAILURE(*status)) 871 { 872 res_close(result); 873 return NULL; 874 } 875 876 member = parseResource(state, subtag, NULL, status); 877 878 if (U_FAILURE(*status)) 879 { 880 res_close(result); 881 return NULL; 882 } 883 884 if (uprv_strcmp(subtag, "Version") == 0) 885 { 886 char ver[40]; 887 int32_t length = member->u.fString.fLength; 888 889 if (length >= (int32_t) sizeof(ver)) 890 { 891 length = (int32_t) sizeof(ver) - 1; 892 } 893 894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ 895 u_versionFromString(version, ver); 896 897 table_add(result, member, line, status); 898 899 } 900 else if (uprv_strcmp(subtag, "Override") == 0) 901 { 902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0); 903 table_add(result, member, line, status); 904 905 } 906 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 907 { 908 /* discard duplicate %%CollationBin if any*/ 909 } 910 else if (uprv_strcmp(subtag, "Sequence") == 0) 911 { 912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 913 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 914 #else 915 if(state->makeBinaryCollation) { 916 917 /* do the collation elements */ 918 int32_t len = 0; 919 uint8_t *data = NULL; 920 UCollator *coll = NULL; 921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)]; 922 int32_t reorderCodeCount; 923 int32_t reorderCodeIndex; 924 UParseError parseError; 925 926 genrbdata.inputDir = state->inputdir; 927 genrbdata.outputDir = state->outputdir; 928 929 UErrorCode intStatus = U_ZERO_ERROR; 930 uprv_memset(&parseError, 0, sizeof(parseError)); 931 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength, 932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus); 933 934 if (U_SUCCESS(intStatus) && coll != NULL) 935 { 936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); 937 data = (uint8_t *)uprv_malloc(len); 938 intStatus = U_ZERO_ERROR; 939 len = ucol_cloneBinary(coll, data, len, &intStatus); 940 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/ 941 942 /* tailoring rules version */ 943 /* This is wrong! */ 944 /*coll->dataInfo.dataVersion[1] = version[0];*/ 945 /* Copy tailoring version. Builder version already */ 946 /* set in ucol_openRules */ 947 ((UCATableHeader *)data)->version[1] = version[0]; 948 ((UCATableHeader *)data)->version[2] = version[1]; 949 ((UCATableHeader *)data)->version[3] = version[2]; 950 951 if (U_SUCCESS(intStatus) && data != NULL) 952 { 953 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status); 954 table_add(result, collationBin, line, status); 955 uprv_free(data); 956 957 reorderCodeCount = ucol_getReorderCodes( 958 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); 959 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { 960 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status); 961 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) { 962 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status); 963 } 964 table_add(result, reorderCodeRes, line, status); 965 } 966 } 967 else 968 { 969 warning(line, "could not obtain rules from collator"); 970 if(isStrict()){ 971 *status = U_INVALID_FORMAT_ERROR; 972 return NULL; 973 } 974 } 975 976 ucol_close(coll); 977 } 978 else 979 { 980 if(intStatus == U_FILE_ACCESS_ERROR) { 981 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); 982 *status = intStatus; 983 return NULL; 984 } 985 char preBuffer[100], postBuffer[100]; 986 escape(parseError.preContext, preBuffer); 987 escape(parseError.postContext, postBuffer); 988 warning(line, 989 "%%%%CollationBin could not be constructed from CollationElements\n" 990 " check context, check that the FractionalUCA.txt UCA version " 991 "matches the current UCD version\n" 992 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }", 993 u_errorName(intStatus), 994 parseError.line, 995 parseError.offset, 996 preBuffer, 997 postBuffer); 998 if(isStrict()){ 999 *status = intStatus; 1000 return NULL; 1001 } 1002 } 1003 } else { 1004 if(isVerbose()) { 1005 printf("Not building Collation binary\n"); 1006 } 1007 } 1008 #endif 1009 /* in order to achieve smaller data files, we can direct genrb */ 1010 /* to omit collation rules */ 1011 if(gOmitCollationRules) { 1012 bundle_closeString(state->bundle, member); 1013 } else { 1014 table_add(result, member, line, status); 1015 } 1016 } 1017 if (U_FAILURE(*status)) 1018 { 1019 res_close(result); 1020 return NULL; 1021 } 1022 } 1023 1024 // Reached the end without a TOK_CLOSE_BRACE. Should be an error. 1025 *status = U_INTERNAL_PROGRAM_ERROR; 1026 return NULL; 1027 } 1028 1029 static struct SResource * 1030 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 1031 { 1032 struct SResource *result = NULL; 1033 struct SResource *member = NULL; 1034 struct SResource *collationRes = NULL; 1035 struct UString *tokenValue; 1036 struct UString comment; 1037 enum ETokenType token; 1038 char subtag[1024], typeKeyword[1024]; 1039 uint32_t line; 1040 1041 result = table_open(state->bundle, tag, NULL, status); 1042 1043 if (result == NULL || U_FAILURE(*status)) 1044 { 1045 return NULL; 1046 } 1047 if(isVerbose()){ 1048 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1049 } 1050 if(!newCollation) { 1051 return addCollation(state, result, startline, status); 1052 } 1053 else { 1054 for(;;) { 1055 ustr_init(&comment); 1056 token = getToken(state, &tokenValue, &comment, &line, status); 1057 1058 if (token == TOK_CLOSE_BRACE) 1059 { 1060 return result; 1061 } 1062 1063 if (token != TOK_STRING) 1064 { 1065 res_close(result); 1066 *status = U_INVALID_FORMAT_ERROR; 1067 1068 if (token == TOK_EOF) 1069 { 1070 error(startline, "unterminated table"); 1071 } 1072 else 1073 { 1074 error(line, "Unexpected token %s", tokenNames[token]); 1075 } 1076 1077 return NULL; 1078 } 1079 1080 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1081 1082 if (U_FAILURE(*status)) 1083 { 1084 res_close(result); 1085 return NULL; 1086 } 1087 1088 if (uprv_strcmp(subtag, "default") == 0) 1089 { 1090 member = parseResource(state, subtag, NULL, status); 1091 1092 if (U_FAILURE(*status)) 1093 { 1094 res_close(result); 1095 return NULL; 1096 } 1097 1098 table_add(result, member, line, status); 1099 } 1100 else 1101 { 1102 token = peekToken(state, 0, &tokenValue, &line, &comment, status); 1103 /* this probably needs to be refactored or recursively use the parser */ 1104 /* first we assume that our collation table won't have the explicit type */ 1105 /* then, we cannot handle aliases */ 1106 if(token == TOK_OPEN_BRACE) { 1107 token = getToken(state, &tokenValue, &comment, &line, status); 1108 collationRes = table_open(state->bundle, subtag, NULL, status); 1109 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ 1110 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) { 1111 table_add(result, collationRes, startline, status); 1112 } 1113 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 1114 /* we could have a table too */ 1115 token = peekToken(state, 1, &tokenValue, &line, &comment, status); 1116 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 1117 if(uprv_strcmp(typeKeyword, "alias") == 0) { 1118 member = parseResource(state, subtag, NULL, status); 1119 if (U_FAILURE(*status)) 1120 { 1121 res_close(result); 1122 return NULL; 1123 } 1124 1125 table_add(result, member, line, status); 1126 } else { 1127 res_close(result); 1128 *status = U_INVALID_FORMAT_ERROR; 1129 return NULL; 1130 } 1131 } else { 1132 res_close(result); 1133 *status = U_INVALID_FORMAT_ERROR; 1134 return NULL; 1135 } 1136 } 1137 1138 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 1139 1140 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 1141 1142 if (U_FAILURE(*status)) 1143 { 1144 res_close(result); 1145 return NULL; 1146 } 1147 } 1148 } 1149 } 1150 1151 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 1152 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 1153 static struct SResource * 1154 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) 1155 { 1156 struct SResource *member = NULL; 1157 struct UString *tokenValue=NULL; 1158 struct UString comment; 1159 enum ETokenType token; 1160 char subtag[1024]; 1161 uint32_t line; 1162 UBool readToken = FALSE; 1163 1164 /* '{' . (name resource)* '}' */ 1165 1166 if(isVerbose()){ 1167 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1168 } 1169 for (;;) 1170 { 1171 ustr_init(&comment); 1172 token = getToken(state, &tokenValue, &comment, &line, status); 1173 1174 if (token == TOK_CLOSE_BRACE) 1175 { 1176 if (!readToken) { 1177 warning(startline, "Encountered empty table"); 1178 } 1179 return table; 1180 } 1181 1182 if (token != TOK_STRING) 1183 { 1184 *status = U_INVALID_FORMAT_ERROR; 1185 1186 if (token == TOK_EOF) 1187 { 1188 error(startline, "unterminated table"); 1189 } 1190 else 1191 { 1192 error(line, "unexpected token %s", tokenNames[token]); 1193 } 1194 1195 return NULL; 1196 } 1197 1198 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 1199 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1200 } else { 1201 *status = U_INVALID_FORMAT_ERROR; 1202 error(line, "invariant characters required for table keys"); 1203 return NULL; 1204 } 1205 1206 if (U_FAILURE(*status)) 1207 { 1208 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 1209 return NULL; 1210 } 1211 1212 member = parseResource(state, subtag, &comment, status); 1213 1214 if (member == NULL || U_FAILURE(*status)) 1215 { 1216 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 1217 return NULL; 1218 } 1219 1220 table_add(table, member, line, status); 1221 1222 if (U_FAILURE(*status)) 1223 { 1224 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 1225 return NULL; 1226 } 1227 readToken = TRUE; 1228 ustr_deinit(&comment); 1229 } 1230 1231 /* not reached */ 1232 /* A compiler warning will appear if all paths don't contain a return statement. */ 1233 /* *status = U_INTERNAL_PROGRAM_ERROR; 1234 return NULL;*/ 1235 } 1236 1237 static struct SResource * 1238 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1239 { 1240 struct SResource *result; 1241 1242 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1243 { 1244 return parseCollationElements(state, tag, startline, FALSE, status); 1245 } 1246 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1247 { 1248 return parseCollationElements(state, tag, startline, TRUE, status); 1249 } 1250 if(isVerbose()){ 1251 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1252 } 1253 1254 result = table_open(state->bundle, tag, comment, status); 1255 1256 if (result == NULL || U_FAILURE(*status)) 1257 { 1258 return NULL; 1259 } 1260 return realParseTable(state, result, tag, startline, status); 1261 } 1262 1263 static struct SResource * 1264 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1265 { 1266 struct SResource *result = NULL; 1267 struct SResource *member = NULL; 1268 struct UString *tokenValue; 1269 struct UString memberComments; 1270 enum ETokenType token; 1271 UBool readToken = FALSE; 1272 1273 result = array_open(state->bundle, tag, comment, status); 1274 1275 if (result == NULL || U_FAILURE(*status)) 1276 { 1277 return NULL; 1278 } 1279 if(isVerbose()){ 1280 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1281 } 1282 1283 ustr_init(&memberComments); 1284 1285 /* '{' . resource [','] '}' */ 1286 for (;;) 1287 { 1288 /* reset length */ 1289 ustr_setlen(&memberComments, 0, status); 1290 1291 /* check for end of array, but don't consume next token unless it really is the end */ 1292 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); 1293 1294 1295 if (token == TOK_CLOSE_BRACE) 1296 { 1297 getToken(state, NULL, NULL, NULL, status); 1298 if (!readToken) { 1299 warning(startline, "Encountered empty array"); 1300 } 1301 break; 1302 } 1303 1304 if (token == TOK_EOF) 1305 { 1306 res_close(result); 1307 *status = U_INVALID_FORMAT_ERROR; 1308 error(startline, "unterminated array"); 1309 return NULL; 1310 } 1311 1312 /* string arrays are a special case */ 1313 if (token == TOK_STRING) 1314 { 1315 getToken(state, &tokenValue, &memberComments, NULL, status); 1316 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1317 } 1318 else 1319 { 1320 member = parseResource(state, NULL, &memberComments, status); 1321 } 1322 1323 if (member == NULL || U_FAILURE(*status)) 1324 { 1325 res_close(result); 1326 return NULL; 1327 } 1328 1329 array_add(result, member, status); 1330 1331 if (U_FAILURE(*status)) 1332 { 1333 res_close(result); 1334 return NULL; 1335 } 1336 1337 /* eat optional comma if present */ 1338 token = peekToken(state, 0, NULL, NULL, NULL, status); 1339 1340 if (token == TOK_COMMA) 1341 { 1342 getToken(state, NULL, NULL, NULL, status); 1343 } 1344 1345 if (U_FAILURE(*status)) 1346 { 1347 res_close(result); 1348 return NULL; 1349 } 1350 readToken = TRUE; 1351 } 1352 1353 ustr_deinit(&memberComments); 1354 return result; 1355 } 1356 1357 static struct SResource * 1358 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1359 { 1360 struct SResource *result = NULL; 1361 enum ETokenType token; 1362 char *string; 1363 int32_t value; 1364 UBool readToken = FALSE; 1365 char *stopstring; 1366 uint32_t len; 1367 struct UString memberComments; 1368 1369 result = intvector_open(state->bundle, tag, comment, status); 1370 1371 if (result == NULL || U_FAILURE(*status)) 1372 { 1373 return NULL; 1374 } 1375 1376 if(isVerbose()){ 1377 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1378 } 1379 ustr_init(&memberComments); 1380 /* '{' . string [','] '}' */ 1381 for (;;) 1382 { 1383 ustr_setlen(&memberComments, 0, status); 1384 1385 /* check for end of array, but don't consume next token unless it really is the end */ 1386 token = peekToken(state, 0, NULL, NULL,&memberComments, status); 1387 1388 if (token == TOK_CLOSE_BRACE) 1389 { 1390 /* it's the end, consume the close brace */ 1391 getToken(state, NULL, NULL, NULL, status); 1392 if (!readToken) { 1393 warning(startline, "Encountered empty int vector"); 1394 } 1395 ustr_deinit(&memberComments); 1396 return result; 1397 } 1398 1399 string = getInvariantString(state, NULL, NULL, status); 1400 1401 if (U_FAILURE(*status)) 1402 { 1403 res_close(result); 1404 return NULL; 1405 } 1406 1407 /* For handling illegal char in the Intvector */ 1408 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1409 len=(uint32_t)(stopstring-string); 1410 1411 if(len==uprv_strlen(string)) 1412 { 1413 intvector_add(result, value, status); 1414 uprv_free(string); 1415 token = peekToken(state, 0, NULL, NULL, NULL, status); 1416 } 1417 else 1418 { 1419 uprv_free(string); 1420 *status=U_INVALID_CHAR_FOUND; 1421 } 1422 1423 if (U_FAILURE(*status)) 1424 { 1425 res_close(result); 1426 return NULL; 1427 } 1428 1429 /* the comma is optional (even though it is required to prevent the reader from concatenating 1430 consecutive entries) so that a missing comma on the last entry isn't an error */ 1431 if (token == TOK_COMMA) 1432 { 1433 getToken(state, NULL, NULL, NULL, status); 1434 } 1435 readToken = TRUE; 1436 } 1437 1438 /* not reached */ 1439 /* A compiler warning will appear if all paths don't contain a return statement. */ 1440 /* intvector_close(result, status); 1441 *status = U_INTERNAL_PROGRAM_ERROR; 1442 return NULL;*/ 1443 } 1444 1445 static struct SResource * 1446 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1447 { 1448 struct SResource *result = NULL; 1449 uint8_t *value; 1450 char *string; 1451 char toConv[3] = {'\0', '\0', '\0'}; 1452 uint32_t count; 1453 uint32_t i; 1454 uint32_t line; 1455 char *stopstring; 1456 uint32_t len; 1457 1458 string = getInvariantString(state, &line, NULL, status); 1459 1460 if (string == NULL || U_FAILURE(*status)) 1461 { 1462 return NULL; 1463 } 1464 1465 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1466 1467 if (U_FAILURE(*status)) 1468 { 1469 uprv_free(string); 1470 return NULL; 1471 } 1472 1473 if(isVerbose()){ 1474 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1475 } 1476 1477 count = (uint32_t)uprv_strlen(string); 1478 if (count > 0){ 1479 if((count % 2)==0){ 1480 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count)); 1481 1482 if (value == NULL) 1483 { 1484 uprv_free(string); 1485 *status = U_MEMORY_ALLOCATION_ERROR; 1486 return NULL; 1487 } 1488 1489 for (i = 0; i < count; i += 2) 1490 { 1491 toConv[0] = string[i]; 1492 toConv[1] = string[i + 1]; 1493 1494 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1495 len=(uint32_t)(stopstring-toConv); 1496 1497 if(len!=uprv_strlen(toConv)) 1498 { 1499 uprv_free(string); 1500 *status=U_INVALID_CHAR_FOUND; 1501 return NULL; 1502 } 1503 } 1504 1505 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status); 1506 1507 uprv_free(value); 1508 } 1509 else 1510 { 1511 *status = U_INVALID_CHAR_FOUND; 1512 uprv_free(string); 1513 error(line, "Encountered invalid binary string"); 1514 return NULL; 1515 } 1516 } 1517 else 1518 { 1519 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status); 1520 warning(startline, "Encountered empty binary tag"); 1521 } 1522 uprv_free(string); 1523 1524 return result; 1525 } 1526 1527 static struct SResource * 1528 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1529 { 1530 struct SResource *result = NULL; 1531 int32_t value; 1532 char *string; 1533 char *stopstring; 1534 uint32_t len; 1535 1536 string = getInvariantString(state, NULL, NULL, status); 1537 1538 if (string == NULL || U_FAILURE(*status)) 1539 { 1540 return NULL; 1541 } 1542 1543 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1544 1545 if (U_FAILURE(*status)) 1546 { 1547 uprv_free(string); 1548 return NULL; 1549 } 1550 1551 if(isVerbose()){ 1552 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1553 } 1554 1555 if (uprv_strlen(string) <= 0) 1556 { 1557 warning(startline, "Encountered empty integer. Default value is 0."); 1558 } 1559 1560 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1561 /* and handle illegal char in the integer*/ 1562 value = uprv_strtoul(string, &stopstring, 0); 1563 len=(uint32_t)(stopstring-string); 1564 if(len==uprv_strlen(string)) 1565 { 1566 result = int_open(state->bundle, tag, value, comment, status); 1567 } 1568 else 1569 { 1570 *status=U_INVALID_CHAR_FOUND; 1571 } 1572 uprv_free(string); 1573 1574 return result; 1575 } 1576 1577 static struct SResource * 1578 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1579 { 1580 struct SResource *result; 1581 FileStream *file; 1582 int32_t len; 1583 uint8_t *data; 1584 char *filename; 1585 uint32_t line; 1586 char *fullname = NULL; 1587 filename = getInvariantString(state, &line, NULL, status); 1588 1589 if (U_FAILURE(*status)) 1590 { 1591 return NULL; 1592 } 1593 1594 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1595 1596 if (U_FAILURE(*status)) 1597 { 1598 uprv_free(filename); 1599 return NULL; 1600 } 1601 1602 if(isVerbose()){ 1603 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1604 } 1605 1606 /* Open the input file for reading */ 1607 if (state->inputdir == NULL) 1608 { 1609 #if 1 1610 /* 1611 * Always save file file name, even if there's 1612 * no input directory specified. MIGHT BREAK SOMETHING 1613 */ 1614 int32_t filenameLength = uprv_strlen(filename); 1615 1616 fullname = (char *) uprv_malloc(filenameLength + 1); 1617 uprv_strcpy(fullname, filename); 1618 #endif 1619 1620 file = T_FileStream_open(filename, "rb"); 1621 } 1622 else 1623 { 1624 1625 int32_t count = (int32_t)uprv_strlen(filename); 1626 1627 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1628 { 1629 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1630 1631 /* test for NULL */ 1632 if(fullname == NULL) 1633 { 1634 *status = U_MEMORY_ALLOCATION_ERROR; 1635 return NULL; 1636 } 1637 1638 uprv_strcpy(fullname, state->inputdir); 1639 1640 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1641 fullname[state->inputdirLength + 1] = '\0'; 1642 1643 uprv_strcat(fullname, filename); 1644 } 1645 else 1646 { 1647 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1); 1648 1649 /* test for NULL */ 1650 if(fullname == NULL) 1651 { 1652 *status = U_MEMORY_ALLOCATION_ERROR; 1653 return NULL; 1654 } 1655 1656 uprv_strcpy(fullname, state->inputdir); 1657 uprv_strcat(fullname, filename); 1658 } 1659 1660 file = T_FileStream_open(fullname, "rb"); 1661 1662 } 1663 1664 if (file == NULL) 1665 { 1666 error(line, "couldn't open input file %s", filename); 1667 *status = U_FILE_ACCESS_ERROR; 1668 return NULL; 1669 } 1670 1671 len = T_FileStream_size(file); 1672 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); 1673 /* test for NULL */ 1674 if(data == NULL) 1675 { 1676 *status = U_MEMORY_ALLOCATION_ERROR; 1677 T_FileStream_close (file); 1678 return NULL; 1679 } 1680 1681 /* int32_t numRead = */ T_FileStream_read (file, data, len); 1682 T_FileStream_close (file); 1683 1684 result = bin_open(state->bundle, tag, len, data, fullname, comment, status); 1685 1686 uprv_free(data); 1687 uprv_free(filename); 1688 uprv_free(fullname); 1689 1690 return result; 1691 } 1692 1693 static struct SResource * 1694 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1695 { 1696 struct SResource *result; 1697 int32_t len=0; 1698 char *filename; 1699 uint32_t line; 1700 UChar *pTarget = NULL; 1701 1702 UCHARBUF *ucbuf; 1703 char *fullname = NULL; 1704 int32_t count = 0; 1705 const char* cp = NULL; 1706 const UChar* uBuffer = NULL; 1707 1708 filename = getInvariantString(state, &line, NULL, status); 1709 count = (int32_t)uprv_strlen(filename); 1710 1711 if (U_FAILURE(*status)) 1712 { 1713 return NULL; 1714 } 1715 1716 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1717 1718 if (U_FAILURE(*status)) 1719 { 1720 uprv_free(filename); 1721 return NULL; 1722 } 1723 1724 if(isVerbose()){ 1725 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1726 } 1727 1728 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1729 /* test for NULL */ 1730 if(fullname == NULL) 1731 { 1732 *status = U_MEMORY_ALLOCATION_ERROR; 1733 uprv_free(filename); 1734 return NULL; 1735 } 1736 1737 if(state->inputdir!=NULL){ 1738 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1739 { 1740 1741 uprv_strcpy(fullname, state->inputdir); 1742 1743 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1744 fullname[state->inputdirLength + 1] = '\0'; 1745 1746 uprv_strcat(fullname, filename); 1747 } 1748 else 1749 { 1750 uprv_strcpy(fullname, state->inputdir); 1751 uprv_strcat(fullname, filename); 1752 } 1753 }else{ 1754 uprv_strcpy(fullname,filename); 1755 } 1756 1757 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1758 1759 if (U_FAILURE(*status)) { 1760 error(line, "couldn't open input file %s\n", filename); 1761 return NULL; 1762 } 1763 1764 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1765 result = string_open(state->bundle, tag, uBuffer, len, comment, status); 1766 1767 ucbuf_close(ucbuf); 1768 1769 uprv_free(pTarget); 1770 1771 uprv_free(filename); 1772 uprv_free(fullname); 1773 1774 return result; 1775 } 1776 1777 1778 1779 1780 1781 U_STRING_DECL(k_type_string, "string", 6); 1782 U_STRING_DECL(k_type_binary, "binary", 6); 1783 U_STRING_DECL(k_type_bin, "bin", 3); 1784 U_STRING_DECL(k_type_table, "table", 5); 1785 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1786 U_STRING_DECL(k_type_int, "int", 3); 1787 U_STRING_DECL(k_type_integer, "integer", 7); 1788 U_STRING_DECL(k_type_array, "array", 5); 1789 U_STRING_DECL(k_type_alias, "alias", 5); 1790 U_STRING_DECL(k_type_intvector, "intvector", 9); 1791 U_STRING_DECL(k_type_import, "import", 6); 1792 U_STRING_DECL(k_type_include, "include", 7); 1793 1794 /* Various non-standard processing plugins that create one or more special resources. */ 1795 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1796 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1797 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1798 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1799 1800 typedef enum EResourceType 1801 { 1802 RT_UNKNOWN, 1803 RT_STRING, 1804 RT_BINARY, 1805 RT_TABLE, 1806 RT_TABLE_NO_FALLBACK, 1807 RT_INTEGER, 1808 RT_ARRAY, 1809 RT_ALIAS, 1810 RT_INTVECTOR, 1811 RT_IMPORT, 1812 RT_INCLUDE, 1813 RT_PROCESS_UCA_RULES, 1814 RT_PROCESS_COLLATION, 1815 RT_PROCESS_TRANSLITERATOR, 1816 RT_PROCESS_DEPENDENCY, 1817 RT_RESERVED 1818 } EResourceType; 1819 1820 static struct { 1821 const char *nameChars; /* only used for debugging */ 1822 const UChar *nameUChars; 1823 ParseResourceFunction *parseFunction; 1824 } gResourceTypes[] = { 1825 {"Unknown", NULL, NULL}, 1826 {"string", k_type_string, parseString}, 1827 {"binary", k_type_binary, parseBinary}, 1828 {"table", k_type_table, parseTable}, 1829 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1830 {"integer", k_type_integer, parseInteger}, 1831 {"array", k_type_array, parseArray}, 1832 {"alias", k_type_alias, parseAlias}, 1833 {"intvector", k_type_intvector, parseIntVector}, 1834 {"import", k_type_import, parseImport}, 1835 {"include", k_type_include, parseInclude}, 1836 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1837 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1838 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1839 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1840 {"reserved", NULL, NULL} 1841 }; 1842 1843 void initParser(UBool omitCollationRules) 1844 { 1845 U_STRING_INIT(k_type_string, "string", 6); 1846 U_STRING_INIT(k_type_binary, "binary", 6); 1847 U_STRING_INIT(k_type_bin, "bin", 3); 1848 U_STRING_INIT(k_type_table, "table", 5); 1849 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1850 U_STRING_INIT(k_type_int, "int", 3); 1851 U_STRING_INIT(k_type_integer, "integer", 7); 1852 U_STRING_INIT(k_type_array, "array", 5); 1853 U_STRING_INIT(k_type_alias, "alias", 5); 1854 U_STRING_INIT(k_type_intvector, "intvector", 9); 1855 U_STRING_INIT(k_type_import, "import", 6); 1856 U_STRING_INIT(k_type_include, "include", 7); 1857 1858 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1859 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1860 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1861 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1862 1863 gOmitCollationRules = omitCollationRules; 1864 } 1865 1866 static inline UBool isTable(enum EResourceType type) { 1867 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); 1868 } 1869 1870 static enum EResourceType 1871 parseResourceType(ParseState* state, UErrorCode *status) 1872 { 1873 struct UString *tokenValue; 1874 struct UString comment; 1875 enum EResourceType result = RT_UNKNOWN; 1876 uint32_t line=0; 1877 ustr_init(&comment); 1878 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); 1879 1880 if (U_FAILURE(*status)) 1881 { 1882 return RT_UNKNOWN; 1883 } 1884 1885 *status = U_ZERO_ERROR; 1886 1887 /* Search for normal types */ 1888 result=RT_UNKNOWN; 1889 while ((result=(EResourceType)(result+1)) < RT_RESERVED) { 1890 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1891 break; 1892 } 1893 } 1894 /* Now search for the aliases */ 1895 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1896 result = RT_INTEGER; 1897 } 1898 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1899 result = RT_BINARY; 1900 } 1901 else if (result == RT_RESERVED) { 1902 char tokenBuffer[1024]; 1903 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1904 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1905 *status = U_INVALID_FORMAT_ERROR; 1906 error(line, "unknown resource type '%s'", tokenBuffer); 1907 } 1908 1909 return result; 1910 } 1911 1912 /* parse a non-top-level resource */ 1913 static struct SResource * 1914 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) 1915 { 1916 enum ETokenType token; 1917 enum EResourceType resType = RT_UNKNOWN; 1918 ParseResourceFunction *parseFunction = NULL; 1919 struct UString *tokenValue; 1920 uint32_t startline; 1921 uint32_t line; 1922 1923 1924 token = getToken(state, &tokenValue, NULL, &startline, status); 1925 1926 if(isVerbose()){ 1927 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1928 } 1929 1930 /* name . [ ':' type ] '{' resource '}' */ 1931 /* This function parses from the colon onwards. If the colon is present, parse the 1932 type then try to parse a resource of that type. If there is no explicit type, 1933 work it out using the lookahead tokens. */ 1934 switch (token) 1935 { 1936 case TOK_EOF: 1937 *status = U_INVALID_FORMAT_ERROR; 1938 error(startline, "Unexpected EOF encountered"); 1939 return NULL; 1940 1941 case TOK_ERROR: 1942 *status = U_INVALID_FORMAT_ERROR; 1943 return NULL; 1944 1945 case TOK_COLON: 1946 resType = parseResourceType(state, status); 1947 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1948 1949 if (U_FAILURE(*status)) 1950 { 1951 return NULL; 1952 } 1953 1954 break; 1955 1956 case TOK_OPEN_BRACE: 1957 break; 1958 1959 default: 1960 *status = U_INVALID_FORMAT_ERROR; 1961 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1962 return NULL; 1963 } 1964 1965 1966 if (resType == RT_UNKNOWN) 1967 { 1968 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1969 We could have any of the following: 1970 { { => array (nested) 1971 { :/} => array 1972 { string , => string array 1973 1974 { string { => table 1975 1976 { string :/{ => table 1977 { string } => string 1978 */ 1979 1980 token = peekToken(state, 0, NULL, &line, NULL,status); 1981 1982 if (U_FAILURE(*status)) 1983 { 1984 return NULL; 1985 } 1986 1987 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1988 { 1989 resType = RT_ARRAY; 1990 } 1991 else if (token == TOK_STRING) 1992 { 1993 token = peekToken(state, 1, NULL, &line, NULL, status); 1994 1995 if (U_FAILURE(*status)) 1996 { 1997 return NULL; 1998 } 1999 2000 switch (token) 2001 { 2002 case TOK_COMMA: resType = RT_ARRAY; break; 2003 case TOK_OPEN_BRACE: resType = RT_TABLE; break; 2004 case TOK_CLOSE_BRACE: resType = RT_STRING; break; 2005 case TOK_COLON: resType = RT_TABLE; break; 2006 default: 2007 *status = U_INVALID_FORMAT_ERROR; 2008 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 2009 return NULL; 2010 } 2011 } 2012 else 2013 { 2014 *status = U_INVALID_FORMAT_ERROR; 2015 error(line, "Unexpected token after '{'"); 2016 return NULL; 2017 } 2018 2019 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 2020 } else if(resType == RT_TABLE_NO_FALLBACK) { 2021 *status = U_INVALID_FORMAT_ERROR; 2022 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 2023 return NULL; 2024 } 2025 2026 2027 /* We should now know what we need to parse next, so call the appropriate parser 2028 function and return. */ 2029 parseFunction = gResourceTypes[resType].parseFunction; 2030 if (parseFunction != NULL) { 2031 return parseFunction(state, tag, startline, comment, status); 2032 } 2033 else { 2034 *status = U_INTERNAL_PROGRAM_ERROR; 2035 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 2036 } 2037 2038 return NULL; 2039 } 2040 2041 /* parse the top-level resource */ 2042 struct SRBRoot * 2043 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation, 2044 UErrorCode *status) 2045 { 2046 struct UString *tokenValue; 2047 struct UString comment; 2048 uint32_t line; 2049 enum EResourceType bundleType; 2050 enum ETokenType token; 2051 ParseState state; 2052 uint32_t i; 2053 2054 2055 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 2056 { 2057 ustr_init(&state.lookahead[i].value); 2058 ustr_init(&state.lookahead[i].comment); 2059 } 2060 2061 initLookahead(&state, buf, status); 2062 2063 state.inputdir = inputDir; 2064 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; 2065 state.outputdir = outputDir; 2066 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; 2067 state.makeBinaryCollation = makeBinaryCollation; 2068 2069 ustr_init(&comment); 2070 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); 2071 2072 state.bundle = bundle_open(&comment, FALSE, status); 2073 2074 if (state.bundle == NULL || U_FAILURE(*status)) 2075 { 2076 return NULL; 2077 } 2078 2079 2080 bundle_setlocale(state.bundle, tokenValue->fChars, status); 2081 2082 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 2083 token = getToken(&state, NULL, NULL, &line, status); 2084 if(token==TOK_COLON) { 2085 *status=U_ZERO_ERROR; 2086 bundleType=parseResourceType(&state, status); 2087 2088 if(isTable(bundleType)) 2089 { 2090 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); 2091 } 2092 else 2093 { 2094 *status=U_PARSE_ERROR; 2095 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 2096 } 2097 } 2098 else 2099 { 2100 /* not a colon */ 2101 if(token==TOK_OPEN_BRACE) 2102 { 2103 *status=U_ZERO_ERROR; 2104 bundleType=RT_TABLE; 2105 } 2106 else 2107 { 2108 /* neither colon nor open brace */ 2109 *status=U_PARSE_ERROR; 2110 bundleType=RT_UNKNOWN; 2111 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 2112 } 2113 } 2114 2115 if (U_FAILURE(*status)) 2116 { 2117 bundle_close(state.bundle, status); 2118 return NULL; 2119 } 2120 2121 if(bundleType==RT_TABLE_NO_FALLBACK) { 2122 /* 2123 * Parse a top-level table with the table(nofallback) declaration. 2124 * This is the same as a regular table, but also sets the 2125 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 2126 */ 2127 state.bundle->noFallback=TRUE; 2128 } 2129 /* top-level tables need not handle special table names like "collations" */ 2130 realParseTable(&state, state.bundle->fRoot, NULL, line, status); 2131 if(dependencyArray!=NULL){ 2132 table_add(state.bundle->fRoot, dependencyArray, 0, status); 2133 dependencyArray = NULL; 2134 } 2135 if (U_FAILURE(*status)) 2136 { 2137 bundle_close(state.bundle, status); 2138 res_close(dependencyArray); 2139 return NULL; 2140 } 2141 2142 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) 2143 { 2144 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 2145 if(isStrict()){ 2146 *status = U_INVALID_FORMAT_ERROR; 2147 return NULL; 2148 } 2149 } 2150 2151 cleanupLookahead(&state); 2152 ustr_deinit(&comment); 2153 return state.bundle; 2154 } 2155