1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1998-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File parse.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 05/26/99 stephen Creation. 17 * 02/25/00 weiv Overhaul to write udata 18 * 5/10/01 Ram removed ustdio dependency 19 * 06/10/2001 Dominic Ludlam <dom (at) recoil.org> Rewritten 20 ******************************************************************************* 21 */ 22 23 // Safer use of UnicodeString. 24 #ifndef UNISTR_FROM_CHAR_EXPLICIT 25 # define UNISTR_FROM_CHAR_EXPLICIT explicit 26 #endif 27 28 // Less important, but still a good idea. 29 #ifndef UNISTR_FROM_STRING_EXPLICIT 30 # define UNISTR_FROM_STRING_EXPLICIT explicit 31 #endif 32 33 #include <assert.h> 34 #include "parse.h" 35 #include "errmsg.h" 36 #include "uhash.h" 37 #include "cmemory.h" 38 #include "cstring.h" 39 #include "uinvchar.h" 40 #include "read.h" 41 #include "ustr.h" 42 #include "reslist.h" 43 #include "rbt_pars.h" 44 #include "genrb.h" 45 #include "unicode/stringpiece.h" 46 #include "unicode/unistr.h" 47 #include "unicode/ustring.h" 48 #include "unicode/uscript.h" 49 #include "unicode/utf16.h" 50 #include "unicode/putil.h" 51 #include "charstr.h" 52 #include "collationbuilder.h" 53 #include "collationdata.h" 54 #include "collationdatareader.h" 55 #include "collationdatawriter.h" 56 #include "collationfastlatinbuilder.h" 57 #include "collationinfo.h" 58 #include "collationroot.h" 59 #include "collationruleparser.h" 60 #include "collationtailoring.h" 61 #include <stdio.h> 62 63 /* Number of tokens to read ahead of the current stream position */ 64 #define MAX_LOOKAHEAD 3 65 66 #define CR 0x000D 67 #define LF 0x000A 68 #define SPACE 0x0020 69 #define TAB 0x0009 70 #define ESCAPE 0x005C 71 #define HASH 0x0023 72 #define QUOTE 0x0027 73 #define ZERO 0x0030 74 #define STARTCOMMAND 0x005B 75 #define ENDCOMMAND 0x005D 76 #define OPENSQBRACKET 0x005B 77 #define CLOSESQBRACKET 0x005D 78 79 using icu::CharString; 80 using icu::LocalMemory; 81 using icu::LocalPointer; 82 using icu::LocalUCHARBUFPointer; 83 using icu::StringPiece; 84 using icu::UnicodeString; 85 86 struct Lookahead 87 { 88 enum ETokenType type; 89 struct UString value; 90 struct UString comment; 91 uint32_t line; 92 }; 93 94 /* keep in sync with token defines in read.h */ 95 const char *tokenNames[TOK_TOKEN_COUNT] = 96 { 97 "string", /* A string token, such as "MonthNames" */ 98 "'{'", /* An opening brace character */ 99 "'}'", /* A closing brace character */ 100 "','", /* A comma */ 101 "':'", /* A colon */ 102 103 "<end of file>", /* End of the file has been reached successfully */ 104 "<end of line>" 105 }; 106 107 /* Just to store "TRUE" */ 108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 109 110 typedef struct { 111 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 112 uint32_t lookaheadPosition; 113 UCHARBUF *buffer; 114 struct SRBRoot *bundle; 115 const char *inputdir; 116 uint32_t inputdirLength; 117 const char *outputdir; 118 uint32_t outputdirLength; 119 const char *filename; 120 UBool makeBinaryCollation; 121 UBool omitCollationRules; 122 } ParseState; 123 124 typedef struct SResource * 125 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); 126 127 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); 128 129 /* The nature of the lookahead buffer: 130 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 131 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 132 When getToken is called, the current pointer is moved to the next slot and the 133 old slot is filled with the next token from the reader by calling getNextToken. 134 The token values are stored in the slot, which means that token values don't 135 survive a call to getToken, ie. 136 137 UString *value; 138 139 getToken(&value, NULL, status); 140 getToken(NULL, NULL, status); bad - value is now a different string 141 */ 142 static void 143 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) 144 { 145 static uint32_t initTypeStrings = 0; 146 uint32_t i; 147 148 if (!initTypeStrings) 149 { 150 initTypeStrings = 1; 151 } 152 153 state->lookaheadPosition = 0; 154 state->buffer = buf; 155 156 resetLineNumber(); 157 158 for (i = 0; i < MAX_LOOKAHEAD; i++) 159 { 160 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 161 if (U_FAILURE(*status)) 162 { 163 return; 164 } 165 } 166 167 *status = U_ZERO_ERROR; 168 } 169 170 static void 171 cleanupLookahead(ParseState* state) 172 { 173 uint32_t i; 174 for (i = 0; i <= MAX_LOOKAHEAD; i++) 175 { 176 ustr_deinit(&state->lookahead[i].value); 177 ustr_deinit(&state->lookahead[i].comment); 178 } 179 180 } 181 182 static enum ETokenType 183 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 184 { 185 enum ETokenType result; 186 uint32_t i; 187 188 result = state->lookahead[state->lookaheadPosition].type; 189 190 if (tokenValue != NULL) 191 { 192 *tokenValue = &state->lookahead[state->lookaheadPosition].value; 193 } 194 195 if (linenumber != NULL) 196 { 197 *linenumber = state->lookahead[state->lookaheadPosition].line; 198 } 199 200 if (comment != NULL) 201 { 202 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 203 } 204 205 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 206 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 207 ustr_setlen(&state->lookahead[i].comment, 0, status); 208 ustr_setlen(&state->lookahead[i].value, 0, status); 209 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 210 211 /* printf("getToken, returning %s\n", tokenNames[result]); */ 212 213 return result; 214 } 215 216 static enum ETokenType 217 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) 218 { 219 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 220 221 if (U_FAILURE(*status)) 222 { 223 return TOK_ERROR; 224 } 225 226 if (lookaheadCount >= MAX_LOOKAHEAD) 227 { 228 *status = U_INTERNAL_PROGRAM_ERROR; 229 return TOK_ERROR; 230 } 231 232 if (tokenValue != NULL) 233 { 234 *tokenValue = &state->lookahead[i].value; 235 } 236 237 if (linenumber != NULL) 238 { 239 *linenumber = state->lookahead[i].line; 240 } 241 242 if(comment != NULL){ 243 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 244 } 245 246 return state->lookahead[i].type; 247 } 248 249 static void 250 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 251 { 252 uint32_t line; 253 254 enum ETokenType token = getToken(state, tokenValue, comment, &line, status); 255 256 if (linenumber != NULL) 257 { 258 *linenumber = line; 259 } 260 261 if (U_FAILURE(*status)) 262 { 263 return; 264 } 265 266 if (token != expectedToken) 267 { 268 *status = U_INVALID_FORMAT_ERROR; 269 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 270 } 271 else 272 { 273 *status = U_ZERO_ERROR; 274 } 275 } 276 277 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) 278 { 279 struct UString *tokenValue; 280 char *result; 281 uint32_t count; 282 283 expect(state, TOK_STRING, &tokenValue, comment, line, status); 284 285 if (U_FAILURE(*status)) 286 { 287 return NULL; 288 } 289 290 count = u_strlen(tokenValue->fChars); 291 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 292 *status = U_INVALID_FORMAT_ERROR; 293 error(*line, "invariant characters required for table keys, binary data, etc."); 294 return NULL; 295 } 296 297 result = static_cast<char *>(uprv_malloc(count+1)); 298 299 if (result == NULL) 300 { 301 *status = U_MEMORY_ALLOCATION_ERROR; 302 return NULL; 303 } 304 305 u_UCharsToChars(tokenValue->fChars, result, count+1); 306 return result; 307 } 308 309 static struct SResource * 310 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 311 { 312 struct SResource *result = NULL; 313 struct UString *tokenValue; 314 FileStream *file = NULL; 315 char filename[256] = { '\0' }; 316 char cs[128] = { '\0' }; 317 uint32_t line; 318 UBool quoted = FALSE; 319 UCHARBUF *ucbuf=NULL; 320 UChar32 c = 0; 321 const char* cp = NULL; 322 UChar *pTarget = NULL; 323 UChar *target = NULL; 324 UChar *targetLimit = NULL; 325 int32_t size = 0; 326 327 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 328 329 if(isVerbose()){ 330 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 331 } 332 333 if (U_FAILURE(*status)) 334 { 335 return NULL; 336 } 337 /* make the filename including the directory */ 338 if (state->inputdir != NULL) 339 { 340 uprv_strcat(filename, state->inputdir); 341 342 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 343 { 344 uprv_strcat(filename, U_FILE_SEP_STRING); 345 } 346 } 347 348 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 349 350 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 351 352 if (U_FAILURE(*status)) 353 { 354 return NULL; 355 } 356 uprv_strcat(filename, cs); 357 358 if(state->omitCollationRules) { 359 return res_none(); 360 } 361 362 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 363 364 if (U_FAILURE(*status)) { 365 error(line, "An error occurred while opening the input file %s\n", filename); 366 return NULL; 367 } 368 369 /* We allocate more space than actually required 370 * since the actual size needed for storing UChars 371 * is not known in UTF-8 byte stream 372 */ 373 size = ucbuf_size(ucbuf) + 1; 374 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 375 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 376 target = pTarget; 377 targetLimit = pTarget+size; 378 379 /* read the rules into the buffer */ 380 while (target < targetLimit) 381 { 382 c = ucbuf_getc(ucbuf, status); 383 if(c == QUOTE) { 384 quoted = (UBool)!quoted; 385 } 386 /* weiv (06/26/2002): adding the following: 387 * - preserving spaces in commands [...] 388 * - # comments until the end of line 389 */ 390 if (c == STARTCOMMAND && !quoted) 391 { 392 /* preserve commands 393 * closing bracket will be handled by the 394 * append at the end of the loop 395 */ 396 while(c != ENDCOMMAND) { 397 U_APPEND_CHAR32_ONLY(c, target); 398 c = ucbuf_getc(ucbuf, status); 399 } 400 } 401 else if (c == HASH && !quoted) { 402 /* skip comments */ 403 while(c != CR && c != LF) { 404 c = ucbuf_getc(ucbuf, status); 405 } 406 continue; 407 } 408 else if (c == ESCAPE) 409 { 410 c = unescape(ucbuf, status); 411 412 if (c == (UChar32)U_ERR) 413 { 414 uprv_free(pTarget); 415 T_FileStream_close(file); 416 return NULL; 417 } 418 } 419 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 420 { 421 /* ignore spaces carriage returns 422 * and line feed unless in the form \uXXXX 423 */ 424 continue; 425 } 426 427 /* Append UChar * after dissembling if c > 0xffff*/ 428 if (c != (UChar32)U_EOF) 429 { 430 U_APPEND_CHAR32_ONLY(c, target); 431 } 432 else 433 { 434 break; 435 } 436 } 437 438 /* terminate the string */ 439 if(target < targetLimit){ 440 *target = 0x0000; 441 } 442 443 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 444 445 446 ucbuf_close(ucbuf); 447 uprv_free(pTarget); 448 T_FileStream_close(file); 449 450 return result; 451 } 452 453 static struct SResource * 454 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 455 { 456 struct SResource *result = NULL; 457 struct UString *tokenValue; 458 FileStream *file = NULL; 459 char filename[256] = { '\0' }; 460 char cs[128] = { '\0' }; 461 uint32_t line; 462 UCHARBUF *ucbuf=NULL; 463 const char* cp = NULL; 464 UChar *pTarget = NULL; 465 const UChar *pSource = NULL; 466 int32_t size = 0; 467 468 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 469 470 if(isVerbose()){ 471 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 472 } 473 474 if (U_FAILURE(*status)) 475 { 476 return NULL; 477 } 478 /* make the filename including the directory */ 479 if (state->inputdir != NULL) 480 { 481 uprv_strcat(filename, state->inputdir); 482 483 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 484 { 485 uprv_strcat(filename, U_FILE_SEP_STRING); 486 } 487 } 488 489 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 490 491 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 492 493 if (U_FAILURE(*status)) 494 { 495 return NULL; 496 } 497 uprv_strcat(filename, cs); 498 499 500 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 501 502 if (U_FAILURE(*status)) { 503 error(line, "An error occurred while opening the input file %s\n", filename); 504 return NULL; 505 } 506 507 /* We allocate more space than actually required 508 * since the actual size needed for storing UChars 509 * is not known in UTF-8 byte stream 510 */ 511 pSource = ucbuf_getBuffer(ucbuf, &size, status); 512 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 513 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 514 515 #if !UCONFIG_NO_TRANSLITERATION 516 size = utrans_stripRules(pSource, size, pTarget, status); 517 #else 518 size = 0; 519 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 520 #endif 521 result = string_open(state->bundle, tag, pTarget, size, NULL, status); 522 523 ucbuf_close(ucbuf); 524 uprv_free(pTarget); 525 T_FileStream_close(file); 526 527 return result; 528 } 529 static ArrayResource* dependencyArray = NULL; 530 531 static struct SResource * 532 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 533 { 534 struct SResource *result = NULL; 535 struct SResource *elem = NULL; 536 struct UString *tokenValue; 537 uint32_t line; 538 char filename[256] = { '\0' }; 539 char cs[128] = { '\0' }; 540 541 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 542 543 if(isVerbose()){ 544 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 545 } 546 547 if (U_FAILURE(*status)) 548 { 549 return NULL; 550 } 551 /* make the filename including the directory */ 552 if (state->outputdir != NULL) 553 { 554 uprv_strcat(filename, state->outputdir); 555 556 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) 557 { 558 uprv_strcat(filename, U_FILE_SEP_STRING); 559 } 560 } 561 562 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 563 564 if (U_FAILURE(*status)) 565 { 566 return NULL; 567 } 568 uprv_strcat(filename, cs); 569 if(!T_FileStream_file_exists(filename)){ 570 if(isStrict()){ 571 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 572 }else{ 573 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 574 } 575 } 576 if(dependencyArray==NULL){ 577 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); 578 } 579 if(tag!=NULL){ 580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 581 } 582 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 583 584 dependencyArray->add(elem); 585 586 if (U_FAILURE(*status)) 587 { 588 return NULL; 589 } 590 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 591 return result; 592 } 593 static struct SResource * 594 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 595 { 596 struct UString *tokenValue; 597 struct SResource *result = NULL; 598 599 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 600 { 601 return parseUCARules(tag, startline, status); 602 }*/ 603 if(isVerbose()){ 604 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 605 } 606 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 607 608 if (U_SUCCESS(*status)) 609 { 610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 611 doesn't survive expect either) */ 612 613 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 614 if(U_SUCCESS(*status) && result) { 615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 616 617 if (U_FAILURE(*status)) 618 { 619 res_close(result); 620 return NULL; 621 } 622 } 623 } 624 625 return result; 626 } 627 628 static struct SResource * 629 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 630 { 631 struct UString *tokenValue; 632 struct SResource *result = NULL; 633 634 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 635 636 if(isVerbose()){ 637 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 638 } 639 640 if (U_SUCCESS(*status)) 641 { 642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 643 doesn't survive expect either) */ 644 645 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 646 647 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 648 649 if (U_FAILURE(*status)) 650 { 651 res_close(result); 652 return NULL; 653 } 654 } 655 656 return result; 657 } 658 659 #if !UCONFIG_NO_COLLATION 660 661 namespace { 662 663 static struct SResource* resLookup(struct SResource* res, const char* key){ 664 if (res == res_none() || !res->isTable()) { 665 return NULL; 666 } 667 668 TableResource *list = static_cast<TableResource *>(res); 669 SResource *current = list->fFirst; 670 while (current != NULL) { 671 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { 672 return current; 673 } 674 current = current->fNext; 675 } 676 return NULL; 677 } 678 679 class GenrbImporter : public icu::CollationRuleParser::Importer { 680 public: 681 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} 682 virtual ~GenrbImporter(); 683 virtual void getRules( 684 const char *localeID, const char *collationType, 685 UnicodeString &rules, 686 const char *&errorReason, UErrorCode &errorCode); 687 688 private: 689 const char *inputDir; 690 const char *outputDir; 691 }; 692 693 GenrbImporter::~GenrbImporter() {} 694 695 void 696 GenrbImporter::getRules( 697 const char *localeID, const char *collationType, 698 UnicodeString &rules, 699 const char *& /*errorReason*/, UErrorCode &errorCode) { 700 CharString filename(localeID, errorCode); 701 for(int32_t i = 0; i < filename.length(); i++){ 702 if(filename[i] == '-'){ 703 filename.data()[i] = '_'; 704 } 705 } 706 filename.append(".txt", errorCode); 707 if (U_FAILURE(errorCode)) { 708 return; 709 } 710 CharString inputDirBuf; 711 CharString openFileName; 712 if(inputDir == NULL) { 713 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR); 714 if (filenameBegin != NULL) { 715 /* 716 * When a filename ../../../data/root.txt is specified, 717 * we presume that the input directory is ../../../data 718 * This is very important when the resource file includes 719 * another file, like UCARules.txt or thaidict.brk. 720 */ 721 StringPiece dir = filename.toStringPiece(); 722 const char *filenameLimit = filename.data() + filename.length(); 723 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin)); 724 inputDirBuf.append(dir, errorCode); 725 inputDir = inputDirBuf.data(); 726 } 727 }else{ 728 int32_t dirlen = (int32_t)uprv_strlen(inputDir); 729 730 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) { 731 /* 732 * append the input dir to openFileName if the first char in 733 * filename is not file separator char and the last char input directory is not '.'. 734 * This is to support : 735 * genrb -s. /home/icu/data 736 * genrb -s. icu/data 737 * The user cannot mix notations like 738 * genrb -s. /icu/data --- the absolute path specified. -s redundant 739 * user should use 740 * genrb -s. icu/data --- start from CWD and look in icu/data dir 741 */ 742 openFileName.append(inputDir, dirlen, errorCode); 743 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 744 openFileName.append(U_FILE_SEP_CHAR, errorCode); 745 } 746 } 747 } 748 openFileName.append(filename, errorCode); 749 if(U_FAILURE(errorCode)) { 750 return; 751 } 752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data()); 753 const char* cp = ""; 754 LocalUCHARBUFPointer ucbuf( 755 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode)); 756 if(errorCode == U_FILE_ACCESS_ERROR) { 757 fprintf(stderr, "couldn't open file %s\n", openFileName.data()); 758 return; 759 } 760 if (ucbuf.isNull() || U_FAILURE(errorCode)) { 761 fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode)); 762 return; 763 } 764 765 /* Parse the data into an SRBRoot */ 766 LocalPointer<SRBRoot> data( 767 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode)); 768 if (U_FAILURE(errorCode)) { 769 return; 770 } 771 772 struct SResource *root = data->fRoot; 773 struct SResource *collations = resLookup(root, "collations"); 774 if (collations != NULL) { 775 struct SResource *collation = resLookup(collations, collationType); 776 if (collation != NULL) { 777 struct SResource *sequence = resLookup(collation, "Sequence"); 778 if (sequence != NULL && sequence->isString()) { 779 // No string pointer aliasing so that we need not hold onto the resource bundle. 780 StringResource *sr = static_cast<StringResource *>(sequence); 781 rules = sr->fString; 782 } 783 } 784 } 785 } 786 787 // Quick-and-dirty escaping function. 788 // Assumes that we are on an ASCII-based platform. 789 static void 790 escape(const UChar *s, char *buffer) { 791 int32_t length = u_strlen(s); 792 int32_t i = 0; 793 for (;;) { 794 UChar32 c; 795 U16_NEXT(s, i, length, c); 796 if (c == 0) { 797 *buffer = 0; 798 return; 799 } else if (0x20 <= c && c <= 0x7e) { 800 // printable ASCII 801 *buffer++ = (char)c; // assumes ASCII-based platform 802 } else { 803 buffer += sprintf(buffer, "\\u%04X", (int)c); 804 } 805 } 806 } 807 808 } // namespace 809 810 #endif // !UCONFIG_NO_COLLATION 811 812 static TableResource * 813 addCollation(ParseState* state, TableResource *result, const char *collationType, 814 uint32_t startline, UErrorCode *status) 815 { 816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure. 817 struct SResource *member = NULL; 818 struct UString *tokenValue; 819 struct UString comment; 820 enum ETokenType token; 821 char subtag[1024]; 822 UnicodeString rules; 823 UBool haveRules = FALSE; 824 UVersionInfo version; 825 uint32_t line; 826 827 /* '{' . (name resource)* '}' */ 828 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 829 830 for (;;) 831 { 832 ustr_init(&comment); 833 token = getToken(state, &tokenValue, &comment, &line, status); 834 835 if (token == TOK_CLOSE_BRACE) 836 { 837 break; 838 } 839 840 if (token != TOK_STRING) 841 { 842 res_close(result); 843 *status = U_INVALID_FORMAT_ERROR; 844 845 if (token == TOK_EOF) 846 { 847 error(startline, "unterminated table"); 848 } 849 else 850 { 851 error(line, "Unexpected token %s", tokenNames[token]); 852 } 853 854 return NULL; 855 } 856 857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 858 859 if (U_FAILURE(*status)) 860 { 861 res_close(result); 862 return NULL; 863 } 864 865 member = parseResource(state, subtag, NULL, status); 866 867 if (U_FAILURE(*status)) 868 { 869 res_close(result); 870 return NULL; 871 } 872 if (result == NULL) 873 { 874 // Ignore the parsed resources, continue parsing. 875 } 876 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString()) 877 { 878 StringResource *sr = static_cast<StringResource *>(member); 879 char ver[40]; 880 int32_t length = sr->length(); 881 882 if (length >= UPRV_LENGTHOF(ver)) 883 { 884 length = UPRV_LENGTHOF(ver) - 1; 885 } 886 887 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV); 888 u_versionFromString(version, ver); 889 890 result->add(member, line, *status); 891 member = NULL; 892 } 893 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 894 { 895 /* discard duplicate %%CollationBin if any*/ 896 } 897 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString()) 898 { 899 StringResource *sr = static_cast<StringResource *>(member); 900 rules = sr->fString; 901 haveRules = TRUE; 902 // Defer building the collator until we have seen 903 // all sub-elements of the collation table, including the Version. 904 /* in order to achieve smaller data files, we can direct genrb */ 905 /* to omit collation rules */ 906 if(!state->omitCollationRules) { 907 result->add(member, line, *status); 908 member = NULL; 909 } 910 } 911 else // Just copy non-special items. 912 { 913 result->add(member, line, *status); 914 member = NULL; 915 } 916 res_close(member); // TODO: use LocalPointer 917 if (U_FAILURE(*status)) 918 { 919 res_close(result); 920 return NULL; 921 } 922 } 923 924 if (!haveRules) { return result; } 925 926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 927 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 928 (void)collationType; 929 #else 930 // CLDR ticket #3949, ICU ticket #8082: 931 // Do not build collation binary data for for-import-only "private" collation rule strings. 932 if (uprv_strncmp(collationType, "private-", 8) == 0) { 933 if(isVerbose()) { 934 printf("Not building %s~%s collation binary\n", state->filename, collationType); 935 } 936 return result; 937 } 938 939 if(!state->makeBinaryCollation) { 940 if(isVerbose()) { 941 printf("Not building %s~%s collation binary\n", state->filename, collationType); 942 } 943 return result; 944 } 945 UErrorCode intStatus = U_ZERO_ERROR; 946 UParseError parseError; 947 uprv_memset(&parseError, 0, sizeof(parseError)); 948 GenrbImporter importer(state->inputdir, state->outputdir); 949 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus); 950 if(U_FAILURE(intStatus)) { 951 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus)); 952 res_close(result); 953 return NULL; // TODO: use LocalUResourceBundlePointer for result 954 } 955 icu::CollationBuilder builder(base, intStatus); 956 if(uprv_strncmp(collationType, "search", 6) == 0) { 957 builder.disableFastLatin(); // build fast-Latin table unless search collator 958 } 959 LocalPointer<icu::CollationTailoring> t( 960 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus)); 961 if(U_FAILURE(intStatus)) { 962 const char *reason = builder.getErrorReason(); 963 if(reason == NULL) { reason = ""; } 964 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s", 965 state->filename, collationType, 966 (long)parseError.offset, u_errorName(intStatus), reason); 967 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { 968 // Print pre- and post-context. 969 char preBuffer[100], postBuffer[100]; 970 escape(parseError.preContext, preBuffer); 971 escape(parseError.postContext, postBuffer); 972 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer); 973 } 974 if(isStrict() || t.isNull()) { 975 *status = intStatus; 976 res_close(result); 977 return NULL; 978 } 979 } 980 icu::LocalMemory<uint8_t> buffer; 981 int32_t capacity = 100000; 982 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity); 983 if(dest == NULL) { 984 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", 985 (long)capacity); 986 *status = U_MEMORY_ALLOCATION_ERROR; 987 res_close(result); 988 return NULL; 989 } 990 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1]; 991 int32_t totalSize = icu::CollationDataWriter::writeTailoring( 992 *t, *t->settings, indexes, dest, capacity, intStatus); 993 if(intStatus == U_BUFFER_OVERFLOW_ERROR) { 994 intStatus = U_ZERO_ERROR; 995 capacity = totalSize; 996 dest = buffer.allocateInsteadAndCopy(capacity); 997 if(dest == NULL) { 998 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", 999 (long)capacity); 1000 *status = U_MEMORY_ALLOCATION_ERROR; 1001 res_close(result); 1002 return NULL; 1003 } 1004 totalSize = icu::CollationDataWriter::writeTailoring( 1005 *t, *t->settings, indexes, dest, capacity, intStatus); 1006 } 1007 if(U_FAILURE(intStatus)) { 1008 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n", 1009 u_errorName(intStatus)); 1010 res_close(result); 1011 return NULL; 1012 } 1013 if(isVerbose()) { 1014 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); 1015 icu::CollationInfo::printSizes(totalSize, indexes); 1016 if(t->settings->hasReordering()) { 1017 printf("%s~%s collation reordering ranges:\n", state->filename, collationType); 1018 icu::CollationInfo::printReorderRanges( 1019 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); 1020 } 1021 } 1022 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); 1023 result->add(collationBin, line, *status); 1024 if (U_FAILURE(*status)) { 1025 res_close(result); 1026 return NULL; 1027 } 1028 #endif 1029 return result; 1030 } 1031 1032 static UBool 1033 keepCollationType(const char *type) { // android-changed 1034 // BEGIN android-added 1035 if (uprv_strcmp(type, "big5han") == 0) { return FALSE; } 1036 if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; } 1037 // END android-added 1038 return TRUE; 1039 } 1040 1041 static struct SResource * 1042 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 1043 { 1044 TableResource *result = NULL; 1045 struct SResource *member = NULL; 1046 struct UString *tokenValue; 1047 struct UString comment; 1048 enum ETokenType token; 1049 char subtag[1024], typeKeyword[1024]; 1050 uint32_t line; 1051 1052 result = table_open(state->bundle, tag, NULL, status); 1053 1054 if (result == NULL || U_FAILURE(*status)) 1055 { 1056 return NULL; 1057 } 1058 if(isVerbose()){ 1059 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1060 } 1061 if(!newCollation) { 1062 return addCollation(state, result, "(no type)", startline, status); 1063 } 1064 else { 1065 for(;;) { 1066 ustr_init(&comment); 1067 token = getToken(state, &tokenValue, &comment, &line, status); 1068 1069 if (token == TOK_CLOSE_BRACE) 1070 { 1071 return result; 1072 } 1073 1074 if (token != TOK_STRING) 1075 { 1076 res_close(result); 1077 *status = U_INVALID_FORMAT_ERROR; 1078 1079 if (token == TOK_EOF) 1080 { 1081 error(startline, "unterminated table"); 1082 } 1083 else 1084 { 1085 error(line, "Unexpected token %s", tokenNames[token]); 1086 } 1087 1088 return NULL; 1089 } 1090 1091 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1092 1093 if (U_FAILURE(*status)) 1094 { 1095 res_close(result); 1096 return NULL; 1097 } 1098 1099 if (uprv_strcmp(subtag, "default") == 0) 1100 { 1101 member = parseResource(state, subtag, NULL, status); 1102 1103 if (U_FAILURE(*status)) 1104 { 1105 res_close(result); 1106 return NULL; 1107 } 1108 1109 result->add(member, line, *status); 1110 } 1111 else 1112 { 1113 token = peekToken(state, 0, &tokenValue, &line, &comment, status); 1114 /* this probably needs to be refactored or recursively use the parser */ 1115 /* first we assume that our collation table won't have the explicit type */ 1116 /* then, we cannot handle aliases */ 1117 if(token == TOK_OPEN_BRACE) { 1118 token = getToken(state, &tokenValue, &comment, &line, status); 1119 TableResource *collationRes; 1120 if (keepCollationType(subtag)) { 1121 collationRes = table_open(state->bundle, subtag, NULL, status); 1122 } else { 1123 collationRes = NULL; 1124 } 1125 // need to parse the collation data regardless 1126 collationRes = addCollation(state, collationRes, subtag, startline, status); 1127 if (collationRes != NULL) { 1128 result->add(collationRes, startline, *status); 1129 } 1130 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 1131 /* we could have a table too */ 1132 token = peekToken(state, 1, &tokenValue, &line, &comment, status); 1133 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 1134 if(uprv_strcmp(typeKeyword, "alias") == 0) { 1135 member = parseResource(state, subtag, NULL, status); 1136 if (U_FAILURE(*status)) 1137 { 1138 res_close(result); 1139 return NULL; 1140 } 1141 1142 result->add(member, line, *status); 1143 } else { 1144 res_close(result); 1145 *status = U_INVALID_FORMAT_ERROR; 1146 return NULL; 1147 } 1148 } else { 1149 res_close(result); 1150 *status = U_INVALID_FORMAT_ERROR; 1151 return NULL; 1152 } 1153 } 1154 1155 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 1156 1157 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 1158 1159 if (U_FAILURE(*status)) 1160 { 1161 res_close(result); 1162 return NULL; 1163 } 1164 } 1165 } 1166 } 1167 1168 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 1169 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 1170 static struct SResource * 1171 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status) 1172 { 1173 struct SResource *member = NULL; 1174 struct UString *tokenValue=NULL; 1175 struct UString comment; 1176 enum ETokenType token; 1177 char subtag[1024]; 1178 uint32_t line; 1179 UBool readToken = FALSE; 1180 1181 /* '{' . (name resource)* '}' */ 1182 1183 if(isVerbose()){ 1184 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1185 } 1186 for (;;) 1187 { 1188 ustr_init(&comment); 1189 token = getToken(state, &tokenValue, &comment, &line, status); 1190 1191 if (token == TOK_CLOSE_BRACE) 1192 { 1193 if (!readToken) { 1194 warning(startline, "Encountered empty table"); 1195 } 1196 return table; 1197 } 1198 1199 if (token != TOK_STRING) 1200 { 1201 *status = U_INVALID_FORMAT_ERROR; 1202 1203 if (token == TOK_EOF) 1204 { 1205 error(startline, "unterminated table"); 1206 } 1207 else 1208 { 1209 error(line, "unexpected token %s", tokenNames[token]); 1210 } 1211 1212 return NULL; 1213 } 1214 1215 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 1216 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1217 } else { 1218 *status = U_INVALID_FORMAT_ERROR; 1219 error(line, "invariant characters required for table keys"); 1220 return NULL; 1221 } 1222 1223 if (U_FAILURE(*status)) 1224 { 1225 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 1226 return NULL; 1227 } 1228 1229 member = parseResource(state, subtag, &comment, status); 1230 1231 if (member == NULL || U_FAILURE(*status)) 1232 { 1233 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 1234 return NULL; 1235 } 1236 1237 table->add(member, line, *status); 1238 1239 if (U_FAILURE(*status)) 1240 { 1241 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 1242 return NULL; 1243 } 1244 readToken = TRUE; 1245 ustr_deinit(&comment); 1246 } 1247 1248 /* not reached */ 1249 /* A compiler warning will appear if all paths don't contain a return statement. */ 1250 /* *status = U_INTERNAL_PROGRAM_ERROR; 1251 return NULL;*/ 1252 } 1253 1254 static struct SResource * 1255 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1256 { 1257 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1258 { 1259 return parseCollationElements(state, tag, startline, FALSE, status); 1260 } 1261 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1262 { 1263 return parseCollationElements(state, tag, startline, TRUE, status); 1264 } 1265 if(isVerbose()){ 1266 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1267 } 1268 1269 TableResource *result = table_open(state->bundle, tag, comment, status); 1270 1271 if (result == NULL || U_FAILURE(*status)) 1272 { 1273 return NULL; 1274 } 1275 return realParseTable(state, result, tag, startline, status); 1276 } 1277 1278 static struct SResource * 1279 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1280 { 1281 struct SResource *member = NULL; 1282 struct UString *tokenValue; 1283 struct UString memberComments; 1284 enum ETokenType token; 1285 UBool readToken = FALSE; 1286 1287 ArrayResource *result = array_open(state->bundle, tag, comment, status); 1288 1289 if (result == NULL || U_FAILURE(*status)) 1290 { 1291 return NULL; 1292 } 1293 if(isVerbose()){ 1294 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1295 } 1296 1297 ustr_init(&memberComments); 1298 1299 /* '{' . resource [','] '}' */ 1300 for (;;) 1301 { 1302 /* reset length */ 1303 ustr_setlen(&memberComments, 0, status); 1304 1305 /* check for end of array, but don't consume next token unless it really is the end */ 1306 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); 1307 1308 1309 if (token == TOK_CLOSE_BRACE) 1310 { 1311 getToken(state, NULL, NULL, NULL, status); 1312 if (!readToken) { 1313 warning(startline, "Encountered empty array"); 1314 } 1315 break; 1316 } 1317 1318 if (token == TOK_EOF) 1319 { 1320 res_close(result); 1321 *status = U_INVALID_FORMAT_ERROR; 1322 error(startline, "unterminated array"); 1323 return NULL; 1324 } 1325 1326 /* string arrays are a special case */ 1327 if (token == TOK_STRING) 1328 { 1329 getToken(state, &tokenValue, &memberComments, NULL, status); 1330 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1331 } 1332 else 1333 { 1334 member = parseResource(state, NULL, &memberComments, status); 1335 } 1336 1337 if (member == NULL || U_FAILURE(*status)) 1338 { 1339 res_close(result); 1340 return NULL; 1341 } 1342 1343 result->add(member); 1344 1345 /* eat optional comma if present */ 1346 token = peekToken(state, 0, NULL, NULL, NULL, status); 1347 1348 if (token == TOK_COMMA) 1349 { 1350 getToken(state, NULL, NULL, NULL, status); 1351 } 1352 1353 if (U_FAILURE(*status)) 1354 { 1355 res_close(result); 1356 return NULL; 1357 } 1358 readToken = TRUE; 1359 } 1360 1361 ustr_deinit(&memberComments); 1362 return result; 1363 } 1364 1365 static struct SResource * 1366 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1367 { 1368 enum ETokenType token; 1369 char *string; 1370 int32_t value; 1371 UBool readToken = FALSE; 1372 char *stopstring; 1373 uint32_t len; 1374 struct UString memberComments; 1375 1376 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status); 1377 1378 if (result == NULL || U_FAILURE(*status)) 1379 { 1380 return NULL; 1381 } 1382 1383 if(isVerbose()){ 1384 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1385 } 1386 ustr_init(&memberComments); 1387 /* '{' . string [','] '}' */ 1388 for (;;) 1389 { 1390 ustr_setlen(&memberComments, 0, status); 1391 1392 /* check for end of array, but don't consume next token unless it really is the end */ 1393 token = peekToken(state, 0, NULL, NULL,&memberComments, status); 1394 1395 if (token == TOK_CLOSE_BRACE) 1396 { 1397 /* it's the end, consume the close brace */ 1398 getToken(state, NULL, NULL, NULL, status); 1399 if (!readToken) { 1400 warning(startline, "Encountered empty int vector"); 1401 } 1402 ustr_deinit(&memberComments); 1403 return result; 1404 } 1405 1406 string = getInvariantString(state, NULL, NULL, status); 1407 1408 if (U_FAILURE(*status)) 1409 { 1410 res_close(result); 1411 return NULL; 1412 } 1413 1414 /* For handling illegal char in the Intvector */ 1415 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1416 len=(uint32_t)(stopstring-string); 1417 1418 if(len==uprv_strlen(string)) 1419 { 1420 result->add(value, *status); 1421 uprv_free(string); 1422 token = peekToken(state, 0, NULL, NULL, NULL, status); 1423 } 1424 else 1425 { 1426 uprv_free(string); 1427 *status=U_INVALID_CHAR_FOUND; 1428 } 1429 1430 if (U_FAILURE(*status)) 1431 { 1432 res_close(result); 1433 return NULL; 1434 } 1435 1436 /* the comma is optional (even though it is required to prevent the reader from concatenating 1437 consecutive entries) so that a missing comma on the last entry isn't an error */ 1438 if (token == TOK_COMMA) 1439 { 1440 getToken(state, NULL, NULL, NULL, status); 1441 } 1442 readToken = TRUE; 1443 } 1444 1445 /* not reached */ 1446 /* A compiler warning will appear if all paths don't contain a return statement. */ 1447 /* intvector_close(result, status); 1448 *status = U_INTERNAL_PROGRAM_ERROR; 1449 return NULL;*/ 1450 } 1451 1452 static struct SResource * 1453 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1454 { 1455 uint32_t line; 1456 LocalMemory<char> string(getInvariantString(state, &line, NULL, status)); 1457 if (string.isNull() || U_FAILURE(*status)) 1458 { 1459 return NULL; 1460 } 1461 1462 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1463 if (U_FAILURE(*status)) 1464 { 1465 return NULL; 1466 } 1467 1468 if(isVerbose()){ 1469 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1470 } 1471 1472 uint32_t count = (uint32_t)uprv_strlen(string.getAlias()); 1473 if (count > 0){ 1474 if((count % 2)==0){ 1475 LocalMemory<uint8_t> value; 1476 if (value.allocateInsteadAndCopy(count) == NULL) 1477 { 1478 *status = U_MEMORY_ALLOCATION_ERROR; 1479 return NULL; 1480 } 1481 1482 char toConv[3] = {'\0', '\0', '\0'}; 1483 for (uint32_t i = 0; i < count; i += 2) 1484 { 1485 toConv[0] = string[i]; 1486 toConv[1] = string[i + 1]; 1487 1488 char *stopstring; 1489 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1490 uint32_t len=(uint32_t)(stopstring-toConv); 1491 1492 if(len!=2) 1493 { 1494 *status=U_INVALID_CHAR_FOUND; 1495 return NULL; 1496 } 1497 } 1498 1499 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status); 1500 } 1501 else 1502 { 1503 *status = U_INVALID_CHAR_FOUND; 1504 error(line, "Encountered invalid binary value (length is odd)"); 1505 return NULL; 1506 } 1507 } 1508 else 1509 { 1510 warning(startline, "Encountered empty binary value"); 1511 return bin_open(state->bundle, tag, 0, NULL, "", comment, status); 1512 } 1513 } 1514 1515 static struct SResource * 1516 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1517 { 1518 struct SResource *result = NULL; 1519 int32_t value; 1520 char *string; 1521 char *stopstring; 1522 uint32_t len; 1523 1524 string = getInvariantString(state, NULL, NULL, status); 1525 1526 if (string == NULL || U_FAILURE(*status)) 1527 { 1528 return NULL; 1529 } 1530 1531 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1532 1533 if (U_FAILURE(*status)) 1534 { 1535 uprv_free(string); 1536 return NULL; 1537 } 1538 1539 if(isVerbose()){ 1540 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1541 } 1542 1543 if (uprv_strlen(string) <= 0) 1544 { 1545 warning(startline, "Encountered empty integer. Default value is 0."); 1546 } 1547 1548 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1549 /* and handle illegal char in the integer*/ 1550 value = uprv_strtoul(string, &stopstring, 0); 1551 len=(uint32_t)(stopstring-string); 1552 if(len==uprv_strlen(string)) 1553 { 1554 result = int_open(state->bundle, tag, value, comment, status); 1555 } 1556 else 1557 { 1558 *status=U_INVALID_CHAR_FOUND; 1559 } 1560 uprv_free(string); 1561 1562 return result; 1563 } 1564 1565 static struct SResource * 1566 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1567 { 1568 uint32_t line; 1569 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status)); 1570 if (U_FAILURE(*status)) 1571 { 1572 return NULL; 1573 } 1574 1575 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1576 1577 if (U_FAILURE(*status)) 1578 { 1579 return NULL; 1580 } 1581 1582 if(isVerbose()){ 1583 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1584 } 1585 1586 /* Open the input file for reading */ 1587 CharString fullname; 1588 if (state->inputdir != NULL) { 1589 fullname.append(state->inputdir, *status); 1590 } 1591 fullname.appendPathPart(filename.getAlias(), *status); 1592 if (U_FAILURE(*status)) { 1593 return NULL; 1594 } 1595 1596 FileStream *file = T_FileStream_open(fullname.data(), "rb"); 1597 if (file == NULL) 1598 { 1599 error(line, "couldn't open input file %s", filename.getAlias()); 1600 *status = U_FILE_ACCESS_ERROR; 1601 return NULL; 1602 } 1603 1604 int32_t len = T_FileStream_size(file); 1605 LocalMemory<uint8_t> data; 1606 if(data.allocateInsteadAndCopy(len) == NULL) 1607 { 1608 *status = U_MEMORY_ALLOCATION_ERROR; 1609 T_FileStream_close (file); 1610 return NULL; 1611 } 1612 1613 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len); 1614 T_FileStream_close (file); 1615 1616 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status); 1617 } 1618 1619 static struct SResource * 1620 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1621 { 1622 struct SResource *result; 1623 int32_t len=0; 1624 char *filename; 1625 uint32_t line; 1626 UChar *pTarget = NULL; 1627 1628 UCHARBUF *ucbuf; 1629 char *fullname = NULL; 1630 int32_t count = 0; 1631 const char* cp = NULL; 1632 const UChar* uBuffer = NULL; 1633 1634 filename = getInvariantString(state, &line, NULL, status); 1635 count = (int32_t)uprv_strlen(filename); 1636 1637 if (U_FAILURE(*status)) 1638 { 1639 return NULL; 1640 } 1641 1642 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1643 1644 if (U_FAILURE(*status)) 1645 { 1646 uprv_free(filename); 1647 return NULL; 1648 } 1649 1650 if(isVerbose()){ 1651 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1652 } 1653 1654 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1655 /* test for NULL */ 1656 if(fullname == NULL) 1657 { 1658 *status = U_MEMORY_ALLOCATION_ERROR; 1659 uprv_free(filename); 1660 return NULL; 1661 } 1662 1663 if(state->inputdir!=NULL){ 1664 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1665 { 1666 1667 uprv_strcpy(fullname, state->inputdir); 1668 1669 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1670 fullname[state->inputdirLength + 1] = '\0'; 1671 1672 uprv_strcat(fullname, filename); 1673 } 1674 else 1675 { 1676 uprv_strcpy(fullname, state->inputdir); 1677 uprv_strcat(fullname, filename); 1678 } 1679 }else{ 1680 uprv_strcpy(fullname,filename); 1681 } 1682 1683 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1684 1685 if (U_FAILURE(*status)) { 1686 error(line, "couldn't open input file %s\n", filename); 1687 return NULL; 1688 } 1689 1690 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1691 result = string_open(state->bundle, tag, uBuffer, len, comment, status); 1692 1693 ucbuf_close(ucbuf); 1694 1695 uprv_free(pTarget); 1696 1697 uprv_free(filename); 1698 uprv_free(fullname); 1699 1700 return result; 1701 } 1702 1703 1704 1705 1706 1707 U_STRING_DECL(k_type_string, "string", 6); 1708 U_STRING_DECL(k_type_binary, "binary", 6); 1709 U_STRING_DECL(k_type_bin, "bin", 3); 1710 U_STRING_DECL(k_type_table, "table", 5); 1711 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1712 U_STRING_DECL(k_type_int, "int", 3); 1713 U_STRING_DECL(k_type_integer, "integer", 7); 1714 U_STRING_DECL(k_type_array, "array", 5); 1715 U_STRING_DECL(k_type_alias, "alias", 5); 1716 U_STRING_DECL(k_type_intvector, "intvector", 9); 1717 U_STRING_DECL(k_type_import, "import", 6); 1718 U_STRING_DECL(k_type_include, "include", 7); 1719 1720 /* Various non-standard processing plugins that create one or more special resources. */ 1721 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1722 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1723 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1724 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1725 1726 typedef enum EResourceType 1727 { 1728 RESTYPE_UNKNOWN, 1729 RESTYPE_STRING, 1730 RESTYPE_BINARY, 1731 RESTYPE_TABLE, 1732 RESTYPE_TABLE_NO_FALLBACK, 1733 RESTYPE_INTEGER, 1734 RESTYPE_ARRAY, 1735 RESTYPE_ALIAS, 1736 RESTYPE_INTVECTOR, 1737 RESTYPE_IMPORT, 1738 RESTYPE_INCLUDE, 1739 RESTYPE_PROCESS_UCA_RULES, 1740 RESTYPE_PROCESS_COLLATION, 1741 RESTYPE_PROCESS_TRANSLITERATOR, 1742 RESTYPE_PROCESS_DEPENDENCY, 1743 RESTYPE_RESERVED 1744 } EResourceType; 1745 1746 static struct { 1747 const char *nameChars; /* only used for debugging */ 1748 const UChar *nameUChars; 1749 ParseResourceFunction *parseFunction; 1750 } gResourceTypes[] = { 1751 {"Unknown", NULL, NULL}, 1752 {"string", k_type_string, parseString}, 1753 {"binary", k_type_binary, parseBinary}, 1754 {"table", k_type_table, parseTable}, 1755 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1756 {"integer", k_type_integer, parseInteger}, 1757 {"array", k_type_array, parseArray}, 1758 {"alias", k_type_alias, parseAlias}, 1759 {"intvector", k_type_intvector, parseIntVector}, 1760 {"import", k_type_import, parseImport}, 1761 {"include", k_type_include, parseInclude}, 1762 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1763 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1764 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1765 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1766 {"reserved", NULL, NULL} 1767 }; 1768 1769 void initParser() 1770 { 1771 U_STRING_INIT(k_type_string, "string", 6); 1772 U_STRING_INIT(k_type_binary, "binary", 6); 1773 U_STRING_INIT(k_type_bin, "bin", 3); 1774 U_STRING_INIT(k_type_table, "table", 5); 1775 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1776 U_STRING_INIT(k_type_int, "int", 3); 1777 U_STRING_INIT(k_type_integer, "integer", 7); 1778 U_STRING_INIT(k_type_array, "array", 5); 1779 U_STRING_INIT(k_type_alias, "alias", 5); 1780 U_STRING_INIT(k_type_intvector, "intvector", 9); 1781 U_STRING_INIT(k_type_import, "import", 6); 1782 U_STRING_INIT(k_type_include, "include", 7); 1783 1784 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1785 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1786 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1787 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1788 } 1789 1790 static inline UBool isTable(enum EResourceType type) { 1791 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK); 1792 } 1793 1794 static enum EResourceType 1795 parseResourceType(ParseState* state, UErrorCode *status) 1796 { 1797 struct UString *tokenValue; 1798 struct UString comment; 1799 enum EResourceType result = RESTYPE_UNKNOWN; 1800 uint32_t line=0; 1801 ustr_init(&comment); 1802 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); 1803 1804 if (U_FAILURE(*status)) 1805 { 1806 return RESTYPE_UNKNOWN; 1807 } 1808 1809 *status = U_ZERO_ERROR; 1810 1811 /* Search for normal types */ 1812 result=RESTYPE_UNKNOWN; 1813 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) { 1814 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1815 break; 1816 } 1817 } 1818 /* Now search for the aliases */ 1819 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1820 result = RESTYPE_INTEGER; 1821 } 1822 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1823 result = RESTYPE_BINARY; 1824 } 1825 else if (result == RESTYPE_RESERVED) { 1826 char tokenBuffer[1024]; 1827 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1828 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1829 *status = U_INVALID_FORMAT_ERROR; 1830 error(line, "unknown resource type '%s'", tokenBuffer); 1831 } 1832 1833 return result; 1834 } 1835 1836 /* parse a non-top-level resource */ 1837 static struct SResource * 1838 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) 1839 { 1840 enum ETokenType token; 1841 enum EResourceType resType = RESTYPE_UNKNOWN; 1842 ParseResourceFunction *parseFunction = NULL; 1843 struct UString *tokenValue; 1844 uint32_t startline; 1845 uint32_t line; 1846 1847 1848 token = getToken(state, &tokenValue, NULL, &startline, status); 1849 1850 if(isVerbose()){ 1851 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1852 } 1853 1854 /* name . [ ':' type ] '{' resource '}' */ 1855 /* This function parses from the colon onwards. If the colon is present, parse the 1856 type then try to parse a resource of that type. If there is no explicit type, 1857 work it out using the lookahead tokens. */ 1858 switch (token) 1859 { 1860 case TOK_EOF: 1861 *status = U_INVALID_FORMAT_ERROR; 1862 error(startline, "Unexpected EOF encountered"); 1863 return NULL; 1864 1865 case TOK_ERROR: 1866 *status = U_INVALID_FORMAT_ERROR; 1867 return NULL; 1868 1869 case TOK_COLON: 1870 resType = parseResourceType(state, status); 1871 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1872 1873 if (U_FAILURE(*status)) 1874 { 1875 return NULL; 1876 } 1877 1878 break; 1879 1880 case TOK_OPEN_BRACE: 1881 break; 1882 1883 default: 1884 *status = U_INVALID_FORMAT_ERROR; 1885 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1886 return NULL; 1887 } 1888 1889 1890 if (resType == RESTYPE_UNKNOWN) 1891 { 1892 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1893 We could have any of the following: 1894 { { => array (nested) 1895 { :/} => array 1896 { string , => string array 1897 1898 { string { => table 1899 1900 { string :/{ => table 1901 { string } => string 1902 */ 1903 1904 token = peekToken(state, 0, NULL, &line, NULL,status); 1905 1906 if (U_FAILURE(*status)) 1907 { 1908 return NULL; 1909 } 1910 1911 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1912 { 1913 resType = RESTYPE_ARRAY; 1914 } 1915 else if (token == TOK_STRING) 1916 { 1917 token = peekToken(state, 1, NULL, &line, NULL, status); 1918 1919 if (U_FAILURE(*status)) 1920 { 1921 return NULL; 1922 } 1923 1924 switch (token) 1925 { 1926 case TOK_COMMA: resType = RESTYPE_ARRAY; break; 1927 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break; 1928 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break; 1929 case TOK_COLON: resType = RESTYPE_TABLE; break; 1930 default: 1931 *status = U_INVALID_FORMAT_ERROR; 1932 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 1933 return NULL; 1934 } 1935 } 1936 else 1937 { 1938 *status = U_INVALID_FORMAT_ERROR; 1939 error(line, "Unexpected token after '{'"); 1940 return NULL; 1941 } 1942 1943 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 1944 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) { 1945 *status = U_INVALID_FORMAT_ERROR; 1946 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 1947 return NULL; 1948 } 1949 1950 1951 /* We should now know what we need to parse next, so call the appropriate parser 1952 function and return. */ 1953 parseFunction = gResourceTypes[resType].parseFunction; 1954 if (parseFunction != NULL) { 1955 return parseFunction(state, tag, startline, comment, status); 1956 } 1957 else { 1958 *status = U_INTERNAL_PROGRAM_ERROR; 1959 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 1960 } 1961 1962 return NULL; 1963 } 1964 1965 /* parse the top-level resource */ 1966 struct SRBRoot * 1967 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename, 1968 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) 1969 { 1970 struct UString *tokenValue; 1971 struct UString comment; 1972 uint32_t line; 1973 enum EResourceType bundleType; 1974 enum ETokenType token; 1975 ParseState state; 1976 uint32_t i; 1977 1978 1979 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 1980 { 1981 ustr_init(&state.lookahead[i].value); 1982 ustr_init(&state.lookahead[i].comment); 1983 } 1984 1985 initLookahead(&state, buf, status); 1986 1987 state.inputdir = inputDir; 1988 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; 1989 state.outputdir = outputDir; 1990 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; 1991 state.filename = filename; 1992 state.makeBinaryCollation = makeBinaryCollation; 1993 state.omitCollationRules = omitCollationRules; 1994 1995 ustr_init(&comment); 1996 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); 1997 1998 state.bundle = new SRBRoot(&comment, FALSE, *status); 1999 2000 if (state.bundle == NULL || U_FAILURE(*status)) 2001 { 2002 return NULL; 2003 } 2004 2005 2006 state.bundle->setLocale(tokenValue->fChars, *status); 2007 2008 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 2009 token = getToken(&state, NULL, NULL, &line, status); 2010 if(token==TOK_COLON) { 2011 *status=U_ZERO_ERROR; 2012 bundleType=parseResourceType(&state, status); 2013 2014 if(isTable(bundleType)) 2015 { 2016 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); 2017 } 2018 else 2019 { 2020 *status=U_PARSE_ERROR; 2021 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 2022 } 2023 } 2024 else 2025 { 2026 /* not a colon */ 2027 if(token==TOK_OPEN_BRACE) 2028 { 2029 *status=U_ZERO_ERROR; 2030 bundleType=RESTYPE_TABLE; 2031 } 2032 else 2033 { 2034 /* neither colon nor open brace */ 2035 *status=U_PARSE_ERROR; 2036 bundleType=RESTYPE_UNKNOWN; 2037 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 2038 } 2039 } 2040 2041 if (U_FAILURE(*status)) 2042 { 2043 delete state.bundle; 2044 return NULL; 2045 } 2046 2047 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) { 2048 /* 2049 * Parse a top-level table with the table(nofallback) declaration. 2050 * This is the same as a regular table, but also sets the 2051 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 2052 */ 2053 state.bundle->fNoFallback=TRUE; 2054 } 2055 /* top-level tables need not handle special table names like "collations" */ 2056 assert(!state.bundle->fIsPoolBundle); 2057 assert(state.bundle->fRoot->fType == URES_TABLE); 2058 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot); 2059 realParseTable(&state, rootTable, NULL, line, status); 2060 if(dependencyArray!=NULL){ 2061 rootTable->add(dependencyArray, 0, *status); 2062 dependencyArray = NULL; 2063 } 2064 if (U_FAILURE(*status)) 2065 { 2066 delete state.bundle; 2067 res_close(dependencyArray); 2068 return NULL; 2069 } 2070 2071 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) 2072 { 2073 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 2074 if(isStrict()){ 2075 *status = U_INVALID_FORMAT_ERROR; 2076 return NULL; 2077 } 2078 } 2079 2080 cleanupLookahead(&state); 2081 ustr_deinit(&comment); 2082 return state.bundle; 2083 } 2084