1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2015, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File parse.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/26/99 stephen Creation. 15 * 02/25/00 weiv Overhaul to write udata 16 * 5/10/01 Ram removed ustdio dependency 17 * 06/10/2001 Dominic Ludlam <dom (at) recoil.org> Rewritten 18 ******************************************************************************* 19 */ 20 21 // Safer use of UnicodeString. 22 #ifndef UNISTR_FROM_CHAR_EXPLICIT 23 # define UNISTR_FROM_CHAR_EXPLICIT explicit 24 #endif 25 26 // Less important, but still a good idea. 27 #ifndef UNISTR_FROM_STRING_EXPLICIT 28 # define UNISTR_FROM_STRING_EXPLICIT explicit 29 #endif 30 31 #include <assert.h> 32 #include "parse.h" 33 #include "errmsg.h" 34 #include "uhash.h" 35 #include "cmemory.h" 36 #include "cstring.h" 37 #include "uinvchar.h" 38 #include "read.h" 39 #include "ustr.h" 40 #include "reslist.h" 41 #include "rbt_pars.h" 42 #include "genrb.h" 43 #include "unicode/stringpiece.h" 44 #include "unicode/unistr.h" 45 #include "unicode/ustring.h" 46 #include "unicode/uscript.h" 47 #include "unicode/utf16.h" 48 #include "unicode/putil.h" 49 #include "charstr.h" 50 #include "collationbuilder.h" 51 #include "collationdata.h" 52 #include "collationdatareader.h" 53 #include "collationdatawriter.h" 54 #include "collationfastlatinbuilder.h" 55 #include "collationinfo.h" 56 #include "collationroot.h" 57 #include "collationruleparser.h" 58 #include "collationtailoring.h" 59 #include <stdio.h> 60 61 /* Number of tokens to read ahead of the current stream position */ 62 #define MAX_LOOKAHEAD 3 63 64 #define CR 0x000D 65 #define LF 0x000A 66 #define SPACE 0x0020 67 #define TAB 0x0009 68 #define ESCAPE 0x005C 69 #define HASH 0x0023 70 #define QUOTE 0x0027 71 #define ZERO 0x0030 72 #define STARTCOMMAND 0x005B 73 #define ENDCOMMAND 0x005D 74 #define OPENSQBRACKET 0x005B 75 #define CLOSESQBRACKET 0x005D 76 77 using icu::CharString; 78 using icu::LocalMemory; 79 using icu::LocalPointer; 80 using icu::LocalUCHARBUFPointer; 81 using icu::StringPiece; 82 using icu::UnicodeString; 83 84 struct Lookahead 85 { 86 enum ETokenType type; 87 struct UString value; 88 struct UString comment; 89 uint32_t line; 90 }; 91 92 /* keep in sync with token defines in read.h */ 93 const char *tokenNames[TOK_TOKEN_COUNT] = 94 { 95 "string", /* A string token, such as "MonthNames" */ 96 "'{'", /* An opening brace character */ 97 "'}'", /* A closing brace character */ 98 "','", /* A comma */ 99 "':'", /* A colon */ 100 101 "<end of file>", /* End of the file has been reached successfully */ 102 "<end of line>" 103 }; 104 105 /* Just to store "TRUE" */ 106 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 107 108 typedef struct { 109 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 110 uint32_t lookaheadPosition; 111 UCHARBUF *buffer; 112 struct SRBRoot *bundle; 113 const char *inputdir; 114 uint32_t inputdirLength; 115 const char *outputdir; 116 uint32_t outputdirLength; 117 const char *filename; 118 UBool makeBinaryCollation; 119 UBool omitCollationRules; 120 } ParseState; 121 122 typedef struct SResource * 123 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); 124 125 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); 126 127 /* The nature of the lookahead buffer: 128 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 129 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 130 When getToken is called, the current pointer is moved to the next slot and the 131 old slot is filled with the next token from the reader by calling getNextToken. 132 The token values are stored in the slot, which means that token values don't 133 survive a call to getToken, ie. 134 135 UString *value; 136 137 getToken(&value, NULL, status); 138 getToken(NULL, NULL, status); bad - value is now a different string 139 */ 140 static void 141 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) 142 { 143 static uint32_t initTypeStrings = 0; 144 uint32_t i; 145 146 if (!initTypeStrings) 147 { 148 initTypeStrings = 1; 149 } 150 151 state->lookaheadPosition = 0; 152 state->buffer = buf; 153 154 resetLineNumber(); 155 156 for (i = 0; i < MAX_LOOKAHEAD; i++) 157 { 158 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 159 if (U_FAILURE(*status)) 160 { 161 return; 162 } 163 } 164 165 *status = U_ZERO_ERROR; 166 } 167 168 static void 169 cleanupLookahead(ParseState* state) 170 { 171 uint32_t i; 172 for (i = 0; i <= MAX_LOOKAHEAD; i++) 173 { 174 ustr_deinit(&state->lookahead[i].value); 175 ustr_deinit(&state->lookahead[i].comment); 176 } 177 178 } 179 180 static enum ETokenType 181 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 182 { 183 enum ETokenType result; 184 uint32_t i; 185 186 result = state->lookahead[state->lookaheadPosition].type; 187 188 if (tokenValue != NULL) 189 { 190 *tokenValue = &state->lookahead[state->lookaheadPosition].value; 191 } 192 193 if (linenumber != NULL) 194 { 195 *linenumber = state->lookahead[state->lookaheadPosition].line; 196 } 197 198 if (comment != NULL) 199 { 200 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 201 } 202 203 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 204 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 205 ustr_setlen(&state->lookahead[i].comment, 0, status); 206 ustr_setlen(&state->lookahead[i].value, 0, status); 207 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); 208 209 /* printf("getToken, returning %s\n", tokenNames[result]); */ 210 211 return result; 212 } 213 214 static enum ETokenType 215 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) 216 { 217 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 218 219 if (U_FAILURE(*status)) 220 { 221 return TOK_ERROR; 222 } 223 224 if (lookaheadCount >= MAX_LOOKAHEAD) 225 { 226 *status = U_INTERNAL_PROGRAM_ERROR; 227 return TOK_ERROR; 228 } 229 230 if (tokenValue != NULL) 231 { 232 *tokenValue = &state->lookahead[i].value; 233 } 234 235 if (linenumber != NULL) 236 { 237 *linenumber = state->lookahead[i].line; 238 } 239 240 if(comment != NULL){ 241 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); 242 } 243 244 return state->lookahead[i].type; 245 } 246 247 static void 248 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 249 { 250 uint32_t line; 251 252 enum ETokenType token = getToken(state, tokenValue, comment, &line, status); 253 254 if (linenumber != NULL) 255 { 256 *linenumber = line; 257 } 258 259 if (U_FAILURE(*status)) 260 { 261 return; 262 } 263 264 if (token != expectedToken) 265 { 266 *status = U_INVALID_FORMAT_ERROR; 267 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 268 } 269 else 270 { 271 *status = U_ZERO_ERROR; 272 } 273 } 274 275 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) 276 { 277 struct UString *tokenValue; 278 char *result; 279 uint32_t count; 280 281 expect(state, TOK_STRING, &tokenValue, comment, line, status); 282 283 if (U_FAILURE(*status)) 284 { 285 return NULL; 286 } 287 288 count = u_strlen(tokenValue->fChars); 289 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 290 *status = U_INVALID_FORMAT_ERROR; 291 error(*line, "invariant characters required for table keys, binary data, etc."); 292 return NULL; 293 } 294 295 result = static_cast<char *>(uprv_malloc(count+1)); 296 297 if (result == NULL) 298 { 299 *status = U_MEMORY_ALLOCATION_ERROR; 300 return NULL; 301 } 302 303 u_UCharsToChars(tokenValue->fChars, result, count+1); 304 return result; 305 } 306 307 static struct SResource * 308 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 309 { 310 struct SResource *result = NULL; 311 struct UString *tokenValue; 312 FileStream *file = NULL; 313 char filename[256] = { '\0' }; 314 char cs[128] = { '\0' }; 315 uint32_t line; 316 UBool quoted = FALSE; 317 UCHARBUF *ucbuf=NULL; 318 UChar32 c = 0; 319 const char* cp = NULL; 320 UChar *pTarget = NULL; 321 UChar *target = NULL; 322 UChar *targetLimit = NULL; 323 int32_t size = 0; 324 325 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 326 327 if(isVerbose()){ 328 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 329 } 330 331 if (U_FAILURE(*status)) 332 { 333 return NULL; 334 } 335 /* make the filename including the directory */ 336 if (state->inputdir != NULL) 337 { 338 uprv_strcat(filename, state->inputdir); 339 340 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 341 { 342 uprv_strcat(filename, U_FILE_SEP_STRING); 343 } 344 } 345 346 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 347 348 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 349 350 if (U_FAILURE(*status)) 351 { 352 return NULL; 353 } 354 uprv_strcat(filename, cs); 355 356 if(state->omitCollationRules) { 357 return res_none(); 358 } 359 360 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 361 362 if (U_FAILURE(*status)) { 363 error(line, "An error occured while opening the input file %s\n", filename); 364 return NULL; 365 } 366 367 /* We allocate more space than actually required 368 * since the actual size needed for storing UChars 369 * is not known in UTF-8 byte stream 370 */ 371 size = ucbuf_size(ucbuf) + 1; 372 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 373 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 374 target = pTarget; 375 targetLimit = pTarget+size; 376 377 /* read the rules into the buffer */ 378 while (target < targetLimit) 379 { 380 c = ucbuf_getc(ucbuf, status); 381 if(c == QUOTE) { 382 quoted = (UBool)!quoted; 383 } 384 /* weiv (06/26/2002): adding the following: 385 * - preserving spaces in commands [...] 386 * - # comments until the end of line 387 */ 388 if (c == STARTCOMMAND && !quoted) 389 { 390 /* preserve commands 391 * closing bracket will be handled by the 392 * append at the end of the loop 393 */ 394 while(c != ENDCOMMAND) { 395 U_APPEND_CHAR32_ONLY(c, target); 396 c = ucbuf_getc(ucbuf, status); 397 } 398 } 399 else if (c == HASH && !quoted) { 400 /* skip comments */ 401 while(c != CR && c != LF) { 402 c = ucbuf_getc(ucbuf, status); 403 } 404 continue; 405 } 406 else if (c == ESCAPE) 407 { 408 c = unescape(ucbuf, status); 409 410 if (c == (UChar32)U_ERR) 411 { 412 uprv_free(pTarget); 413 T_FileStream_close(file); 414 return NULL; 415 } 416 } 417 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 418 { 419 /* ignore spaces carriage returns 420 * and line feed unless in the form \uXXXX 421 */ 422 continue; 423 } 424 425 /* Append UChar * after dissembling if c > 0xffff*/ 426 if (c != (UChar32)U_EOF) 427 { 428 U_APPEND_CHAR32_ONLY(c, target); 429 } 430 else 431 { 432 break; 433 } 434 } 435 436 /* terminate the string */ 437 if(target < targetLimit){ 438 *target = 0x0000; 439 } 440 441 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 442 443 444 ucbuf_close(ucbuf); 445 uprv_free(pTarget); 446 T_FileStream_close(file); 447 448 return result; 449 } 450 451 static struct SResource * 452 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) 453 { 454 struct SResource *result = NULL; 455 struct UString *tokenValue; 456 FileStream *file = NULL; 457 char filename[256] = { '\0' }; 458 char cs[128] = { '\0' }; 459 uint32_t line; 460 UCHARBUF *ucbuf=NULL; 461 const char* cp = NULL; 462 UChar *pTarget = NULL; 463 const UChar *pSource = NULL; 464 int32_t size = 0; 465 466 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 467 468 if(isVerbose()){ 469 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 470 } 471 472 if (U_FAILURE(*status)) 473 { 474 return NULL; 475 } 476 /* make the filename including the directory */ 477 if (state->inputdir != NULL) 478 { 479 uprv_strcat(filename, state->inputdir); 480 481 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 482 { 483 uprv_strcat(filename, U_FILE_SEP_STRING); 484 } 485 } 486 487 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 488 489 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 490 491 if (U_FAILURE(*status)) 492 { 493 return NULL; 494 } 495 uprv_strcat(filename, cs); 496 497 498 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 499 500 if (U_FAILURE(*status)) { 501 error(line, "An error occured while opening the input file %s\n", filename); 502 return NULL; 503 } 504 505 /* We allocate more space than actually required 506 * since the actual size needed for storing UChars 507 * is not known in UTF-8 byte stream 508 */ 509 pSource = ucbuf_getBuffer(ucbuf, &size, status); 510 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 511 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 512 513 #if !UCONFIG_NO_TRANSLITERATION 514 size = utrans_stripRules(pSource, size, pTarget, status); 515 #else 516 size = 0; 517 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 518 #endif 519 result = string_open(state->bundle, tag, pTarget, size, NULL, status); 520 521 ucbuf_close(ucbuf); 522 uprv_free(pTarget); 523 T_FileStream_close(file); 524 525 return result; 526 } 527 static ArrayResource* dependencyArray = NULL; 528 529 static struct SResource * 530 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 531 { 532 struct SResource *result = NULL; 533 struct SResource *elem = NULL; 534 struct UString *tokenValue; 535 uint32_t line; 536 char filename[256] = { '\0' }; 537 char cs[128] = { '\0' }; 538 539 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); 540 541 if(isVerbose()){ 542 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 543 } 544 545 if (U_FAILURE(*status)) 546 { 547 return NULL; 548 } 549 /* make the filename including the directory */ 550 if (state->outputdir != NULL) 551 { 552 uprv_strcat(filename, state->outputdir); 553 554 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) 555 { 556 uprv_strcat(filename, U_FILE_SEP_STRING); 557 } 558 } 559 560 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 561 562 if (U_FAILURE(*status)) 563 { 564 return NULL; 565 } 566 uprv_strcat(filename, cs); 567 if(!T_FileStream_file_exists(filename)){ 568 if(isStrict()){ 569 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 570 }else{ 571 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 572 } 573 } 574 if(dependencyArray==NULL){ 575 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); 576 } 577 if(tag!=NULL){ 578 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 579 } 580 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 581 582 dependencyArray->add(elem); 583 584 if (U_FAILURE(*status)) 585 { 586 return NULL; 587 } 588 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 589 return result; 590 } 591 static struct SResource * 592 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 593 { 594 struct UString *tokenValue; 595 struct SResource *result = NULL; 596 597 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 598 { 599 return parseUCARules(tag, startline, status); 600 }*/ 601 if(isVerbose()){ 602 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 603 } 604 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 605 606 if (U_SUCCESS(*status)) 607 { 608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 609 doesn't survive expect either) */ 610 611 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 612 if(U_SUCCESS(*status) && result) { 613 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 614 615 if (U_FAILURE(*status)) 616 { 617 res_close(result); 618 return NULL; 619 } 620 } 621 } 622 623 return result; 624 } 625 626 static struct SResource * 627 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 628 { 629 struct UString *tokenValue; 630 struct SResource *result = NULL; 631 632 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); 633 634 if(isVerbose()){ 635 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 636 } 637 638 if (U_SUCCESS(*status)) 639 { 640 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 641 doesn't survive expect either) */ 642 643 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 644 645 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 646 647 if (U_FAILURE(*status)) 648 { 649 res_close(result); 650 return NULL; 651 } 652 } 653 654 return result; 655 } 656 657 #if !UCONFIG_NO_COLLATION 658 659 namespace { 660 661 static struct SResource* resLookup(struct SResource* res, const char* key){ 662 if (res == res_none() || !res->isTable()) { 663 return NULL; 664 } 665 666 TableResource *list = static_cast<TableResource *>(res); 667 SResource *current = list->fFirst; 668 while (current != NULL) { 669 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { 670 return current; 671 } 672 current = current->fNext; 673 } 674 return NULL; 675 } 676 677 class GenrbImporter : public icu::CollationRuleParser::Importer { 678 public: 679 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} 680 virtual ~GenrbImporter(); 681 virtual void getRules( 682 const char *localeID, const char *collationType, 683 UnicodeString &rules, 684 const char *&errorReason, UErrorCode &errorCode); 685 686 private: 687 const char *inputDir; 688 const char *outputDir; 689 }; 690 691 GenrbImporter::~GenrbImporter() {} 692 693 void 694 GenrbImporter::getRules( 695 const char *localeID, const char *collationType, 696 UnicodeString &rules, 697 const char *& /*errorReason*/, UErrorCode &errorCode) { 698 CharString filename(localeID, errorCode); 699 for(int32_t i = 0; i < filename.length(); i++){ 700 if(filename[i] == '-'){ 701 filename.data()[i] = '_'; 702 } 703 } 704 filename.append(".txt", errorCode); 705 if (U_FAILURE(errorCode)) { 706 return; 707 } 708 CharString inputDirBuf; 709 CharString openFileName; 710 if(inputDir == NULL) { 711 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR); 712 if (filenameBegin != NULL) { 713 /* 714 * When a filename ../../../data/root.txt is specified, 715 * we presume that the input directory is ../../../data 716 * This is very important when the resource file includes 717 * another file, like UCARules.txt or thaidict.brk. 718 */ 719 StringPiece dir = filename.toStringPiece(); 720 const char *filenameLimit = filename.data() + filename.length(); 721 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin)); 722 inputDirBuf.append(dir, errorCode); 723 inputDir = inputDirBuf.data(); 724 } 725 }else{ 726 int32_t dirlen = (int32_t)uprv_strlen(inputDir); 727 728 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) { 729 /* 730 * append the input dir to openFileName if the first char in 731 * filename is not file separator char and the last char input directory is not '.'. 732 * This is to support : 733 * genrb -s. /home/icu/data 734 * genrb -s. icu/data 735 * The user cannot mix notations like 736 * genrb -s. /icu/data --- the absolute path specified. -s redundant 737 * user should use 738 * genrb -s. icu/data --- start from CWD and look in icu/data dir 739 */ 740 openFileName.append(inputDir, dirlen, errorCode); 741 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 742 openFileName.append(U_FILE_SEP_CHAR, errorCode); 743 } 744 } 745 } 746 openFileName.append(filename, errorCode); 747 if(U_FAILURE(errorCode)) { 748 return; 749 } 750 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data()); 751 const char* cp = ""; 752 LocalUCHARBUFPointer ucbuf( 753 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode)); 754 if(errorCode == U_FILE_ACCESS_ERROR) { 755 fprintf(stderr, "couldn't open file %s\n", openFileName.data()); 756 return; 757 } 758 if (ucbuf.isNull() || U_FAILURE(errorCode)) { 759 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode)); 760 return; 761 } 762 763 /* Parse the data into an SRBRoot */ 764 struct SRBRoot *data = 765 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode); 766 if (U_FAILURE(errorCode)) { 767 return; 768 } 769 770 struct SResource *root = data->fRoot; 771 struct SResource *collations = resLookup(root, "collations"); 772 if (collations != NULL) { 773 struct SResource *collation = resLookup(collations, collationType); 774 if (collation != NULL) { 775 struct SResource *sequence = resLookup(collation, "Sequence"); 776 if (sequence != NULL && sequence->isString()) { 777 // No string pointer aliasing so that we need not hold onto the resource bundle. 778 StringResource *sr = static_cast<StringResource *>(sequence); 779 rules = sr->fString; 780 } 781 } 782 } 783 } 784 785 // Quick-and-dirty escaping function. 786 // Assumes that we are on an ASCII-based platform. 787 static void 788 escape(const UChar *s, char *buffer) { 789 int32_t length = u_strlen(s); 790 int32_t i = 0; 791 for (;;) { 792 UChar32 c; 793 U16_NEXT(s, i, length, c); 794 if (c == 0) { 795 *buffer = 0; 796 return; 797 } else if (0x20 <= c && c <= 0x7e) { 798 // printable ASCII 799 *buffer++ = (char)c; // assumes ASCII-based platform 800 } else { 801 buffer += sprintf(buffer, "\\u%04X", (int)c); 802 } 803 } 804 } 805 806 } // namespace 807 808 #endif // !UCONFIG_NO_COLLATION 809 810 static TableResource * 811 addCollation(ParseState* state, TableResource *result, const char *collationType, 812 uint32_t startline, UErrorCode *status) 813 { 814 // TODO: Use LocalPointer for result, or make caller close it when there is a failure. 815 struct SResource *member = NULL; 816 struct UString *tokenValue; 817 struct UString comment; 818 enum ETokenType token; 819 char subtag[1024]; 820 UnicodeString rules; 821 UBool haveRules = FALSE; 822 UVersionInfo version; 823 uint32_t line; 824 825 /* '{' . (name resource)* '}' */ 826 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 827 828 for (;;) 829 { 830 ustr_init(&comment); 831 token = getToken(state, &tokenValue, &comment, &line, status); 832 833 if (token == TOK_CLOSE_BRACE) 834 { 835 break; 836 } 837 838 if (token != TOK_STRING) 839 { 840 res_close(result); 841 *status = U_INVALID_FORMAT_ERROR; 842 843 if (token == TOK_EOF) 844 { 845 error(startline, "unterminated table"); 846 } 847 else 848 { 849 error(line, "Unexpected token %s", tokenNames[token]); 850 } 851 852 return NULL; 853 } 854 855 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 856 857 if (U_FAILURE(*status)) 858 { 859 res_close(result); 860 return NULL; 861 } 862 863 member = parseResource(state, subtag, NULL, status); 864 865 if (U_FAILURE(*status)) 866 { 867 res_close(result); 868 return NULL; 869 } 870 if (result == NULL) 871 { 872 // Ignore the parsed resources, continue parsing. 873 } 874 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString()) 875 { 876 StringResource *sr = static_cast<StringResource *>(member); 877 char ver[40]; 878 int32_t length = sr->length(); 879 880 if (length >= UPRV_LENGTHOF(ver)) 881 { 882 length = UPRV_LENGTHOF(ver) - 1; 883 } 884 885 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV); 886 u_versionFromString(version, ver); 887 888 result->add(member, line, *status); 889 member = NULL; 890 } 891 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 892 { 893 /* discard duplicate %%CollationBin if any*/ 894 } 895 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString()) 896 { 897 StringResource *sr = static_cast<StringResource *>(member); 898 rules = sr->fString; 899 haveRules = TRUE; 900 // Defer building the collator until we have seen 901 // all sub-elements of the collation table, including the Version. 902 /* in order to achieve smaller data files, we can direct genrb */ 903 /* to omit collation rules */ 904 if(!state->omitCollationRules) { 905 result->add(member, line, *status); 906 member = NULL; 907 } 908 } 909 else // Just copy non-special items. 910 { 911 result->add(member, line, *status); 912 member = NULL; 913 } 914 res_close(member); // TODO: use LocalPointer 915 if (U_FAILURE(*status)) 916 { 917 res_close(result); 918 return NULL; 919 } 920 } 921 922 if (!haveRules) { return result; } 923 924 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 925 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 926 (void)collationType; 927 #else 928 // CLDR ticket #3949, ICU ticket #8082: 929 // Do not build collation binary data for for-import-only "private" collation rule strings. 930 if (uprv_strncmp(collationType, "private-", 8) == 0) { 931 if(isVerbose()) { 932 printf("Not building %s~%s collation binary\n", state->filename, collationType); 933 } 934 return result; 935 } 936 937 if(!state->makeBinaryCollation) { 938 if(isVerbose()) { 939 printf("Not building %s~%s collation binary\n", state->filename, collationType); 940 } 941 return result; 942 } 943 UErrorCode intStatus = U_ZERO_ERROR; 944 UParseError parseError; 945 uprv_memset(&parseError, 0, sizeof(parseError)); 946 GenrbImporter importer(state->inputdir, state->outputdir); 947 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus); 948 if(U_FAILURE(intStatus)) { 949 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus)); 950 res_close(result); 951 return NULL; // TODO: use LocalUResourceBundlePointer for result 952 } 953 icu::CollationBuilder builder(base, intStatus); 954 if(uprv_strncmp(collationType, "search", 6) == 0) { 955 builder.disableFastLatin(); // build fast-Latin table unless search collator 956 } 957 LocalPointer<icu::CollationTailoring> t( 958 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus)); 959 if(U_FAILURE(intStatus)) { 960 const char *reason = builder.getErrorReason(); 961 if(reason == NULL) { reason = ""; } 962 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s", 963 state->filename, collationType, 964 (long)parseError.offset, u_errorName(intStatus), reason); 965 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { 966 // Print pre- and post-context. 967 char preBuffer[100], postBuffer[100]; 968 escape(parseError.preContext, preBuffer); 969 escape(parseError.postContext, postBuffer); 970 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer); 971 } 972 if(isStrict() || t.isNull()) { 973 *status = intStatus; 974 res_close(result); 975 return NULL; 976 } 977 } 978 icu::LocalMemory<uint8_t> buffer; 979 int32_t capacity = 100000; 980 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity); 981 if(dest == NULL) { 982 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", 983 (long)capacity); 984 *status = U_MEMORY_ALLOCATION_ERROR; 985 res_close(result); 986 return NULL; 987 } 988 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1]; 989 int32_t totalSize = icu::CollationDataWriter::writeTailoring( 990 *t, *t->settings, indexes, dest, capacity, intStatus); 991 if(intStatus == U_BUFFER_OVERFLOW_ERROR) { 992 intStatus = U_ZERO_ERROR; 993 capacity = totalSize; 994 dest = buffer.allocateInsteadAndCopy(capacity); 995 if(dest == NULL) { 996 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n", 997 (long)capacity); 998 *status = U_MEMORY_ALLOCATION_ERROR; 999 res_close(result); 1000 return NULL; 1001 } 1002 totalSize = icu::CollationDataWriter::writeTailoring( 1003 *t, *t->settings, indexes, dest, capacity, intStatus); 1004 } 1005 if(U_FAILURE(intStatus)) { 1006 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n", 1007 u_errorName(intStatus)); 1008 res_close(result); 1009 return NULL; 1010 } 1011 if(isVerbose()) { 1012 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); 1013 icu::CollationInfo::printSizes(totalSize, indexes); 1014 if(t->settings->hasReordering()) { 1015 printf("%s~%s collation reordering ranges:\n", state->filename, collationType); 1016 icu::CollationInfo::printReorderRanges( 1017 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); 1018 } 1019 } 1020 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); 1021 result->add(collationBin, line, *status); 1022 if (U_FAILURE(*status)) { 1023 res_close(result); 1024 return NULL; 1025 } 1026 #endif 1027 return result; 1028 } 1029 1030 static UBool 1031 keepCollationType(const char *type) { // android-changed 1032 // BEGIN android-added 1033 if (uprv_strcmp(type, "big5han") == 0) { return FALSE; } 1034 if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; } 1035 // END android-added 1036 return TRUE; 1037 } 1038 1039 static struct SResource * 1040 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 1041 { 1042 TableResource *result = NULL; 1043 struct SResource *member = NULL; 1044 struct UString *tokenValue; 1045 struct UString comment; 1046 enum ETokenType token; 1047 char subtag[1024], typeKeyword[1024]; 1048 uint32_t line; 1049 1050 result = table_open(state->bundle, tag, NULL, status); 1051 1052 if (result == NULL || U_FAILURE(*status)) 1053 { 1054 return NULL; 1055 } 1056 if(isVerbose()){ 1057 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1058 } 1059 if(!newCollation) { 1060 return addCollation(state, result, "(no type)", startline, status); 1061 } 1062 else { 1063 for(;;) { 1064 ustr_init(&comment); 1065 token = getToken(state, &tokenValue, &comment, &line, status); 1066 1067 if (token == TOK_CLOSE_BRACE) 1068 { 1069 return result; 1070 } 1071 1072 if (token != TOK_STRING) 1073 { 1074 res_close(result); 1075 *status = U_INVALID_FORMAT_ERROR; 1076 1077 if (token == TOK_EOF) 1078 { 1079 error(startline, "unterminated table"); 1080 } 1081 else 1082 { 1083 error(line, "Unexpected token %s", tokenNames[token]); 1084 } 1085 1086 return NULL; 1087 } 1088 1089 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1090 1091 if (U_FAILURE(*status)) 1092 { 1093 res_close(result); 1094 return NULL; 1095 } 1096 1097 if (uprv_strcmp(subtag, "default") == 0) 1098 { 1099 member = parseResource(state, subtag, NULL, status); 1100 1101 if (U_FAILURE(*status)) 1102 { 1103 res_close(result); 1104 return NULL; 1105 } 1106 1107 result->add(member, line, *status); 1108 } 1109 else 1110 { 1111 token = peekToken(state, 0, &tokenValue, &line, &comment, status); 1112 /* this probably needs to be refactored or recursively use the parser */ 1113 /* first we assume that our collation table won't have the explicit type */ 1114 /* then, we cannot handle aliases */ 1115 if(token == TOK_OPEN_BRACE) { 1116 token = getToken(state, &tokenValue, &comment, &line, status); 1117 TableResource *collationRes; 1118 if (keepCollationType(subtag)) { 1119 collationRes = table_open(state->bundle, subtag, NULL, status); 1120 } else { 1121 collationRes = NULL; 1122 } 1123 // need to parse the collation data regardless 1124 collationRes = addCollation(state, collationRes, subtag, startline, status); 1125 if (collationRes != NULL) { 1126 result->add(collationRes, startline, *status); 1127 } 1128 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 1129 /* we could have a table too */ 1130 token = peekToken(state, 1, &tokenValue, &line, &comment, status); 1131 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 1132 if(uprv_strcmp(typeKeyword, "alias") == 0) { 1133 member = parseResource(state, subtag, NULL, status); 1134 if (U_FAILURE(*status)) 1135 { 1136 res_close(result); 1137 return NULL; 1138 } 1139 1140 result->add(member, line, *status); 1141 } else { 1142 res_close(result); 1143 *status = U_INVALID_FORMAT_ERROR; 1144 return NULL; 1145 } 1146 } else { 1147 res_close(result); 1148 *status = U_INVALID_FORMAT_ERROR; 1149 return NULL; 1150 } 1151 } 1152 1153 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 1154 1155 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 1156 1157 if (U_FAILURE(*status)) 1158 { 1159 res_close(result); 1160 return NULL; 1161 } 1162 } 1163 } 1164 } 1165 1166 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 1167 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 1168 static struct SResource * 1169 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status) 1170 { 1171 struct SResource *member = NULL; 1172 struct UString *tokenValue=NULL; 1173 struct UString comment; 1174 enum ETokenType token; 1175 char subtag[1024]; 1176 uint32_t line; 1177 UBool readToken = FALSE; 1178 1179 /* '{' . (name resource)* '}' */ 1180 1181 if(isVerbose()){ 1182 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1183 } 1184 for (;;) 1185 { 1186 ustr_init(&comment); 1187 token = getToken(state, &tokenValue, &comment, &line, status); 1188 1189 if (token == TOK_CLOSE_BRACE) 1190 { 1191 if (!readToken) { 1192 warning(startline, "Encountered empty table"); 1193 } 1194 return table; 1195 } 1196 1197 if (token != TOK_STRING) 1198 { 1199 *status = U_INVALID_FORMAT_ERROR; 1200 1201 if (token == TOK_EOF) 1202 { 1203 error(startline, "unterminated table"); 1204 } 1205 else 1206 { 1207 error(line, "unexpected token %s", tokenNames[token]); 1208 } 1209 1210 return NULL; 1211 } 1212 1213 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 1214 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 1215 } else { 1216 *status = U_INVALID_FORMAT_ERROR; 1217 error(line, "invariant characters required for table keys"); 1218 return NULL; 1219 } 1220 1221 if (U_FAILURE(*status)) 1222 { 1223 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 1224 return NULL; 1225 } 1226 1227 member = parseResource(state, subtag, &comment, status); 1228 1229 if (member == NULL || U_FAILURE(*status)) 1230 { 1231 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 1232 return NULL; 1233 } 1234 1235 table->add(member, line, *status); 1236 1237 if (U_FAILURE(*status)) 1238 { 1239 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 1240 return NULL; 1241 } 1242 readToken = TRUE; 1243 ustr_deinit(&comment); 1244 } 1245 1246 /* not reached */ 1247 /* A compiler warning will appear if all paths don't contain a return statement. */ 1248 /* *status = U_INTERNAL_PROGRAM_ERROR; 1249 return NULL;*/ 1250 } 1251 1252 static struct SResource * 1253 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1254 { 1255 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1256 { 1257 return parseCollationElements(state, tag, startline, FALSE, status); 1258 } 1259 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1260 { 1261 return parseCollationElements(state, tag, startline, TRUE, status); 1262 } 1263 if(isVerbose()){ 1264 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1265 } 1266 1267 TableResource *result = table_open(state->bundle, tag, comment, status); 1268 1269 if (result == NULL || U_FAILURE(*status)) 1270 { 1271 return NULL; 1272 } 1273 return realParseTable(state, result, tag, startline, status); 1274 } 1275 1276 static struct SResource * 1277 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1278 { 1279 struct SResource *member = NULL; 1280 struct UString *tokenValue; 1281 struct UString memberComments; 1282 enum ETokenType token; 1283 UBool readToken = FALSE; 1284 1285 ArrayResource *result = array_open(state->bundle, tag, comment, status); 1286 1287 if (result == NULL || U_FAILURE(*status)) 1288 { 1289 return NULL; 1290 } 1291 if(isVerbose()){ 1292 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1293 } 1294 1295 ustr_init(&memberComments); 1296 1297 /* '{' . resource [','] '}' */ 1298 for (;;) 1299 { 1300 /* reset length */ 1301 ustr_setlen(&memberComments, 0, status); 1302 1303 /* check for end of array, but don't consume next token unless it really is the end */ 1304 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); 1305 1306 1307 if (token == TOK_CLOSE_BRACE) 1308 { 1309 getToken(state, NULL, NULL, NULL, status); 1310 if (!readToken) { 1311 warning(startline, "Encountered empty array"); 1312 } 1313 break; 1314 } 1315 1316 if (token == TOK_EOF) 1317 { 1318 res_close(result); 1319 *status = U_INVALID_FORMAT_ERROR; 1320 error(startline, "unterminated array"); 1321 return NULL; 1322 } 1323 1324 /* string arrays are a special case */ 1325 if (token == TOK_STRING) 1326 { 1327 getToken(state, &tokenValue, &memberComments, NULL, status); 1328 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1329 } 1330 else 1331 { 1332 member = parseResource(state, NULL, &memberComments, status); 1333 } 1334 1335 if (member == NULL || U_FAILURE(*status)) 1336 { 1337 res_close(result); 1338 return NULL; 1339 } 1340 1341 result->add(member); 1342 1343 /* eat optional comma if present */ 1344 token = peekToken(state, 0, NULL, NULL, NULL, status); 1345 1346 if (token == TOK_COMMA) 1347 { 1348 getToken(state, NULL, NULL, NULL, status); 1349 } 1350 1351 if (U_FAILURE(*status)) 1352 { 1353 res_close(result); 1354 return NULL; 1355 } 1356 readToken = TRUE; 1357 } 1358 1359 ustr_deinit(&memberComments); 1360 return result; 1361 } 1362 1363 static struct SResource * 1364 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1365 { 1366 enum ETokenType token; 1367 char *string; 1368 int32_t value; 1369 UBool readToken = FALSE; 1370 char *stopstring; 1371 uint32_t len; 1372 struct UString memberComments; 1373 1374 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status); 1375 1376 if (result == NULL || U_FAILURE(*status)) 1377 { 1378 return NULL; 1379 } 1380 1381 if(isVerbose()){ 1382 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1383 } 1384 ustr_init(&memberComments); 1385 /* '{' . string [','] '}' */ 1386 for (;;) 1387 { 1388 ustr_setlen(&memberComments, 0, status); 1389 1390 /* check for end of array, but don't consume next token unless it really is the end */ 1391 token = peekToken(state, 0, NULL, NULL,&memberComments, status); 1392 1393 if (token == TOK_CLOSE_BRACE) 1394 { 1395 /* it's the end, consume the close brace */ 1396 getToken(state, NULL, NULL, NULL, status); 1397 if (!readToken) { 1398 warning(startline, "Encountered empty int vector"); 1399 } 1400 ustr_deinit(&memberComments); 1401 return result; 1402 } 1403 1404 string = getInvariantString(state, NULL, NULL, status); 1405 1406 if (U_FAILURE(*status)) 1407 { 1408 res_close(result); 1409 return NULL; 1410 } 1411 1412 /* For handling illegal char in the Intvector */ 1413 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1414 len=(uint32_t)(stopstring-string); 1415 1416 if(len==uprv_strlen(string)) 1417 { 1418 result->add(value, *status); 1419 uprv_free(string); 1420 token = peekToken(state, 0, NULL, NULL, NULL, status); 1421 } 1422 else 1423 { 1424 uprv_free(string); 1425 *status=U_INVALID_CHAR_FOUND; 1426 } 1427 1428 if (U_FAILURE(*status)) 1429 { 1430 res_close(result); 1431 return NULL; 1432 } 1433 1434 /* the comma is optional (even though it is required to prevent the reader from concatenating 1435 consecutive entries) so that a missing comma on the last entry isn't an error */ 1436 if (token == TOK_COMMA) 1437 { 1438 getToken(state, NULL, NULL, NULL, status); 1439 } 1440 readToken = TRUE; 1441 } 1442 1443 /* not reached */ 1444 /* A compiler warning will appear if all paths don't contain a return statement. */ 1445 /* intvector_close(result, status); 1446 *status = U_INTERNAL_PROGRAM_ERROR; 1447 return NULL;*/ 1448 } 1449 1450 static struct SResource * 1451 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1452 { 1453 uint32_t line; 1454 LocalMemory<char> string(getInvariantString(state, &line, NULL, status)); 1455 if (string.isNull() || U_FAILURE(*status)) 1456 { 1457 return NULL; 1458 } 1459 1460 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1461 if (U_FAILURE(*status)) 1462 { 1463 return NULL; 1464 } 1465 1466 if(isVerbose()){ 1467 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1468 } 1469 1470 uint32_t count = (uint32_t)uprv_strlen(string.getAlias()); 1471 if (count > 0){ 1472 if((count % 2)==0){ 1473 LocalMemory<uint8_t> value; 1474 if (value.allocateInsteadAndCopy(count) == NULL) 1475 { 1476 *status = U_MEMORY_ALLOCATION_ERROR; 1477 return NULL; 1478 } 1479 1480 char toConv[3] = {'\0', '\0', '\0'}; 1481 for (uint32_t i = 0; i < count; i += 2) 1482 { 1483 toConv[0] = string[i]; 1484 toConv[1] = string[i + 1]; 1485 1486 char *stopstring; 1487 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1488 uint32_t len=(uint32_t)(stopstring-toConv); 1489 1490 if(len!=2) 1491 { 1492 *status=U_INVALID_CHAR_FOUND; 1493 return NULL; 1494 } 1495 } 1496 1497 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status); 1498 } 1499 else 1500 { 1501 *status = U_INVALID_CHAR_FOUND; 1502 error(line, "Encountered invalid binary value (length is odd)"); 1503 return NULL; 1504 } 1505 } 1506 else 1507 { 1508 warning(startline, "Encountered empty binary value"); 1509 return bin_open(state->bundle, tag, 0, NULL, "", comment, status); 1510 } 1511 } 1512 1513 static struct SResource * 1514 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1515 { 1516 struct SResource *result = NULL; 1517 int32_t value; 1518 char *string; 1519 char *stopstring; 1520 uint32_t len; 1521 1522 string = getInvariantString(state, NULL, NULL, status); 1523 1524 if (string == NULL || U_FAILURE(*status)) 1525 { 1526 return NULL; 1527 } 1528 1529 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1530 1531 if (U_FAILURE(*status)) 1532 { 1533 uprv_free(string); 1534 return NULL; 1535 } 1536 1537 if(isVerbose()){ 1538 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1539 } 1540 1541 if (uprv_strlen(string) <= 0) 1542 { 1543 warning(startline, "Encountered empty integer. Default value is 0."); 1544 } 1545 1546 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1547 /* and handle illegal char in the integer*/ 1548 value = uprv_strtoul(string, &stopstring, 0); 1549 len=(uint32_t)(stopstring-string); 1550 if(len==uprv_strlen(string)) 1551 { 1552 result = int_open(state->bundle, tag, value, comment, status); 1553 } 1554 else 1555 { 1556 *status=U_INVALID_CHAR_FOUND; 1557 } 1558 uprv_free(string); 1559 1560 return result; 1561 } 1562 1563 static struct SResource * 1564 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1565 { 1566 uint32_t line; 1567 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status)); 1568 if (U_FAILURE(*status)) 1569 { 1570 return NULL; 1571 } 1572 1573 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1574 1575 if (U_FAILURE(*status)) 1576 { 1577 return NULL; 1578 } 1579 1580 if(isVerbose()){ 1581 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1582 } 1583 1584 /* Open the input file for reading */ 1585 CharString fullname; 1586 if (state->inputdir != NULL) { 1587 fullname.append(state->inputdir, *status); 1588 } 1589 fullname.appendPathPart(filename.getAlias(), *status); 1590 if (U_FAILURE(*status)) { 1591 return NULL; 1592 } 1593 1594 FileStream *file = T_FileStream_open(fullname.data(), "rb"); 1595 if (file == NULL) 1596 { 1597 error(line, "couldn't open input file %s", filename.getAlias()); 1598 *status = U_FILE_ACCESS_ERROR; 1599 return NULL; 1600 } 1601 1602 int32_t len = T_FileStream_size(file); 1603 LocalMemory<uint8_t> data; 1604 if(data.allocateInsteadAndCopy(len) == NULL) 1605 { 1606 *status = U_MEMORY_ALLOCATION_ERROR; 1607 T_FileStream_close (file); 1608 return NULL; 1609 } 1610 1611 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len); 1612 T_FileStream_close (file); 1613 1614 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status); 1615 } 1616 1617 static struct SResource * 1618 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1619 { 1620 struct SResource *result; 1621 int32_t len=0; 1622 char *filename; 1623 uint32_t line; 1624 UChar *pTarget = NULL; 1625 1626 UCHARBUF *ucbuf; 1627 char *fullname = NULL; 1628 int32_t count = 0; 1629 const char* cp = NULL; 1630 const UChar* uBuffer = NULL; 1631 1632 filename = getInvariantString(state, &line, NULL, status); 1633 count = (int32_t)uprv_strlen(filename); 1634 1635 if (U_FAILURE(*status)) 1636 { 1637 return NULL; 1638 } 1639 1640 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1641 1642 if (U_FAILURE(*status)) 1643 { 1644 uprv_free(filename); 1645 return NULL; 1646 } 1647 1648 if(isVerbose()){ 1649 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1650 } 1651 1652 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); 1653 /* test for NULL */ 1654 if(fullname == NULL) 1655 { 1656 *status = U_MEMORY_ALLOCATION_ERROR; 1657 uprv_free(filename); 1658 return NULL; 1659 } 1660 1661 if(state->inputdir!=NULL){ 1662 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) 1663 { 1664 1665 uprv_strcpy(fullname, state->inputdir); 1666 1667 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; 1668 fullname[state->inputdirLength + 1] = '\0'; 1669 1670 uprv_strcat(fullname, filename); 1671 } 1672 else 1673 { 1674 uprv_strcpy(fullname, state->inputdir); 1675 uprv_strcat(fullname, filename); 1676 } 1677 }else{ 1678 uprv_strcpy(fullname,filename); 1679 } 1680 1681 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1682 1683 if (U_FAILURE(*status)) { 1684 error(line, "couldn't open input file %s\n", filename); 1685 return NULL; 1686 } 1687 1688 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1689 result = string_open(state->bundle, tag, uBuffer, len, comment, status); 1690 1691 ucbuf_close(ucbuf); 1692 1693 uprv_free(pTarget); 1694 1695 uprv_free(filename); 1696 uprv_free(fullname); 1697 1698 return result; 1699 } 1700 1701 1702 1703 1704 1705 U_STRING_DECL(k_type_string, "string", 6); 1706 U_STRING_DECL(k_type_binary, "binary", 6); 1707 U_STRING_DECL(k_type_bin, "bin", 3); 1708 U_STRING_DECL(k_type_table, "table", 5); 1709 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1710 U_STRING_DECL(k_type_int, "int", 3); 1711 U_STRING_DECL(k_type_integer, "integer", 7); 1712 U_STRING_DECL(k_type_array, "array", 5); 1713 U_STRING_DECL(k_type_alias, "alias", 5); 1714 U_STRING_DECL(k_type_intvector, "intvector", 9); 1715 U_STRING_DECL(k_type_import, "import", 6); 1716 U_STRING_DECL(k_type_include, "include", 7); 1717 1718 /* Various non-standard processing plugins that create one or more special resources. */ 1719 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1720 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1721 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1722 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1723 1724 typedef enum EResourceType 1725 { 1726 RESTYPE_UNKNOWN, 1727 RESTYPE_STRING, 1728 RESTYPE_BINARY, 1729 RESTYPE_TABLE, 1730 RESTYPE_TABLE_NO_FALLBACK, 1731 RESTYPE_INTEGER, 1732 RESTYPE_ARRAY, 1733 RESTYPE_ALIAS, 1734 RESTYPE_INTVECTOR, 1735 RESTYPE_IMPORT, 1736 RESTYPE_INCLUDE, 1737 RESTYPE_PROCESS_UCA_RULES, 1738 RESTYPE_PROCESS_COLLATION, 1739 RESTYPE_PROCESS_TRANSLITERATOR, 1740 RESTYPE_PROCESS_DEPENDENCY, 1741 RESTYPE_RESERVED 1742 } EResourceType; 1743 1744 static struct { 1745 const char *nameChars; /* only used for debugging */ 1746 const UChar *nameUChars; 1747 ParseResourceFunction *parseFunction; 1748 } gResourceTypes[] = { 1749 {"Unknown", NULL, NULL}, 1750 {"string", k_type_string, parseString}, 1751 {"binary", k_type_binary, parseBinary}, 1752 {"table", k_type_table, parseTable}, 1753 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1754 {"integer", k_type_integer, parseInteger}, 1755 {"array", k_type_array, parseArray}, 1756 {"alias", k_type_alias, parseAlias}, 1757 {"intvector", k_type_intvector, parseIntVector}, 1758 {"import", k_type_import, parseImport}, 1759 {"include", k_type_include, parseInclude}, 1760 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1761 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1762 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1763 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1764 {"reserved", NULL, NULL} 1765 }; 1766 1767 void initParser() 1768 { 1769 U_STRING_INIT(k_type_string, "string", 6); 1770 U_STRING_INIT(k_type_binary, "binary", 6); 1771 U_STRING_INIT(k_type_bin, "bin", 3); 1772 U_STRING_INIT(k_type_table, "table", 5); 1773 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1774 U_STRING_INIT(k_type_int, "int", 3); 1775 U_STRING_INIT(k_type_integer, "integer", 7); 1776 U_STRING_INIT(k_type_array, "array", 5); 1777 U_STRING_INIT(k_type_alias, "alias", 5); 1778 U_STRING_INIT(k_type_intvector, "intvector", 9); 1779 U_STRING_INIT(k_type_import, "import", 6); 1780 U_STRING_INIT(k_type_include, "include", 7); 1781 1782 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1783 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1784 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1785 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1786 } 1787 1788 static inline UBool isTable(enum EResourceType type) { 1789 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK); 1790 } 1791 1792 static enum EResourceType 1793 parseResourceType(ParseState* state, UErrorCode *status) 1794 { 1795 struct UString *tokenValue; 1796 struct UString comment; 1797 enum EResourceType result = RESTYPE_UNKNOWN; 1798 uint32_t line=0; 1799 ustr_init(&comment); 1800 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); 1801 1802 if (U_FAILURE(*status)) 1803 { 1804 return RESTYPE_UNKNOWN; 1805 } 1806 1807 *status = U_ZERO_ERROR; 1808 1809 /* Search for normal types */ 1810 result=RESTYPE_UNKNOWN; 1811 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) { 1812 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1813 break; 1814 } 1815 } 1816 /* Now search for the aliases */ 1817 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1818 result = RESTYPE_INTEGER; 1819 } 1820 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1821 result = RESTYPE_BINARY; 1822 } 1823 else if (result == RESTYPE_RESERVED) { 1824 char tokenBuffer[1024]; 1825 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1826 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1827 *status = U_INVALID_FORMAT_ERROR; 1828 error(line, "unknown resource type '%s'", tokenBuffer); 1829 } 1830 1831 return result; 1832 } 1833 1834 /* parse a non-top-level resource */ 1835 static struct SResource * 1836 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) 1837 { 1838 enum ETokenType token; 1839 enum EResourceType resType = RESTYPE_UNKNOWN; 1840 ParseResourceFunction *parseFunction = NULL; 1841 struct UString *tokenValue; 1842 uint32_t startline; 1843 uint32_t line; 1844 1845 1846 token = getToken(state, &tokenValue, NULL, &startline, status); 1847 1848 if(isVerbose()){ 1849 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1850 } 1851 1852 /* name . [ ':' type ] '{' resource '}' */ 1853 /* This function parses from the colon onwards. If the colon is present, parse the 1854 type then try to parse a resource of that type. If there is no explicit type, 1855 work it out using the lookahead tokens. */ 1856 switch (token) 1857 { 1858 case TOK_EOF: 1859 *status = U_INVALID_FORMAT_ERROR; 1860 error(startline, "Unexpected EOF encountered"); 1861 return NULL; 1862 1863 case TOK_ERROR: 1864 *status = U_INVALID_FORMAT_ERROR; 1865 return NULL; 1866 1867 case TOK_COLON: 1868 resType = parseResourceType(state, status); 1869 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1870 1871 if (U_FAILURE(*status)) 1872 { 1873 return NULL; 1874 } 1875 1876 break; 1877 1878 case TOK_OPEN_BRACE: 1879 break; 1880 1881 default: 1882 *status = U_INVALID_FORMAT_ERROR; 1883 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1884 return NULL; 1885 } 1886 1887 1888 if (resType == RESTYPE_UNKNOWN) 1889 { 1890 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1891 We could have any of the following: 1892 { { => array (nested) 1893 { :/} => array 1894 { string , => string array 1895 1896 { string { => table 1897 1898 { string :/{ => table 1899 { string } => string 1900 */ 1901 1902 token = peekToken(state, 0, NULL, &line, NULL,status); 1903 1904 if (U_FAILURE(*status)) 1905 { 1906 return NULL; 1907 } 1908 1909 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1910 { 1911 resType = RESTYPE_ARRAY; 1912 } 1913 else if (token == TOK_STRING) 1914 { 1915 token = peekToken(state, 1, NULL, &line, NULL, status); 1916 1917 if (U_FAILURE(*status)) 1918 { 1919 return NULL; 1920 } 1921 1922 switch (token) 1923 { 1924 case TOK_COMMA: resType = RESTYPE_ARRAY; break; 1925 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break; 1926 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break; 1927 case TOK_COLON: resType = RESTYPE_TABLE; break; 1928 default: 1929 *status = U_INVALID_FORMAT_ERROR; 1930 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 1931 return NULL; 1932 } 1933 } 1934 else 1935 { 1936 *status = U_INVALID_FORMAT_ERROR; 1937 error(line, "Unexpected token after '{'"); 1938 return NULL; 1939 } 1940 1941 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 1942 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) { 1943 *status = U_INVALID_FORMAT_ERROR; 1944 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 1945 return NULL; 1946 } 1947 1948 1949 /* We should now know what we need to parse next, so call the appropriate parser 1950 function and return. */ 1951 parseFunction = gResourceTypes[resType].parseFunction; 1952 if (parseFunction != NULL) { 1953 return parseFunction(state, tag, startline, comment, status); 1954 } 1955 else { 1956 *status = U_INTERNAL_PROGRAM_ERROR; 1957 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 1958 } 1959 1960 return NULL; 1961 } 1962 1963 /* parse the top-level resource */ 1964 struct SRBRoot * 1965 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename, 1966 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) 1967 { 1968 struct UString *tokenValue; 1969 struct UString comment; 1970 uint32_t line; 1971 enum EResourceType bundleType; 1972 enum ETokenType token; 1973 ParseState state; 1974 uint32_t i; 1975 1976 1977 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 1978 { 1979 ustr_init(&state.lookahead[i].value); 1980 ustr_init(&state.lookahead[i].comment); 1981 } 1982 1983 initLookahead(&state, buf, status); 1984 1985 state.inputdir = inputDir; 1986 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; 1987 state.outputdir = outputDir; 1988 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; 1989 state.filename = filename; 1990 state.makeBinaryCollation = makeBinaryCollation; 1991 state.omitCollationRules = omitCollationRules; 1992 1993 ustr_init(&comment); 1994 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); 1995 1996 state.bundle = new SRBRoot(&comment, FALSE, *status); 1997 1998 if (state.bundle == NULL || U_FAILURE(*status)) 1999 { 2000 return NULL; 2001 } 2002 2003 2004 state.bundle->setLocale(tokenValue->fChars, *status); 2005 2006 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 2007 token = getToken(&state, NULL, NULL, &line, status); 2008 if(token==TOK_COLON) { 2009 *status=U_ZERO_ERROR; 2010 bundleType=parseResourceType(&state, status); 2011 2012 if(isTable(bundleType)) 2013 { 2014 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); 2015 } 2016 else 2017 { 2018 *status=U_PARSE_ERROR; 2019 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 2020 } 2021 } 2022 else 2023 { 2024 /* not a colon */ 2025 if(token==TOK_OPEN_BRACE) 2026 { 2027 *status=U_ZERO_ERROR; 2028 bundleType=RESTYPE_TABLE; 2029 } 2030 else 2031 { 2032 /* neither colon nor open brace */ 2033 *status=U_PARSE_ERROR; 2034 bundleType=RESTYPE_UNKNOWN; 2035 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 2036 } 2037 } 2038 2039 if (U_FAILURE(*status)) 2040 { 2041 delete state.bundle; 2042 return NULL; 2043 } 2044 2045 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) { 2046 /* 2047 * Parse a top-level table with the table(nofallback) declaration. 2048 * This is the same as a regular table, but also sets the 2049 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 2050 */ 2051 state.bundle->fNoFallback=TRUE; 2052 } 2053 /* top-level tables need not handle special table names like "collations" */ 2054 assert(!state.bundle->fIsPoolBundle); 2055 assert(state.bundle->fRoot->fType == URES_TABLE); 2056 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot); 2057 realParseTable(&state, rootTable, NULL, line, status); 2058 if(dependencyArray!=NULL){ 2059 rootTable->add(dependencyArray, 0, *status); 2060 dependencyArray = NULL; 2061 } 2062 if (U_FAILURE(*status)) 2063 { 2064 delete state.bundle; 2065 res_close(dependencyArray); 2066 return NULL; 2067 } 2068 2069 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) 2070 { 2071 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 2072 if(isStrict()){ 2073 *status = U_INVALID_FORMAT_ERROR; 2074 return NULL; 2075 } 2076 } 2077 2078 cleanupLookahead(&state); 2079 ustr_deinit(&comment); 2080 return state.bundle; 2081 } 2082