1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File PLURRULE.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 ******************************************************************************* 13 */ 14 15 16 #include "unicode/uniset.h" 17 #include "unicode/utypes.h" 18 #include "unicode/ures.h" 19 #include "unicode/plurrule.h" 20 #include "cmemory.h" 21 #include "cstring.h" 22 #include "hash.h" 23 #include "mutex.h" 24 #include "plurrule_impl.h" 25 #include "putilimp.h" 26 #include "ucln_in.h" 27 #include "ustrfmt.h" 28 #include "locutil.h" 29 30 /* 31 // TODO(claireho): remove stdio 32 #include "stdio.h" 33 */ 34 35 #if !UCONFIG_NO_FORMATTING 36 37 U_NAMESPACE_BEGIN 38 39 40 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) 41 42 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0}; 43 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0}; 44 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0}; 45 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0}; 46 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0}; 47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; 48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; 49 static const UChar PK_IN[]={LOW_I,LOW_N,0}; 50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; 51 static const UChar PK_IS[]={LOW_I,LOW_S,0}; 52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; 53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; 54 static const UChar PK_OR[]={LOW_O,LOW_R,0}; 55 static const UChar PK_VAR_N[]={LOW_N,0}; 56 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; 57 58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) 59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) 60 61 PluralRules::PluralRules(UErrorCode& status) 62 : UObject(), 63 mRules(NULL) 64 { 65 if (U_FAILURE(status)) { 66 return; 67 } 68 mParser = new RuleParser(); 69 if (mParser==NULL) { 70 status = U_MEMORY_ALLOCATION_ERROR; 71 } 72 } 73 74 PluralRules::PluralRules(const PluralRules& other) 75 : UObject(other), 76 mRules(NULL), 77 mParser(new RuleParser()) 78 { 79 *this=other; 80 } 81 82 PluralRules::~PluralRules() { 83 delete mRules; 84 delete mParser; 85 } 86 87 PluralRules* 88 PluralRules::clone() const { 89 return new PluralRules(*this); 90 } 91 92 PluralRules& 93 PluralRules::operator=(const PluralRules& other) { 94 if (this != &other) { 95 delete mRules; 96 if (other.mRules==NULL) { 97 mRules = NULL; 98 } 99 else { 100 mRules = new RuleChain(*other.mRules); 101 } 102 delete mParser; 103 mParser = new RuleParser(); 104 } 105 106 return *this; 107 } 108 109 PluralRules* U_EXPORT2 110 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { 111 RuleChain rules; 112 113 if (U_FAILURE(status)) { 114 return NULL; 115 } 116 PluralRules *newRules = new PluralRules(status); 117 if ( (newRules != NULL)&& U_SUCCESS(status) ) { 118 newRules->parseDescription((UnicodeString &)description, rules, status); 119 if (U_SUCCESS(status)) { 120 newRules->addRules(rules); 121 } 122 } 123 if (U_FAILURE(status)) { 124 delete newRules; 125 return NULL; 126 } 127 else { 128 return newRules; 129 } 130 } 131 132 PluralRules* U_EXPORT2 133 PluralRules::createDefaultRules(UErrorCode& status) { 134 return createRules(PLURAL_DEFAULT_RULE, status); 135 } 136 137 PluralRules* U_EXPORT2 138 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { 139 RuleChain rChain; 140 if (U_FAILURE(status)) { 141 return NULL; 142 } 143 PluralRules *newObj = new PluralRules(status); 144 if (newObj==NULL || U_FAILURE(status)) { 145 return NULL; 146 } 147 UnicodeString locRule = newObj->getRuleFromResource(locale, status); 148 if ((locRule.length() != 0) && U_SUCCESS(status)) { 149 newObj->parseDescription(locRule, rChain, status); 150 if (U_SUCCESS(status)) { 151 newObj->addRules(rChain); 152 } 153 } 154 if (U_FAILURE(status)||(locRule.length() == 0)) { 155 // use default plural rule 156 status = U_ZERO_ERROR; 157 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); 158 newObj->parseDescription(defRule, rChain, status); 159 newObj->addRules(rChain); 160 } 161 162 return newObj; 163 } 164 165 UnicodeString 166 PluralRules::select(int32_t number) const { 167 if (mRules == NULL) { 168 return PLURAL_DEFAULT_RULE; 169 } 170 else { 171 return mRules->select(number); 172 } 173 } 174 175 UnicodeString 176 PluralRules::select(double number) const { 177 if (mRules == NULL) { 178 return PLURAL_DEFAULT_RULE; 179 } 180 else { 181 return mRules->select(number); 182 } 183 } 184 185 StringEnumeration* 186 PluralRules::getKeywords(UErrorCode& status) const { 187 if (U_FAILURE(status)) return NULL; 188 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); 189 if (U_FAILURE(status)) return NULL; 190 191 return nameEnumerator; 192 } 193 194 195 UBool 196 PluralRules::isKeyword(const UnicodeString& keyword) const { 197 if ( keyword == PLURAL_KEYWORD_OTHER ) { 198 return true; 199 } 200 else { 201 if (mRules==NULL) { 202 return false; 203 } 204 else { 205 return mRules->isKeyword(keyword); 206 } 207 } 208 } 209 210 UnicodeString 211 PluralRules::getKeywordOther() const { 212 return PLURAL_KEYWORD_OTHER; 213 } 214 215 UBool 216 PluralRules::operator==(const PluralRules& other) const { 217 int32_t limit; 218 UBool sameList = TRUE; 219 const UnicodeString *ptrKeyword; 220 UErrorCode status= U_ZERO_ERROR; 221 222 if ( this == &other ) { 223 return TRUE; 224 } 225 StringEnumeration* myKeywordList = getKeywords(status); 226 if (U_FAILURE(status)) { 227 return FALSE; 228 } 229 StringEnumeration* otherKeywordList =other.getKeywords(status); 230 if (U_FAILURE(status)) { 231 return FALSE; 232 } 233 234 if (myKeywordList->count(status)!=otherKeywordList->count(status) || 235 U_FAILURE(status)) { 236 sameList = FALSE; 237 } 238 else { 239 myKeywordList->reset(status); 240 if (U_FAILURE(status)) { 241 return FALSE; 242 } 243 while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) { 244 if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) { 245 sameList = FALSE; 246 } 247 } 248 otherKeywordList->reset(status); 249 if (U_FAILURE(status)) { 250 return FALSE; 251 } 252 while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) { 253 if (U_FAILURE(status)) { 254 return FALSE; 255 } 256 if (!this->isKeyword(*ptrKeyword)) { 257 sameList = FALSE; 258 } 259 } 260 delete myKeywordList; 261 delete otherKeywordList; 262 if (!sameList) { 263 return FALSE; 264 } 265 } 266 267 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { 268 return FALSE; 269 } 270 UnicodeString myKeyword, otherKeyword; 271 for (int32_t i=0; i<limit; ++i) { 272 myKeyword = this->select(i); 273 otherKeyword = other.select(i); 274 if (myKeyword!=otherKeyword) { 275 return FALSE; 276 } 277 } 278 return TRUE; 279 } 280 281 void 282 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status) 283 { 284 int32_t ruleIndex=0; 285 UnicodeString token; 286 tokenType type; 287 tokenType prevType=none; 288 RuleChain *ruleChain=NULL; 289 AndConstraint *curAndConstraint=NULL; 290 OrConstraint *orNode=NULL; 291 RuleChain *lastChain=NULL; 292 293 if (U_FAILURE(status)) { 294 return; 295 } 296 UnicodeString ruleData = data.toLower(); 297 while (ruleIndex< ruleData.length()) { 298 mParser->getNextToken(ruleData, &ruleIndex, token, type, status); 299 if (U_FAILURE(status)) { 300 return; 301 } 302 mParser->checkSyntax(prevType, type, status); 303 if (U_FAILURE(status)) { 304 return; 305 } 306 switch (type) { 307 case tAnd: 308 curAndConstraint = curAndConstraint->add(); 309 break; 310 case tOr: 311 lastChain = &rules; 312 while (lastChain->next !=NULL) { 313 lastChain = lastChain->next; 314 } 315 orNode=lastChain->ruleHeader; 316 while (orNode->next != NULL) { 317 orNode = orNode->next; 318 } 319 orNode->next= new OrConstraint(); 320 orNode=orNode->next; 321 orNode->next=NULL; 322 curAndConstraint = orNode->add(); 323 break; 324 case tIs: 325 curAndConstraint->rangeHigh=-1; 326 break; 327 case tNot: 328 curAndConstraint->notIn=TRUE; 329 break; 330 case tIn: 331 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 332 curAndConstraint->integerOnly = TRUE; 333 break; 334 case tWithin: 335 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 336 break; 337 case tNumber: 338 if ( (curAndConstraint->op==AndConstraint::MOD)&& 339 (curAndConstraint->opNum == -1 ) ) { 340 curAndConstraint->opNum=getNumberValue(token); 341 } 342 else { 343 if (curAndConstraint->rangeLow == -1) { 344 curAndConstraint->rangeLow=getNumberValue(token); 345 } 346 else { 347 curAndConstraint->rangeHigh=getNumberValue(token); 348 } 349 } 350 break; 351 case tMod: 352 curAndConstraint->op=AndConstraint::MOD; 353 break; 354 case tKeyword: 355 if (ruleChain==NULL) { 356 ruleChain = &rules; 357 } 358 else { 359 while (ruleChain->next!=NULL){ 360 ruleChain=ruleChain->next; 361 } 362 ruleChain=ruleChain->next=new RuleChain(); 363 } 364 orNode = ruleChain->ruleHeader = new OrConstraint(); 365 curAndConstraint = orNode->add(); 366 ruleChain->keyword = token; 367 break; 368 default: 369 break; 370 } 371 prevType=type; 372 } 373 } 374 375 int32_t 376 PluralRules::getNumberValue(const UnicodeString& token) const { 377 int32_t i; 378 char digits[128]; 379 380 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); 381 digits[i]='\0'; 382 383 return((int32_t)atoi(digits)); 384 } 385 386 387 void 388 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) { 389 int32_t i=*curIndex; 390 391 localeName.remove(); 392 while (i< localeData.length()) { 393 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) { 394 break; 395 } 396 i++; 397 } 398 399 while (i< localeData.length()) { 400 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) { 401 break; 402 } 403 localeName+=localeData.charAt(i++); 404 } 405 *curIndex=i; 406 } 407 408 409 int32_t 410 PluralRules::getRepeatLimit() const { 411 if (mRules!=NULL) { 412 return mRules->getRepeatLimit(); 413 } 414 else { 415 return 0; 416 } 417 } 418 419 420 void 421 PluralRules::addRules(RuleChain& rules) { 422 RuleChain *newRule = new RuleChain(rules); 423 this->mRules=newRule; 424 newRule->setRepeatLimit(); 425 } 426 427 UnicodeString 428 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) { 429 UnicodeString emptyStr; 430 431 if (U_FAILURE(errCode)) { 432 return emptyStr; 433 } 434 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode); 435 if(U_FAILURE(errCode)) { 436 /* total failure, not even root could be opened */ 437 return emptyStr; 438 } 439 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode); 440 if(U_FAILURE(errCode)) { 441 ures_close(rb); 442 return emptyStr; 443 } 444 int32_t resLen=0; 445 const char *curLocaleName=locale.getName(); 446 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode); 447 448 if (s == NULL) { 449 // Check parent locales. 450 UErrorCode status = U_ZERO_ERROR; 451 char parentLocaleName[ULOC_FULLNAME_CAPACITY]; 452 const char *curLocaleName=locale.getName(); 453 int32_t localeNameLen=0; 454 uprv_strcpy(parentLocaleName, curLocaleName); 455 456 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, 457 ULOC_FULLNAME_CAPACITY, &status)) > 0) { 458 resLen=0; 459 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status); 460 if (s != NULL) { 461 errCode = U_ZERO_ERROR; 462 break; 463 } 464 status = U_ZERO_ERROR; 465 } 466 } 467 if (s==NULL) { 468 ures_close(locRes); 469 ures_close(rb); 470 return emptyStr; 471 } 472 473 char setKey[256]; 474 UChar result[256]; 475 u_UCharsToChars(s, setKey, resLen + 1); 476 // printf("\n PluralRule: %s\n", setKey); 477 478 479 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode); 480 if(U_FAILURE(errCode)) { 481 ures_close(locRes); 482 ures_close(rb); 483 return emptyStr; 484 } 485 resLen=0; 486 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode); 487 if (U_FAILURE(errCode)) { 488 ures_close(ruleRes); 489 ures_close(locRes); 490 ures_close(rb); 491 return emptyStr; 492 } 493 494 int32_t numberKeys = ures_getSize(setRes); 495 char *key=NULL; 496 int32_t len=0; 497 for(int32_t i=0; i<numberKeys; ++i) { 498 int32_t keyLen; 499 resLen=0; 500 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode); 501 keyLen = (int32_t)uprv_strlen(key); 502 u_charsToUChars(key, result+len, keyLen); 503 len += keyLen; 504 result[len++]=COLON; 505 uprv_memcpy(result+len, s, resLen*sizeof(UChar)); 506 len += resLen; 507 result[len++]=SEMI_COLON; 508 } 509 result[len++]=0; 510 u_UCharsToChars(result, setKey, len); 511 // printf(" Rule: %s\n", setKey); 512 513 ures_close(setRes); 514 ures_close(ruleRes); 515 ures_close(locRes); 516 ures_close(rb); 517 return UnicodeString(result); 518 519 } 520 521 AndConstraint::AndConstraint() { 522 op = AndConstraint::NONE; 523 opNum=-1; 524 rangeLow=-1; 525 rangeHigh=-1; 526 notIn=FALSE; 527 integerOnly=FALSE; 528 next=NULL; 529 } 530 531 532 AndConstraint::AndConstraint(const AndConstraint& other) { 533 this->op = other.op; 534 this->opNum=other.opNum; 535 this->rangeLow=other.rangeLow; 536 this->rangeHigh=other.rangeHigh; 537 this->integerOnly=other.integerOnly; 538 this->notIn=other.notIn; 539 if (other.next==NULL) { 540 this->next=NULL; 541 } 542 else { 543 this->next = new AndConstraint(*other.next); 544 } 545 } 546 547 AndConstraint::~AndConstraint() { 548 if (next!=NULL) { 549 delete next; 550 } 551 } 552 553 554 UBool 555 AndConstraint::isFulfilled(double number) { 556 UBool result=TRUE; 557 double value=number; 558 559 if ( op == MOD ) { 560 value = (int32_t)value % opNum; 561 } 562 if ( rangeHigh == -1 ) { 563 if ( rangeLow == -1 ) { 564 result = TRUE; // empty rule 565 } 566 else { 567 if ( value == rangeLow ) { 568 result = TRUE; 569 } 570 else { 571 result = FALSE; 572 } 573 } 574 } 575 else { 576 if ((rangeLow <= value) && (value <= rangeHigh)) { 577 if (integerOnly) { 578 if ( value != (int32_t)value) { 579 result = FALSE; 580 } 581 else { 582 result = TRUE; 583 } 584 } 585 else { 586 result = TRUE; 587 } 588 } 589 else { 590 result = FALSE; 591 } 592 } 593 if (notIn) { 594 return !result; 595 } 596 else { 597 return result; 598 } 599 } 600 601 int32_t 602 AndConstraint::updateRepeatLimit(int32_t maxLimit) { 603 604 if ( op == MOD ) { 605 return uprv_max(opNum, maxLimit); 606 } 607 else { 608 if ( rangeHigh == -1 ) { 609 return uprv_max(rangeLow, maxLimit); 610 } 611 else{ 612 return uprv_max(rangeHigh, maxLimit); 613 } 614 } 615 } 616 617 618 AndConstraint* 619 AndConstraint::add() 620 { 621 this->next = new AndConstraint(); 622 return this->next; 623 } 624 625 OrConstraint::OrConstraint() { 626 childNode=NULL; 627 next=NULL; 628 } 629 630 OrConstraint::OrConstraint(const OrConstraint& other) { 631 if ( other.childNode == NULL ) { 632 this->childNode = NULL; 633 } 634 else { 635 this->childNode = new AndConstraint(*(other.childNode)); 636 } 637 if (other.next == NULL ) { 638 this->next = NULL; 639 } 640 else { 641 this->next = new OrConstraint(*(other.next)); 642 } 643 } 644 645 OrConstraint::~OrConstraint() { 646 if (childNode!=NULL) { 647 delete childNode; 648 } 649 if (next!=NULL) { 650 delete next; 651 } 652 } 653 654 AndConstraint* 655 OrConstraint::add() 656 { 657 OrConstraint *curOrConstraint=this; 658 { 659 while (curOrConstraint->next!=NULL) { 660 curOrConstraint = curOrConstraint->next; 661 } 662 curOrConstraint->next = NULL; 663 curOrConstraint->childNode = new AndConstraint(); 664 } 665 return curOrConstraint->childNode; 666 } 667 668 UBool 669 OrConstraint::isFulfilled(double number) { 670 OrConstraint* orRule=this; 671 UBool result=FALSE; 672 673 while (orRule!=NULL && !result) { 674 result=TRUE; 675 AndConstraint* andRule = orRule->childNode; 676 while (andRule!=NULL && result) { 677 result = andRule->isFulfilled(number); 678 andRule=andRule->next; 679 } 680 orRule = orRule->next; 681 } 682 683 return result; 684 } 685 686 687 RuleChain::RuleChain() { 688 ruleHeader=NULL; 689 next = NULL; 690 repeatLimit=0; 691 } 692 693 RuleChain::RuleChain(const RuleChain& other) { 694 this->repeatLimit = other.repeatLimit; 695 this->keyword=other.keyword; 696 if (other.ruleHeader != NULL) { 697 this->ruleHeader = new OrConstraint(*(other.ruleHeader)); 698 } 699 else { 700 this->ruleHeader = NULL; 701 } 702 if (other.next != NULL ) { 703 this->next = new RuleChain(*other.next); 704 } 705 else 706 { 707 this->next = NULL; 708 } 709 } 710 711 RuleChain::~RuleChain() { 712 if (next != NULL) { 713 delete next; 714 } 715 if ( ruleHeader != NULL ) { 716 delete ruleHeader; 717 } 718 } 719 720 UnicodeString 721 RuleChain::select(double number) const { 722 723 if ( ruleHeader != NULL ) { 724 if (ruleHeader->isFulfilled(number)) { 725 return keyword; 726 } 727 } 728 if ( next != NULL ) { 729 return next->select(number); 730 } 731 else { 732 return PLURAL_KEYWORD_OTHER; 733 } 734 735 } 736 737 void 738 RuleChain::dumpRules(UnicodeString& result) { 739 UChar digitString[16]; 740 741 if ( ruleHeader != NULL ) { 742 result += keyword; 743 OrConstraint* orRule=ruleHeader; 744 while ( orRule != NULL ) { 745 AndConstraint* andRule=orRule->childNode; 746 while ( andRule != NULL ) { 747 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { 748 result += UNICODE_STRING_SIMPLE(" n is "); 749 if (andRule->notIn) { 750 result += UNICODE_STRING_SIMPLE("not "); 751 } 752 uprv_itou(digitString,16, andRule->rangeLow,10,0); 753 result += UnicodeString(digitString); 754 } 755 else { 756 if (andRule->op==AndConstraint::MOD) { 757 result += UNICODE_STRING_SIMPLE(" n mod "); 758 uprv_itou(digitString,16, andRule->opNum,10,0); 759 result += UnicodeString(digitString); 760 } 761 else { 762 result += UNICODE_STRING_SIMPLE(" n "); 763 } 764 if (andRule->rangeHigh==-1) { 765 if (andRule->notIn) { 766 result += UNICODE_STRING_SIMPLE(" is not "); 767 uprv_itou(digitString,16, andRule->rangeLow,10,0); 768 result += UnicodeString(digitString); 769 } 770 else { 771 result += UNICODE_STRING_SIMPLE(" is "); 772 uprv_itou(digitString,16, andRule->rangeLow,10,0); 773 result += UnicodeString(digitString); 774 } 775 } 776 else { 777 if (andRule->notIn) { 778 if ( andRule->integerOnly ) { 779 result += UNICODE_STRING_SIMPLE(" not in "); 780 } 781 else { 782 result += UNICODE_STRING_SIMPLE(" not within "); 783 } 784 uprv_itou(digitString,16, andRule->rangeLow,10,0); 785 result += UnicodeString(digitString); 786 result += UNICODE_STRING_SIMPLE(" .. "); 787 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 788 result += UnicodeString(digitString); 789 } 790 else { 791 if ( andRule->integerOnly ) { 792 result += UNICODE_STRING_SIMPLE(" in "); 793 } 794 else { 795 result += UNICODE_STRING_SIMPLE(" within "); 796 } 797 uprv_itou(digitString,16, andRule->rangeLow,10,0); 798 result += UnicodeString(digitString); 799 result += UNICODE_STRING_SIMPLE(" .. "); 800 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 801 } 802 } 803 } 804 if ( (andRule=andRule->next) != NULL) { 805 result += PK_AND; 806 } 807 } 808 if ( (orRule = orRule->next) != NULL ) { 809 result += PK_OR; 810 } 811 } 812 } 813 if ( next != NULL ) { 814 next->dumpRules(result); 815 } 816 } 817 818 int32_t 819 RuleChain::getRepeatLimit () { 820 return repeatLimit; 821 } 822 823 void 824 RuleChain::setRepeatLimit () { 825 int32_t limit=0; 826 827 if ( next != NULL ) { 828 next->setRepeatLimit(); 829 limit = next->repeatLimit; 830 } 831 832 if ( ruleHeader != NULL ) { 833 OrConstraint* orRule=ruleHeader; 834 while ( orRule != NULL ) { 835 AndConstraint* andRule=orRule->childNode; 836 while ( andRule != NULL ) { 837 limit = andRule->updateRepeatLimit(limit); 838 andRule = andRule->next; 839 } 840 orRule = orRule->next; 841 } 842 } 843 repeatLimit = limit; 844 } 845 846 UErrorCode 847 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { 848 if ( arraySize < capacityOfKeywords-1 ) { 849 keywords[arraySize++]=keyword; 850 } 851 else { 852 return U_BUFFER_OVERFLOW_ERROR; 853 } 854 855 if ( next != NULL ) { 856 return next->getKeywords(capacityOfKeywords, keywords, arraySize); 857 } 858 else { 859 return U_ZERO_ERROR; 860 } 861 } 862 863 UBool 864 RuleChain::isKeyword(const UnicodeString& keywordParam) const { 865 if ( keyword == keywordParam ) { 866 return TRUE; 867 } 868 869 if ( next != NULL ) { 870 return next->isKeyword(keywordParam); 871 } 872 else { 873 return FALSE; 874 } 875 } 876 877 878 RuleParser::RuleParser() { 879 UErrorCode err=U_ZERO_ERROR; 880 const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]"); 881 const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]"); 882 idStartFilter = new UnicodeSet(idStart, err); 883 idContinueFilter = new UnicodeSet(idContinue, err); 884 } 885 886 RuleParser::~RuleParser() { 887 delete idStartFilter; 888 delete idContinueFilter; 889 } 890 891 void 892 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status) 893 { 894 if (U_FAILURE(status)) { 895 return; 896 } 897 switch(prevType) { 898 case none: 899 case tSemiColon: 900 if (curType!=tKeyword) { 901 status = U_UNEXPECTED_TOKEN; 902 } 903 break; 904 case tVariableN : 905 if (curType != tIs && curType != tMod && curType != tIn && 906 curType != tNot && curType != tWithin) { 907 status = U_UNEXPECTED_TOKEN; 908 } 909 break; 910 case tZero: 911 case tOne: 912 case tTwo: 913 case tFew: 914 case tMany: 915 case tOther: 916 case tKeyword: 917 if (curType != tColon) { 918 status = U_UNEXPECTED_TOKEN; 919 } 920 break; 921 case tColon : 922 if (curType != tVariableN) { 923 status = U_UNEXPECTED_TOKEN; 924 } 925 break; 926 case tIs: 927 if ( curType != tNumber && curType != tNot) { 928 status = U_UNEXPECTED_TOKEN; 929 } 930 break; 931 case tNot: 932 if (curType != tNumber && curType != tIn && curType != tWithin) { 933 status = U_UNEXPECTED_TOKEN; 934 } 935 break; 936 case tMod: 937 case tDot: 938 case tIn: 939 case tWithin: 940 case tAnd: 941 case tOr: 942 if (curType != tNumber && curType != tVariableN) { 943 status = U_UNEXPECTED_TOKEN; 944 } 945 break; 946 case tNumber: 947 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot && 948 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr) 949 { 950 status = U_UNEXPECTED_TOKEN; 951 } 952 break; 953 default: 954 status = U_UNEXPECTED_TOKEN; 955 break; 956 } 957 } 958 959 void 960 RuleParser::getNextToken(const UnicodeString& ruleData, 961 int32_t *ruleIndex, 962 UnicodeString& token, 963 tokenType& type, 964 UErrorCode &status) 965 { 966 int32_t curIndex= *ruleIndex; 967 UChar ch; 968 tokenType prevType=none; 969 970 if (U_FAILURE(status)) { 971 return; 972 } 973 while (curIndex<ruleData.length()) { 974 ch = ruleData.charAt(curIndex); 975 if ( !inRange(ch, type) ) { 976 status = U_ILLEGAL_CHARACTER; 977 return; 978 } 979 switch (type) { 980 case tSpace: 981 if ( *ruleIndex != curIndex ) { // letter 982 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 983 *ruleIndex=curIndex; 984 type=prevType; 985 getKeyType(token, type, status); 986 return; 987 } 988 else { 989 *ruleIndex=*ruleIndex+1; 990 } 991 break; // consective space 992 case tColon: 993 case tSemiColon: 994 if ( *ruleIndex != curIndex ) { 995 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 996 *ruleIndex=curIndex; 997 type=prevType; 998 getKeyType(token, type, status); 999 return; 1000 } 1001 else { 1002 *ruleIndex=curIndex+1; 1003 return; 1004 } 1005 case tLetter: 1006 if ((type==prevType)||(prevType==none)) { 1007 prevType=type; 1008 break; 1009 } 1010 break; 1011 case tNumber: 1012 if ((type==prevType)||(prevType==none)) { 1013 prevType=type; 1014 break; 1015 } 1016 else { 1017 *ruleIndex=curIndex+1; 1018 return; 1019 } 1020 case tDot: 1021 if (prevType==none) { // first dot 1022 prevType=type; 1023 continue; 1024 } 1025 else { 1026 if ( *ruleIndex != curIndex ) { 1027 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1028 *ruleIndex=curIndex; // letter 1029 type=prevType; 1030 getKeyType(token, type, status); 1031 return; 1032 } 1033 else { // two consective dots 1034 *ruleIndex=curIndex+2; 1035 return; 1036 } 1037 } 1038 break; 1039 default: 1040 status = U_UNEXPECTED_TOKEN; 1041 return; 1042 } 1043 curIndex++; 1044 } 1045 if ( curIndex>=ruleData.length() ) { 1046 if ( (type == tLetter)||(type == tNumber) ) { 1047 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1048 getKeyType(token, type, status); 1049 if (U_FAILURE(status)) { 1050 return; 1051 } 1052 } 1053 *ruleIndex = ruleData.length(); 1054 } 1055 } 1056 1057 UBool 1058 RuleParser::inRange(UChar ch, tokenType& type) { 1059 if ((ch>=CAP_A) && (ch<=CAP_Z)) { 1060 // we assume all characters are in lower case already. 1061 return FALSE; 1062 } 1063 if ((ch>=LOW_A) && (ch<=LOW_Z)) { 1064 type = tLetter; 1065 return TRUE; 1066 } 1067 if ((ch>=U_ZERO) && (ch<=U_NINE)) { 1068 type = tNumber; 1069 return TRUE; 1070 } 1071 switch (ch) { 1072 case COLON: 1073 type = tColon; 1074 return TRUE; 1075 case SPACE: 1076 type = tSpace; 1077 return TRUE; 1078 case SEMI_COLON: 1079 type = tSemiColon; 1080 return TRUE; 1081 case DOT: 1082 type = tDot; 1083 return TRUE; 1084 default : 1085 type = none; 1086 return FALSE; 1087 } 1088 } 1089 1090 1091 void 1092 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status) 1093 { 1094 if (U_FAILURE(status)) { 1095 return; 1096 } 1097 if ( keyType==tNumber) { 1098 } 1099 else if (token==PK_VAR_N) { 1100 keyType = tVariableN; 1101 } 1102 else if (token==PK_IS) { 1103 keyType = tIs; 1104 } 1105 else if (token==PK_AND) { 1106 keyType = tAnd; 1107 } 1108 else if (token==PK_IN) { 1109 keyType = tIn; 1110 } 1111 else if (token==PK_WITHIN) { 1112 keyType = tWithin; 1113 } 1114 else if (token==PK_NOT) { 1115 keyType = tNot; 1116 } 1117 else if (token==PK_MOD) { 1118 keyType = tMod; 1119 } 1120 else if (token==PK_OR) { 1121 keyType = tOr; 1122 } 1123 else if ( isValidKeyword(token) ) { 1124 keyType = tKeyword; 1125 } 1126 else { 1127 status = U_UNEXPECTED_TOKEN; 1128 } 1129 } 1130 1131 UBool 1132 RuleParser::isValidKeyword(const UnicodeString& token) { 1133 if ( token.length()==0 ) { 1134 return FALSE; 1135 } 1136 if ( idStartFilter->contains(token.charAt(0) )==TRUE ) { 1137 int32_t i; 1138 for (i=1; i< token.length(); i++) { 1139 if (idContinueFilter->contains(token.charAt(i))== FALSE) { 1140 return FALSE; 1141 } 1142 } 1143 return TRUE; 1144 } 1145 else { 1146 return FALSE; 1147 } 1148 } 1149 1150 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) : 1151 fKeywordNames(status) 1152 { 1153 RuleChain *node=header; 1154 UBool addKeywordOther=true; 1155 1156 if (U_FAILURE(status)) { 1157 return; 1158 } 1159 pos=0; 1160 fKeywordNames.removeAllElements(); 1161 while(node!=NULL) { 1162 fKeywordNames.addElement(new UnicodeString(node->keyword), status); 1163 if (U_FAILURE(status)) { 1164 return; 1165 } 1166 if (node->keyword == PLURAL_KEYWORD_OTHER) { 1167 addKeywordOther= false; 1168 } 1169 node=node->next; 1170 } 1171 1172 if (addKeywordOther) { 1173 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); 1174 if (U_FAILURE(status)) { 1175 return; 1176 } 1177 } 1178 } 1179 1180 const UnicodeString* 1181 PluralKeywordEnumeration::snext(UErrorCode& status) { 1182 if (U_SUCCESS(status) && pos < fKeywordNames.size()) { 1183 return (const UnicodeString*)fKeywordNames.elementAt(pos++); 1184 } 1185 return NULL; 1186 } 1187 1188 void 1189 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { 1190 pos=0; 1191 } 1192 1193 int32_t 1194 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { 1195 return fKeywordNames.size(); 1196 } 1197 1198 PluralKeywordEnumeration::~PluralKeywordEnumeration() { 1199 UnicodeString *s; 1200 for (int32_t i=0; i<fKeywordNames.size(); ++i) { 1201 if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) { 1202 delete s; 1203 } 1204 } 1205 } 1206 1207 U_NAMESPACE_END 1208 1209 1210 #endif /* #if !UCONFIG_NO_FORMATTING */ 1211 1212 //eof 1213