1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File PLURRULE.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 ******************************************************************************* 13 */ 14 15 16 #include "unicode/utypes.h" 17 #include "unicode/localpointer.h" 18 #include "unicode/plurrule.h" 19 #include "unicode/ures.h" 20 #include "cmemory.h" 21 #include "cstring.h" 22 #include "hash.h" 23 #include "mutex.h" 24 #include "patternprops.h" 25 #include "plurrule_impl.h" 26 #include "putilimp.h" 27 #include "ucln_in.h" 28 #include "uhash.h" 29 #include "ustrfmt.h" 30 #include "locutil.h" 31 32 #if !UCONFIG_NO_FORMATTING 33 34 U_NAMESPACE_BEGIN 35 36 // shared by all instances when lazy-initializing samples 37 static UMTX pluralMutex; 38 39 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) 40 41 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; 42 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; 43 static const UChar PK_IN[]={LOW_I,LOW_N,0}; 44 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; 45 static const UChar PK_IS[]={LOW_I,LOW_S,0}; 46 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; 47 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; 48 static const UChar PK_OR[]={LOW_O,LOW_R,0}; 49 static const UChar PK_VAR_N[]={LOW_N,0}; 50 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; 51 52 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) 53 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) 54 55 PluralRules::PluralRules(UErrorCode& status) 56 : UObject(), 57 mRules(NULL), 58 mParser(NULL), 59 mSamples(NULL), 60 mSampleInfo(NULL), 61 mSampleInfoCount(0) 62 { 63 if (U_FAILURE(status)) { 64 return; 65 } 66 mParser = new RuleParser(); 67 if (mParser==NULL) { 68 status = U_MEMORY_ALLOCATION_ERROR; 69 } 70 } 71 72 PluralRules::PluralRules(const PluralRules& other) 73 : UObject(other), 74 mRules(NULL), 75 mParser(NULL), 76 mSamples(NULL), 77 mSampleInfo(NULL), 78 mSampleInfoCount(0) 79 { 80 *this=other; 81 } 82 83 PluralRules::~PluralRules() { 84 delete mRules; 85 delete mParser; 86 uprv_free(mSamples); 87 uprv_free(mSampleInfo); 88 } 89 90 PluralRules* 91 PluralRules::clone() const { 92 return new PluralRules(*this); 93 } 94 95 PluralRules& 96 PluralRules::operator=(const PluralRules& other) { 97 if (this != &other) { 98 delete mRules; 99 if (other.mRules==NULL) { 100 mRules = NULL; 101 } 102 else { 103 mRules = new RuleChain(*other.mRules); 104 } 105 delete mParser; 106 mParser = new RuleParser(); 107 108 uprv_free(mSamples); 109 mSamples = NULL; 110 111 uprv_free(mSampleInfo); 112 mSampleInfo = NULL; 113 mSampleInfoCount = 0; 114 } 115 116 return *this; 117 } 118 119 PluralRules* U_EXPORT2 120 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { 121 RuleChain rules; 122 123 if (U_FAILURE(status)) { 124 return NULL; 125 } 126 PluralRules *newRules = new PluralRules(status); 127 if ( (newRules != NULL)&& U_SUCCESS(status) ) { 128 newRules->parseDescription((UnicodeString &)description, rules, status); 129 if (U_SUCCESS(status)) { 130 newRules->addRules(rules); 131 } 132 } 133 if (U_FAILURE(status)) { 134 delete newRules; 135 return NULL; 136 } 137 else { 138 return newRules; 139 } 140 } 141 142 PluralRules* U_EXPORT2 143 PluralRules::createDefaultRules(UErrorCode& status) { 144 return createRules(PLURAL_DEFAULT_RULE, status); 145 } 146 147 PluralRules* U_EXPORT2 148 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { 149 RuleChain rChain; 150 if (U_FAILURE(status)) { 151 return NULL; 152 } 153 PluralRules *newObj = new PluralRules(status); 154 if (newObj==NULL || U_FAILURE(status)) { 155 delete newObj; 156 return NULL; 157 } 158 UnicodeString locRule = newObj->getRuleFromResource(locale, status); 159 if ((locRule.length() != 0) && U_SUCCESS(status)) { 160 newObj->parseDescription(locRule, rChain, status); 161 if (U_SUCCESS(status)) { 162 newObj->addRules(rChain); 163 } 164 } 165 if (U_FAILURE(status)||(locRule.length() == 0)) { 166 // use default plural rule 167 status = U_ZERO_ERROR; 168 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); 169 newObj->parseDescription(defRule, rChain, status); 170 newObj->addRules(rChain); 171 } 172 173 return newObj; 174 } 175 176 UnicodeString 177 PluralRules::select(int32_t number) const { 178 if (mRules == NULL) { 179 return PLURAL_DEFAULT_RULE; 180 } 181 else { 182 return mRules->select(number); 183 } 184 } 185 186 UnicodeString 187 PluralRules::select(double number) const { 188 if (mRules == NULL) { 189 return PLURAL_DEFAULT_RULE; 190 } 191 else { 192 return mRules->select(number); 193 } 194 } 195 196 StringEnumeration* 197 PluralRules::getKeywords(UErrorCode& status) const { 198 if (U_FAILURE(status)) return NULL; 199 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); 200 if (U_FAILURE(status)) { 201 delete nameEnumerator; 202 return NULL; 203 } 204 205 return nameEnumerator; 206 } 207 208 double 209 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) { 210 double val = 0.0; 211 UErrorCode status = U_ZERO_ERROR; 212 int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status); 213 return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE; 214 } 215 216 int32_t 217 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest, 218 int32_t destCapacity, UErrorCode& error) { 219 return getSamplesInternal(keyword, dest, destCapacity, FALSE, error); 220 } 221 222 int32_t 223 PluralRules::getSamples(const UnicodeString &keyword, double *dest, 224 int32_t destCapacity, UErrorCode& status) { 225 return getSamplesInternal(keyword, dest, destCapacity, TRUE, status); 226 } 227 228 int32_t 229 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest, 230 int32_t destCapacity, UBool includeUnlimited, 231 UErrorCode& status) { 232 initSamples(status); 233 if (U_FAILURE(status)) { 234 return -1; 235 } 236 if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) { 237 status = U_ILLEGAL_ARGUMENT_ERROR; 238 return -1; 239 } 240 241 int32_t index = getKeywordIndex(keyword, status); 242 if (index == -1) { 243 return 0; 244 } 245 246 const int32_t LIMIT_MASK = 0x1 << 31; 247 248 if (!includeUnlimited) { 249 if ((mSampleInfo[index] & LIMIT_MASK) == 0) { 250 return -1; 251 } 252 } 253 254 int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK; 255 int32_t limit = mSampleInfo[index] & ~LIMIT_MASK; 256 int32_t len = limit - start; 257 if (len <= destCapacity) { 258 destCapacity = len; 259 } else if (includeUnlimited) { 260 len = destCapacity; // no overflow, and don't report more than we copy 261 } else { 262 status = U_BUFFER_OVERFLOW_ERROR; 263 return len; 264 } 265 for (int32_t i = 0; i < destCapacity; ++i, ++start) { 266 dest[i] = mSamples[start]; 267 } 268 return len; 269 } 270 271 272 UBool 273 PluralRules::isKeyword(const UnicodeString& keyword) const { 274 if ( keyword == PLURAL_KEYWORD_OTHER ) { 275 return true; 276 } 277 else { 278 if (mRules==NULL) { 279 return false; 280 } 281 else { 282 return mRules->isKeyword(keyword); 283 } 284 } 285 } 286 287 UnicodeString 288 PluralRules::getKeywordOther() const { 289 return PLURAL_KEYWORD_OTHER; 290 } 291 292 UBool 293 PluralRules::operator==(const PluralRules& other) const { 294 int32_t limit; 295 const UnicodeString *ptrKeyword; 296 UErrorCode status= U_ZERO_ERROR; 297 298 if ( this == &other ) { 299 return TRUE; 300 } 301 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); 302 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); 303 if (U_FAILURE(status)) { 304 return FALSE; 305 } 306 307 if (myKeywordList->count(status)!=otherKeywordList->count(status)) { 308 return FALSE; 309 } 310 myKeywordList->reset(status); 311 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { 312 if (!other.isKeyword(*ptrKeyword)) { 313 return FALSE; 314 } 315 } 316 otherKeywordList->reset(status); 317 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { 318 if (!this->isKeyword(*ptrKeyword)) { 319 return FALSE; 320 } 321 } 322 if (U_FAILURE(status)) { 323 return FALSE; 324 } 325 326 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { 327 return FALSE; 328 } 329 UnicodeString myKeyword, otherKeyword; 330 for (int32_t i=0; i<limit; ++i) { 331 myKeyword = this->select(i); 332 otherKeyword = other.select(i); 333 if (myKeyword!=otherKeyword) { 334 return FALSE; 335 } 336 } 337 return TRUE; 338 } 339 340 void 341 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status) 342 { 343 int32_t ruleIndex=0; 344 UnicodeString token; 345 tokenType type; 346 tokenType prevType=none; 347 RuleChain *ruleChain=NULL; 348 AndConstraint *curAndConstraint=NULL; 349 OrConstraint *orNode=NULL; 350 RuleChain *lastChain=NULL; 351 352 if (U_FAILURE(status)) { 353 return; 354 } 355 UnicodeString ruleData = data.toLower(); 356 while (ruleIndex< ruleData.length()) { 357 mParser->getNextToken(ruleData, &ruleIndex, token, type, status); 358 if (U_FAILURE(status)) { 359 return; 360 } 361 mParser->checkSyntax(prevType, type, status); 362 if (U_FAILURE(status)) { 363 return; 364 } 365 switch (type) { 366 case tAnd: 367 curAndConstraint = curAndConstraint->add(); 368 break; 369 case tOr: 370 lastChain = &rules; 371 while (lastChain->next !=NULL) { 372 lastChain = lastChain->next; 373 } 374 orNode=lastChain->ruleHeader; 375 while (orNode->next != NULL) { 376 orNode = orNode->next; 377 } 378 orNode->next= new OrConstraint(); 379 orNode=orNode->next; 380 orNode->next=NULL; 381 curAndConstraint = orNode->add(); 382 break; 383 case tIs: 384 curAndConstraint->rangeHigh=-1; 385 break; 386 case tNot: 387 curAndConstraint->notIn=TRUE; 388 break; 389 case tIn: 390 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 391 curAndConstraint->integerOnly = TRUE; 392 break; 393 case tWithin: 394 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 395 break; 396 case tNumber: 397 if ( (curAndConstraint->op==AndConstraint::MOD)&& 398 (curAndConstraint->opNum == -1 ) ) { 399 curAndConstraint->opNum=getNumberValue(token); 400 } 401 else { 402 if (curAndConstraint->rangeLow == -1) { 403 curAndConstraint->rangeLow=getNumberValue(token); 404 } 405 else { 406 curAndConstraint->rangeHigh=getNumberValue(token); 407 } 408 } 409 break; 410 case tMod: 411 curAndConstraint->op=AndConstraint::MOD; 412 break; 413 case tKeyword: 414 if (ruleChain==NULL) { 415 ruleChain = &rules; 416 } 417 else { 418 while (ruleChain->next!=NULL){ 419 ruleChain=ruleChain->next; 420 } 421 ruleChain=ruleChain->next=new RuleChain(); 422 } 423 if (ruleChain->ruleHeader != NULL) { 424 delete ruleChain->ruleHeader; 425 } 426 orNode = ruleChain->ruleHeader = new OrConstraint(); 427 curAndConstraint = orNode->add(); 428 ruleChain->keyword = token; 429 break; 430 default: 431 break; 432 } 433 prevType=type; 434 } 435 } 436 437 int32_t 438 PluralRules::getNumberValue(const UnicodeString& token) const { 439 int32_t i; 440 char digits[128]; 441 442 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); 443 digits[i]='\0'; 444 445 return((int32_t)atoi(digits)); 446 } 447 448 449 void 450 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) { 451 int32_t i=*curIndex; 452 453 localeName.remove(); 454 while (i< localeData.length()) { 455 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) { 456 break; 457 } 458 i++; 459 } 460 461 while (i< localeData.length()) { 462 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) { 463 break; 464 } 465 localeName+=localeData.charAt(i++); 466 } 467 *curIndex=i; 468 } 469 470 471 int32_t 472 PluralRules::getRepeatLimit() const { 473 if (mRules!=NULL) { 474 return mRules->getRepeatLimit(); 475 } 476 else { 477 return 0; 478 } 479 } 480 481 int32_t 482 PluralRules::getKeywordIndex(const UnicodeString& keyword, 483 UErrorCode& status) const { 484 if (U_SUCCESS(status)) { 485 int32_t n = 0; 486 RuleChain* rc = mRules; 487 while (rc != NULL) { 488 if (rc->ruleHeader != NULL) { 489 if (rc->keyword == keyword) { 490 return n; 491 } 492 ++n; 493 } 494 rc = rc->next; 495 } 496 if (keyword == PLURAL_KEYWORD_OTHER) { 497 return n; 498 } 499 } 500 return -1; 501 } 502 503 typedef struct SampleRecord { 504 int32_t ruleIndex; 505 double value; 506 } SampleRecord; 507 508 void 509 PluralRules::initSamples(UErrorCode& status) { 510 if (U_FAILURE(status)) { 511 return; 512 } 513 Mutex lock(&pluralMutex); 514 515 if (mSamples) { 516 return; 517 } 518 519 // Note, the original design let you have multiple rules with the same keyword. But 520 // we don't use that in our data and existing functions in this implementation don't 521 // fully support it (for example, the returned keywords is a list and not a set). 522 // 523 // So I don't support this here either. If you ask for samples, or for all values, 524 // you will get information about the first rule with that keyword, not all rules with 525 // that keyword. 526 527 int32_t maxIndex = 0; 528 int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end 529 RuleChain* rc = mRules; 530 while (rc != NULL) { 531 if (rc->ruleHeader != NULL) { 532 if (otherIndex == -1 && rc->keyword == PLURAL_KEYWORD_OTHER) { 533 otherIndex = maxIndex; 534 } 535 ++maxIndex; 536 } 537 rc = rc->next; 538 } 539 if (otherIndex == -1) { 540 ++maxIndex; 541 } 542 543 LocalMemory<int32_t> newSampleInfo; 544 if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) { 545 status = U_MEMORY_ALLOCATION_ERROR; 546 return; 547 } 548 549 const int32_t LIMIT_MASK = 0x1 << 31; 550 551 rc = mRules; 552 int32_t n = 0; 553 while (rc != NULL) { 554 if (rc->ruleHeader != NULL) { 555 newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0; 556 } 557 rc = rc->next; 558 } 559 if (otherIndex == -1) { 560 newSampleInfo[maxIndex - 1] = 0; // unlimited 561 } 562 563 MaybeStackArray<SampleRecord, 10> newSamples; 564 int32_t sampleCount = 0; 565 566 int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2; 567 if (limit < 10) { 568 limit = 10; 569 } 570 571 for (int i = 0, keywordsRemaining = maxIndex; 572 keywordsRemaining > 0 && i < limit; 573 ++i) { 574 double val = i / 2.0; 575 576 n = 0; 577 rc = mRules; 578 int32_t found = -1; 579 while (rc != NULL) { 580 if (rc->ruleHeader != NULL) { 581 if (rc->ruleHeader->isFulfilled(val)) { 582 found = n; 583 break; 584 } 585 ++n; 586 } 587 rc = rc->next; 588 } 589 if (found == -1) { 590 // 'other'. If there is an 'other' rule, the rule set is bad since nothing 591 // should leak through, but we don't bother to report that here. 592 found = otherIndex == -1 ? maxIndex - 1 : otherIndex; 593 } 594 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set 595 continue; 596 } 597 newSampleInfo[found] += 1; // won't impact limit flag 598 599 if (sampleCount == newSamples.getCapacity()) { 600 int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2; 601 if (NULL == newSamples.resize(newCapacity, sampleCount)) { 602 status = U_MEMORY_ALLOCATION_ERROR; 603 return; 604 } 605 } 606 newSamples[sampleCount].ruleIndex = found; 607 newSamples[sampleCount].value = val; 608 ++sampleCount; 609 610 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set 611 --keywordsRemaining; 612 } 613 } 614 615 // sort the values by index, leaving order otherwise unchanged 616 // this is just a selection sort for simplicity 617 LocalMemory<double> values; 618 if (NULL == values.allocateInsteadAndCopy(sampleCount)) { 619 status = U_MEMORY_ALLOCATION_ERROR; 620 return; 621 } 622 for (int i = 0, j = 0; i < maxIndex; ++i) { 623 for (int k = 0; k < sampleCount; ++k) { 624 if (newSamples[k].ruleIndex == i) { 625 values[j++] = newSamples[k].value; 626 } 627 } 628 } 629 630 // convert array of mask/lengths to array of mask/limits 631 limit = 0; 632 for (int i = 0; i < maxIndex; ++i) { 633 int32_t info = newSampleInfo[i]; 634 int32_t len = info & ~LIMIT_MASK; 635 limit += len; 636 // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples, 637 // it's not really unlimited, so mark it as limited 638 int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK; 639 newSampleInfo[i] = limit | mask; 640 } 641 642 // ok, we've got good data 643 mSamples = values.orphan(); 644 mSampleInfo = newSampleInfo.orphan(); 645 mSampleInfoCount = maxIndex; 646 } 647 648 void 649 PluralRules::addRules(RuleChain& rules) { 650 RuleChain *newRule = new RuleChain(rules); 651 this->mRules=newRule; 652 newRule->setRepeatLimit(); 653 } 654 655 UnicodeString 656 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) { 657 UnicodeString emptyStr; 658 659 if (U_FAILURE(errCode)) { 660 return emptyStr; 661 } 662 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode); 663 if(U_FAILURE(errCode)) { 664 /* total failure, not even root could be opened */ 665 return emptyStr; 666 } 667 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode); 668 if(U_FAILURE(errCode)) { 669 ures_close(rb); 670 return emptyStr; 671 } 672 int32_t resLen=0; 673 const char *curLocaleName=locale.getName(); 674 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode); 675 676 if (s == NULL) { 677 // Check parent locales. 678 UErrorCode status = U_ZERO_ERROR; 679 char parentLocaleName[ULOC_FULLNAME_CAPACITY]; 680 const char *curLocaleName=locale.getName(); 681 int32_t localeNameLen=0; 682 uprv_strcpy(parentLocaleName, curLocaleName); 683 684 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, 685 ULOC_FULLNAME_CAPACITY, &status)) > 0) { 686 resLen=0; 687 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status); 688 if (s != NULL) { 689 errCode = U_ZERO_ERROR; 690 break; 691 } 692 status = U_ZERO_ERROR; 693 } 694 } 695 if (s==NULL) { 696 ures_close(locRes); 697 ures_close(rb); 698 return emptyStr; 699 } 700 701 char setKey[256]; 702 UChar result[256]; 703 u_UCharsToChars(s, setKey, resLen + 1); 704 // printf("\n PluralRule: %s\n", setKey); 705 706 707 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode); 708 if(U_FAILURE(errCode)) { 709 ures_close(locRes); 710 ures_close(rb); 711 return emptyStr; 712 } 713 resLen=0; 714 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode); 715 if (U_FAILURE(errCode)) { 716 ures_close(ruleRes); 717 ures_close(locRes); 718 ures_close(rb); 719 return emptyStr; 720 } 721 722 int32_t numberKeys = ures_getSize(setRes); 723 char *key=NULL; 724 int32_t len=0; 725 for(int32_t i=0; i<numberKeys; ++i) { 726 int32_t keyLen; 727 resLen=0; 728 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode); 729 keyLen = (int32_t)uprv_strlen(key); 730 u_charsToUChars(key, result+len, keyLen); 731 len += keyLen; 732 result[len++]=COLON; 733 uprv_memcpy(result+len, s, resLen*sizeof(UChar)); 734 len += resLen; 735 result[len++]=SEMI_COLON; 736 } 737 result[len++]=0; 738 u_UCharsToChars(result, setKey, len); 739 // printf(" Rule: %s\n", setKey); 740 741 ures_close(setRes); 742 ures_close(ruleRes); 743 ures_close(locRes); 744 ures_close(rb); 745 return UnicodeString(result); 746 } 747 748 AndConstraint::AndConstraint() { 749 op = AndConstraint::NONE; 750 opNum=-1; 751 rangeLow=-1; 752 rangeHigh=-1; 753 notIn=FALSE; 754 integerOnly=FALSE; 755 next=NULL; 756 } 757 758 759 AndConstraint::AndConstraint(const AndConstraint& other) { 760 this->op = other.op; 761 this->opNum=other.opNum; 762 this->rangeLow=other.rangeLow; 763 this->rangeHigh=other.rangeHigh; 764 this->integerOnly=other.integerOnly; 765 this->notIn=other.notIn; 766 if (other.next==NULL) { 767 this->next=NULL; 768 } 769 else { 770 this->next = new AndConstraint(*other.next); 771 } 772 } 773 774 AndConstraint::~AndConstraint() { 775 if (next!=NULL) { 776 delete next; 777 } 778 } 779 780 781 UBool 782 AndConstraint::isFulfilled(double number) { 783 UBool result=TRUE; 784 double value=number; 785 786 // arrrrrrgh 787 if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) { 788 return notIn; 789 } 790 791 if ( op == MOD ) { 792 value = (int32_t)value % opNum; 793 } 794 if ( rangeHigh == -1 ) { 795 if ( rangeLow == -1 ) { 796 result = TRUE; // empty rule 797 } 798 else { 799 if ( value == rangeLow ) { 800 result = TRUE; 801 } 802 else { 803 result = FALSE; 804 } 805 } 806 } 807 else { 808 if ((rangeLow <= value) && (value <= rangeHigh)) { 809 if (integerOnly) { 810 if ( value != (int32_t)value) { 811 result = FALSE; 812 } 813 else { 814 result = TRUE; 815 } 816 } 817 else { 818 result = TRUE; 819 } 820 } 821 else { 822 result = FALSE; 823 } 824 } 825 if (notIn) { 826 return !result; 827 } 828 else { 829 return result; 830 } 831 } 832 833 UBool 834 AndConstraint::isLimited() { 835 return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD; 836 } 837 838 int32_t 839 AndConstraint::updateRepeatLimit(int32_t maxLimit) { 840 841 if ( op == MOD ) { 842 return uprv_max(opNum, maxLimit); 843 } 844 else { 845 if ( rangeHigh == -1 ) { 846 return uprv_max(rangeLow, maxLimit); 847 } 848 else{ 849 return uprv_max(rangeHigh, maxLimit); 850 } 851 } 852 } 853 854 855 AndConstraint* 856 AndConstraint::add() 857 { 858 this->next = new AndConstraint(); 859 return this->next; 860 } 861 862 OrConstraint::OrConstraint() { 863 childNode=NULL; 864 next=NULL; 865 } 866 867 OrConstraint::OrConstraint(const OrConstraint& other) { 868 if ( other.childNode == NULL ) { 869 this->childNode = NULL; 870 } 871 else { 872 this->childNode = new AndConstraint(*(other.childNode)); 873 } 874 if (other.next == NULL ) { 875 this->next = NULL; 876 } 877 else { 878 this->next = new OrConstraint(*(other.next)); 879 } 880 } 881 882 OrConstraint::~OrConstraint() { 883 if (childNode!=NULL) { 884 delete childNode; 885 } 886 if (next!=NULL) { 887 delete next; 888 } 889 } 890 891 AndConstraint* 892 OrConstraint::add() 893 { 894 OrConstraint *curOrConstraint=this; 895 { 896 while (curOrConstraint->next!=NULL) { 897 curOrConstraint = curOrConstraint->next; 898 } 899 curOrConstraint->next = NULL; 900 curOrConstraint->childNode = new AndConstraint(); 901 } 902 return curOrConstraint->childNode; 903 } 904 905 UBool 906 OrConstraint::isFulfilled(double number) { 907 OrConstraint* orRule=this; 908 UBool result=FALSE; 909 910 while (orRule!=NULL && !result) { 911 result=TRUE; 912 AndConstraint* andRule = orRule->childNode; 913 while (andRule!=NULL && result) { 914 result = andRule->isFulfilled(number); 915 andRule=andRule->next; 916 } 917 orRule = orRule->next; 918 } 919 920 return result; 921 } 922 923 UBool 924 OrConstraint::isLimited() { 925 for (OrConstraint *orc = this; orc != NULL; orc = orc->next) { 926 UBool result = FALSE; 927 for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) { 928 if (andc->isLimited()) { 929 result = TRUE; 930 break; 931 } 932 } 933 if (result == FALSE) { 934 return FALSE; 935 } 936 } 937 return TRUE; 938 } 939 940 RuleChain::RuleChain() { 941 ruleHeader=NULL; 942 next = NULL; 943 repeatLimit=0; 944 } 945 946 RuleChain::RuleChain(const RuleChain& other) { 947 this->repeatLimit = other.repeatLimit; 948 this->keyword=other.keyword; 949 if (other.ruleHeader != NULL) { 950 this->ruleHeader = new OrConstraint(*(other.ruleHeader)); 951 } 952 else { 953 this->ruleHeader = NULL; 954 } 955 if (other.next != NULL ) { 956 this->next = new RuleChain(*other.next); 957 } 958 else 959 { 960 this->next = NULL; 961 } 962 } 963 964 RuleChain::~RuleChain() { 965 if (next != NULL) { 966 delete next; 967 } 968 if ( ruleHeader != NULL ) { 969 delete ruleHeader; 970 } 971 } 972 973 UnicodeString 974 RuleChain::select(double number) const { 975 976 if ( ruleHeader != NULL ) { 977 if (ruleHeader->isFulfilled(number)) { 978 return keyword; 979 } 980 } 981 if ( next != NULL ) { 982 return next->select(number); 983 } 984 else { 985 return PLURAL_KEYWORD_OTHER; 986 } 987 988 } 989 990 void 991 RuleChain::dumpRules(UnicodeString& result) { 992 UChar digitString[16]; 993 994 if ( ruleHeader != NULL ) { 995 result += keyword; 996 OrConstraint* orRule=ruleHeader; 997 while ( orRule != NULL ) { 998 AndConstraint* andRule=orRule->childNode; 999 while ( andRule != NULL ) { 1000 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { 1001 result += UNICODE_STRING_SIMPLE(" n is "); 1002 if (andRule->notIn) { 1003 result += UNICODE_STRING_SIMPLE("not "); 1004 } 1005 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1006 result += UnicodeString(digitString); 1007 } 1008 else { 1009 if (andRule->op==AndConstraint::MOD) { 1010 result += UNICODE_STRING_SIMPLE(" n mod "); 1011 uprv_itou(digitString,16, andRule->opNum,10,0); 1012 result += UnicodeString(digitString); 1013 } 1014 else { 1015 result += UNICODE_STRING_SIMPLE(" n "); 1016 } 1017 if (andRule->rangeHigh==-1) { 1018 if (andRule->notIn) { 1019 result += UNICODE_STRING_SIMPLE(" is not "); 1020 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1021 result += UnicodeString(digitString); 1022 } 1023 else { 1024 result += UNICODE_STRING_SIMPLE(" is "); 1025 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1026 result += UnicodeString(digitString); 1027 } 1028 } 1029 else { 1030 if (andRule->notIn) { 1031 if ( andRule->integerOnly ) { 1032 result += UNICODE_STRING_SIMPLE(" not in "); 1033 } 1034 else { 1035 result += UNICODE_STRING_SIMPLE(" not within "); 1036 } 1037 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1038 result += UnicodeString(digitString); 1039 result += UNICODE_STRING_SIMPLE(" .. "); 1040 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 1041 result += UnicodeString(digitString); 1042 } 1043 else { 1044 if ( andRule->integerOnly ) { 1045 result += UNICODE_STRING_SIMPLE(" in "); 1046 } 1047 else { 1048 result += UNICODE_STRING_SIMPLE(" within "); 1049 } 1050 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1051 result += UnicodeString(digitString); 1052 result += UNICODE_STRING_SIMPLE(" .. "); 1053 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 1054 } 1055 } 1056 } 1057 if ( (andRule=andRule->next) != NULL) { 1058 result += PK_AND; 1059 } 1060 } 1061 if ( (orRule = orRule->next) != NULL ) { 1062 result += PK_OR; 1063 } 1064 } 1065 } 1066 if ( next != NULL ) { 1067 next->dumpRules(result); 1068 } 1069 } 1070 1071 int32_t 1072 RuleChain::getRepeatLimit () { 1073 return repeatLimit; 1074 } 1075 1076 void 1077 RuleChain::setRepeatLimit () { 1078 int32_t limit=0; 1079 1080 if ( next != NULL ) { 1081 next->setRepeatLimit(); 1082 limit = next->repeatLimit; 1083 } 1084 1085 if ( ruleHeader != NULL ) { 1086 OrConstraint* orRule=ruleHeader; 1087 while ( orRule != NULL ) { 1088 AndConstraint* andRule=orRule->childNode; 1089 while ( andRule != NULL ) { 1090 limit = andRule->updateRepeatLimit(limit); 1091 andRule = andRule->next; 1092 } 1093 orRule = orRule->next; 1094 } 1095 } 1096 repeatLimit = limit; 1097 } 1098 1099 UErrorCode 1100 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { 1101 if ( arraySize < capacityOfKeywords-1 ) { 1102 keywords[arraySize++]=keyword; 1103 } 1104 else { 1105 return U_BUFFER_OVERFLOW_ERROR; 1106 } 1107 1108 if ( next != NULL ) { 1109 return next->getKeywords(capacityOfKeywords, keywords, arraySize); 1110 } 1111 else { 1112 return U_ZERO_ERROR; 1113 } 1114 } 1115 1116 UBool 1117 RuleChain::isKeyword(const UnicodeString& keywordParam) const { 1118 if ( keyword == keywordParam ) { 1119 return TRUE; 1120 } 1121 1122 if ( next != NULL ) { 1123 return next->isKeyword(keywordParam); 1124 } 1125 else { 1126 return FALSE; 1127 } 1128 } 1129 1130 1131 RuleParser::RuleParser() { 1132 } 1133 1134 RuleParser::~RuleParser() { 1135 } 1136 1137 void 1138 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status) 1139 { 1140 if (U_FAILURE(status)) { 1141 return; 1142 } 1143 switch(prevType) { 1144 case none: 1145 case tSemiColon: 1146 if (curType!=tKeyword) { 1147 status = U_UNEXPECTED_TOKEN; 1148 } 1149 break; 1150 case tVariableN : 1151 if (curType != tIs && curType != tMod && curType != tIn && 1152 curType != tNot && curType != tWithin) { 1153 status = U_UNEXPECTED_TOKEN; 1154 } 1155 break; 1156 case tZero: 1157 case tOne: 1158 case tTwo: 1159 case tFew: 1160 case tMany: 1161 case tOther: 1162 case tKeyword: 1163 if (curType != tColon) { 1164 status = U_UNEXPECTED_TOKEN; 1165 } 1166 break; 1167 case tColon : 1168 if (curType != tVariableN) { 1169 status = U_UNEXPECTED_TOKEN; 1170 } 1171 break; 1172 case tIs: 1173 if ( curType != tNumber && curType != tNot) { 1174 status = U_UNEXPECTED_TOKEN; 1175 } 1176 break; 1177 case tNot: 1178 if (curType != tNumber && curType != tIn && curType != tWithin) { 1179 status = U_UNEXPECTED_TOKEN; 1180 } 1181 break; 1182 case tMod: 1183 case tDot: 1184 case tIn: 1185 case tWithin: 1186 case tAnd: 1187 case tOr: 1188 if (curType != tNumber && curType != tVariableN) { 1189 status = U_UNEXPECTED_TOKEN; 1190 } 1191 break; 1192 case tNumber: 1193 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot && 1194 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr) 1195 { 1196 status = U_UNEXPECTED_TOKEN; 1197 } 1198 break; 1199 default: 1200 status = U_UNEXPECTED_TOKEN; 1201 break; 1202 } 1203 } 1204 1205 void 1206 RuleParser::getNextToken(const UnicodeString& ruleData, 1207 int32_t *ruleIndex, 1208 UnicodeString& token, 1209 tokenType& type, 1210 UErrorCode &status) 1211 { 1212 int32_t curIndex= *ruleIndex; 1213 UChar ch; 1214 tokenType prevType=none; 1215 1216 if (U_FAILURE(status)) { 1217 return; 1218 } 1219 while (curIndex<ruleData.length()) { 1220 ch = ruleData.charAt(curIndex); 1221 if ( !inRange(ch, type) ) { 1222 status = U_ILLEGAL_CHARACTER; 1223 return; 1224 } 1225 switch (type) { 1226 case tSpace: 1227 if ( *ruleIndex != curIndex ) { // letter 1228 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1229 *ruleIndex=curIndex; 1230 type=prevType; 1231 getKeyType(token, type, status); 1232 return; 1233 } 1234 else { 1235 *ruleIndex=*ruleIndex+1; 1236 } 1237 break; // consective space 1238 case tColon: 1239 case tSemiColon: 1240 if ( *ruleIndex != curIndex ) { 1241 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1242 *ruleIndex=curIndex; 1243 type=prevType; 1244 getKeyType(token, type, status); 1245 return; 1246 } 1247 else { 1248 *ruleIndex=curIndex+1; 1249 return; 1250 } 1251 case tLetter: 1252 if ((type==prevType)||(prevType==none)) { 1253 prevType=type; 1254 break; 1255 } 1256 break; 1257 case tNumber: 1258 if ((type==prevType)||(prevType==none)) { 1259 prevType=type; 1260 break; 1261 } 1262 else { 1263 *ruleIndex=curIndex+1; 1264 return; 1265 } 1266 case tDot: 1267 if (prevType==none) { // first dot 1268 prevType=type; 1269 continue; 1270 } 1271 else { 1272 if ( *ruleIndex != curIndex ) { 1273 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1274 *ruleIndex=curIndex; // letter 1275 type=prevType; 1276 getKeyType(token, type, status); 1277 return; 1278 } 1279 else { // two consective dots 1280 *ruleIndex=curIndex+2; 1281 return; 1282 } 1283 } 1284 break; 1285 default: 1286 status = U_UNEXPECTED_TOKEN; 1287 return; 1288 } 1289 curIndex++; 1290 } 1291 if ( curIndex>=ruleData.length() ) { 1292 if ( (type == tLetter)||(type == tNumber) ) { 1293 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1294 getKeyType(token, type, status); 1295 if (U_FAILURE(status)) { 1296 return; 1297 } 1298 } 1299 *ruleIndex = ruleData.length(); 1300 } 1301 } 1302 1303 UBool 1304 RuleParser::inRange(UChar ch, tokenType& type) { 1305 if ((ch>=CAP_A) && (ch<=CAP_Z)) { 1306 // we assume all characters are in lower case already. 1307 return FALSE; 1308 } 1309 if ((ch>=LOW_A) && (ch<=LOW_Z)) { 1310 type = tLetter; 1311 return TRUE; 1312 } 1313 if ((ch>=U_ZERO) && (ch<=U_NINE)) { 1314 type = tNumber; 1315 return TRUE; 1316 } 1317 switch (ch) { 1318 case COLON: 1319 type = tColon; 1320 return TRUE; 1321 case SPACE: 1322 type = tSpace; 1323 return TRUE; 1324 case SEMI_COLON: 1325 type = tSemiColon; 1326 return TRUE; 1327 case DOT: 1328 type = tDot; 1329 return TRUE; 1330 default : 1331 type = none; 1332 return FALSE; 1333 } 1334 } 1335 1336 1337 void 1338 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status) 1339 { 1340 if (U_FAILURE(status)) { 1341 return; 1342 } 1343 if ( keyType==tNumber) { 1344 } 1345 else if (token==PK_VAR_N) { 1346 keyType = tVariableN; 1347 } 1348 else if (token==PK_IS) { 1349 keyType = tIs; 1350 } 1351 else if (token==PK_AND) { 1352 keyType = tAnd; 1353 } 1354 else if (token==PK_IN) { 1355 keyType = tIn; 1356 } 1357 else if (token==PK_WITHIN) { 1358 keyType = tWithin; 1359 } 1360 else if (token==PK_NOT) { 1361 keyType = tNot; 1362 } 1363 else if (token==PK_MOD) { 1364 keyType = tMod; 1365 } 1366 else if (token==PK_OR) { 1367 keyType = tOr; 1368 } 1369 else if ( isValidKeyword(token) ) { 1370 keyType = tKeyword; 1371 } 1372 else { 1373 status = U_UNEXPECTED_TOKEN; 1374 } 1375 } 1376 1377 UBool 1378 RuleParser::isValidKeyword(const UnicodeString& token) { 1379 return PatternProps::isIdentifier(token.getBuffer(), token.length()); 1380 } 1381 1382 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) 1383 : pos(0), fKeywordNames(status) { 1384 if (U_FAILURE(status)) { 1385 return; 1386 } 1387 fKeywordNames.setDeleter(uhash_deleteUObject); 1388 UBool addKeywordOther=TRUE; 1389 RuleChain *node=header; 1390 while(node!=NULL) { 1391 fKeywordNames.addElement(new UnicodeString(node->keyword), status); 1392 if (U_FAILURE(status)) { 1393 return; 1394 } 1395 if (node->keyword == PLURAL_KEYWORD_OTHER) { 1396 addKeywordOther= FALSE; 1397 } 1398 node=node->next; 1399 } 1400 1401 if (addKeywordOther) { 1402 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); 1403 } 1404 } 1405 1406 const UnicodeString* 1407 PluralKeywordEnumeration::snext(UErrorCode& status) { 1408 if (U_SUCCESS(status) && pos < fKeywordNames.size()) { 1409 return (const UnicodeString*)fKeywordNames.elementAt(pos++); 1410 } 1411 return NULL; 1412 } 1413 1414 void 1415 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { 1416 pos=0; 1417 } 1418 1419 int32_t 1420 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { 1421 return fKeywordNames.size(); 1422 } 1423 1424 PluralKeywordEnumeration::~PluralKeywordEnumeration() { 1425 } 1426 1427 U_NAMESPACE_END 1428 1429 1430 #endif /* #if !UCONFIG_NO_FORMATTING */ 1431 1432 //eof 1433