1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File plurrule.cpp 8 */ 9 10 #include "unicode/utypes.h" 11 #include "unicode/localpointer.h" 12 #include "unicode/plurrule.h" 13 #include "unicode/upluralrules.h" 14 #include "unicode/ures.h" 15 #include "cmemory.h" 16 #include "cstring.h" 17 #include "hash.h" 18 #include "mutex.h" 19 #include "patternprops.h" 20 #include "plurrule_impl.h" 21 #include "putilimp.h" 22 #include "ucln_in.h" 23 #include "ustrfmt.h" 24 #include "locutil.h" 25 #include "uassert.h" 26 27 #if !UCONFIG_NO_FORMATTING 28 29 U_NAMESPACE_BEGIN 30 31 // shared by all instances when lazy-initializing samples 32 static UMutex pluralMutex = U_MUTEX_INITIALIZER; 33 34 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) 35 36 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; 37 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; 38 static const UChar PK_IN[]={LOW_I,LOW_N,0}; 39 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; 40 static const UChar PK_IS[]={LOW_I,LOW_S,0}; 41 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; 42 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; 43 static const UChar PK_OR[]={LOW_O,LOW_R,0}; 44 static const UChar PK_VAR_N[]={LOW_N,0}; 45 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; 46 47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) 48 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) 49 50 PluralRules::PluralRules(UErrorCode& status) 51 : UObject(), 52 mRules(NULL), 53 mParser(NULL), 54 mSamples(NULL), 55 mSampleInfo(NULL), 56 mSampleInfoCount(0) 57 { 58 if (U_FAILURE(status)) { 59 return; 60 } 61 mParser = new RuleParser(); 62 if (mParser==NULL) { 63 status = U_MEMORY_ALLOCATION_ERROR; 64 } 65 } 66 67 PluralRules::PluralRules(const PluralRules& other) 68 : UObject(other), 69 mRules(NULL), 70 mParser(NULL), 71 mSamples(NULL), 72 mSampleInfo(NULL), 73 mSampleInfoCount(0) 74 { 75 *this=other; 76 } 77 78 PluralRules::~PluralRules() { 79 delete mRules; 80 delete mParser; 81 uprv_free(mSamples); 82 uprv_free(mSampleInfo); 83 } 84 85 PluralRules* 86 PluralRules::clone() const { 87 return new PluralRules(*this); 88 } 89 90 PluralRules& 91 PluralRules::operator=(const PluralRules& other) { 92 if (this != &other) { 93 delete mRules; 94 if (other.mRules==NULL) { 95 mRules = NULL; 96 } 97 else { 98 mRules = new RuleChain(*other.mRules); 99 } 100 delete mParser; 101 mParser = new RuleParser(); 102 103 uprv_free(mSamples); 104 mSamples = NULL; 105 106 uprv_free(mSampleInfo); 107 mSampleInfo = NULL; 108 mSampleInfoCount = 0; 109 } 110 111 return *this; 112 } 113 114 PluralRules* U_EXPORT2 115 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { 116 RuleChain rules; 117 118 if (U_FAILURE(status)) { 119 return NULL; 120 } 121 PluralRules *newRules = new PluralRules(status); 122 if ( (newRules != NULL)&& U_SUCCESS(status) ) { 123 newRules->parseDescription((UnicodeString &)description, rules, status); 124 if (U_SUCCESS(status)) { 125 newRules->addRules(rules); 126 } 127 } 128 if (U_FAILURE(status)) { 129 delete newRules; 130 return NULL; 131 } 132 else { 133 return newRules; 134 } 135 } 136 137 PluralRules* U_EXPORT2 138 PluralRules::createDefaultRules(UErrorCode& status) { 139 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); 140 } 141 142 PluralRules* U_EXPORT2 143 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { 144 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); 145 } 146 147 PluralRules* U_EXPORT2 148 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { 149 RuleChain rChain; 150 if (U_FAILURE(status)) { 151 return NULL; 152 } 153 if (type >= UPLURAL_TYPE_COUNT) { 154 status = U_ILLEGAL_ARGUMENT_ERROR; 155 return NULL; 156 } 157 PluralRules *newObj = new PluralRules(status); 158 if (newObj==NULL || U_FAILURE(status)) { 159 delete newObj; 160 return NULL; 161 } 162 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); 163 if ((locRule.length() != 0) && U_SUCCESS(status)) { 164 newObj->parseDescription(locRule, rChain, status); 165 if (U_SUCCESS(status)) { 166 newObj->addRules(rChain); 167 } 168 } 169 if (U_FAILURE(status)||(locRule.length() == 0)) { 170 // use default plural rule 171 status = U_ZERO_ERROR; 172 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); 173 newObj->parseDescription(defRule, rChain, status); 174 newObj->addRules(rChain); 175 } 176 177 return newObj; 178 } 179 180 UnicodeString 181 PluralRules::select(int32_t number) const { 182 if (mRules == NULL) { 183 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); 184 } 185 else { 186 return mRules->select(number); 187 } 188 } 189 190 UnicodeString 191 PluralRules::select(double number) const { 192 if (mRules == NULL) { 193 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); 194 } 195 else { 196 return mRules->select(number); 197 } 198 } 199 200 StringEnumeration* 201 PluralRules::getKeywords(UErrorCode& status) const { 202 if (U_FAILURE(status)) return NULL; 203 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); 204 if (U_FAILURE(status)) { 205 delete nameEnumerator; 206 return NULL; 207 } 208 209 return nameEnumerator; 210 } 211 212 double 213 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) { 214 double val = 0.0; 215 UErrorCode status = U_ZERO_ERROR; 216 int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status); 217 return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE; 218 } 219 220 int32_t 221 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest, 222 int32_t destCapacity, UErrorCode& error) { 223 return getSamplesInternal(keyword, dest, destCapacity, FALSE, error); 224 } 225 226 int32_t 227 PluralRules::getSamples(const UnicodeString &keyword, double *dest, 228 int32_t destCapacity, UErrorCode& status) { 229 return getSamplesInternal(keyword, dest, destCapacity, TRUE, status); 230 } 231 232 int32_t 233 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest, 234 int32_t destCapacity, UBool includeUnlimited, 235 UErrorCode& status) { 236 initSamples(status); 237 if (U_FAILURE(status)) { 238 return -1; 239 } 240 if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) { 241 status = U_ILLEGAL_ARGUMENT_ERROR; 242 return -1; 243 } 244 245 int32_t index = getKeywordIndex(keyword, status); 246 if (index == -1) { 247 return 0; 248 } 249 250 const int32_t LIMIT_MASK = 0x1 << 31; 251 252 if (!includeUnlimited) { 253 if ((mSampleInfo[index] & LIMIT_MASK) == 0) { 254 return -1; 255 } 256 } 257 258 int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK; 259 int32_t limit = mSampleInfo[index] & ~LIMIT_MASK; 260 int32_t len = limit - start; 261 if (len <= destCapacity) { 262 destCapacity = len; 263 } else if (includeUnlimited) { 264 len = destCapacity; // no overflow, and don't report more than we copy 265 } else { 266 status = U_BUFFER_OVERFLOW_ERROR; 267 return len; 268 } 269 for (int32_t i = 0; i < destCapacity; ++i, ++start) { 270 dest[i] = mSamples[start]; 271 } 272 return len; 273 } 274 275 276 UBool 277 PluralRules::isKeyword(const UnicodeString& keyword) const { 278 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { 279 return true; 280 } 281 else { 282 if (mRules==NULL) { 283 return false; 284 } 285 else { 286 return mRules->isKeyword(keyword); 287 } 288 } 289 } 290 291 UnicodeString 292 PluralRules::getKeywordOther() const { 293 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); 294 } 295 296 UBool 297 PluralRules::operator==(const PluralRules& other) const { 298 int32_t limit; 299 const UnicodeString *ptrKeyword; 300 UErrorCode status= U_ZERO_ERROR; 301 302 if ( this == &other ) { 303 return TRUE; 304 } 305 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); 306 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); 307 if (U_FAILURE(status)) { 308 return FALSE; 309 } 310 311 if (myKeywordList->count(status)!=otherKeywordList->count(status)) { 312 return FALSE; 313 } 314 myKeywordList->reset(status); 315 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { 316 if (!other.isKeyword(*ptrKeyword)) { 317 return FALSE; 318 } 319 } 320 otherKeywordList->reset(status); 321 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { 322 if (!this->isKeyword(*ptrKeyword)) { 323 return FALSE; 324 } 325 } 326 if (U_FAILURE(status)) { 327 return FALSE; 328 } 329 330 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { 331 return FALSE; 332 } 333 UnicodeString myKeyword, otherKeyword; 334 for (int32_t i=0; i<limit; ++i) { 335 myKeyword = this->select(i); 336 otherKeyword = other.select(i); 337 if (myKeyword!=otherKeyword) { 338 return FALSE; 339 } 340 } 341 return TRUE; 342 } 343 344 void 345 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status) 346 { 347 int32_t ruleIndex=0; 348 UnicodeString token; 349 tokenType type; 350 tokenType prevType=none; 351 RuleChain *ruleChain=NULL; 352 AndConstraint *curAndConstraint=NULL; 353 OrConstraint *orNode=NULL; 354 RuleChain *lastChain=NULL; 355 356 if (U_FAILURE(status)) { 357 return; 358 } 359 UnicodeString ruleData = data.toLower(""); 360 while (ruleIndex< ruleData.length()) { 361 mParser->getNextToken(ruleData, &ruleIndex, token, type, status); 362 if (U_FAILURE(status)) { 363 return; 364 } 365 mParser->checkSyntax(prevType, type, status); 366 if (U_FAILURE(status)) { 367 return; 368 } 369 switch (type) { 370 case tAnd: 371 U_ASSERT(curAndConstraint != NULL); 372 curAndConstraint = curAndConstraint->add(); 373 break; 374 case tOr: 375 lastChain = &rules; 376 while (lastChain->next !=NULL) { 377 lastChain = lastChain->next; 378 } 379 orNode=lastChain->ruleHeader; 380 while (orNode->next != NULL) { 381 orNode = orNode->next; 382 } 383 orNode->next= new OrConstraint(); 384 orNode=orNode->next; 385 orNode->next=NULL; 386 curAndConstraint = orNode->add(); 387 break; 388 case tIs: 389 U_ASSERT(curAndConstraint != NULL); 390 curAndConstraint->rangeHigh=-1; 391 break; 392 case tNot: 393 U_ASSERT(curAndConstraint != NULL); 394 curAndConstraint->notIn=TRUE; 395 break; 396 case tIn: 397 U_ASSERT(curAndConstraint != NULL); 398 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 399 curAndConstraint->integerOnly = TRUE; 400 break; 401 case tWithin: 402 U_ASSERT(curAndConstraint != NULL); 403 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; 404 break; 405 case tNumber: 406 U_ASSERT(curAndConstraint != NULL); 407 if ( (curAndConstraint->op==AndConstraint::MOD)&& 408 (curAndConstraint->opNum == -1 ) ) { 409 curAndConstraint->opNum=getNumberValue(token); 410 } 411 else { 412 if (curAndConstraint->rangeLow == -1) { 413 curAndConstraint->rangeLow=getNumberValue(token); 414 } 415 else { 416 curAndConstraint->rangeHigh=getNumberValue(token); 417 } 418 } 419 break; 420 case tMod: 421 U_ASSERT(curAndConstraint != NULL); 422 curAndConstraint->op=AndConstraint::MOD; 423 break; 424 case tKeyword: 425 if (ruleChain==NULL) { 426 ruleChain = &rules; 427 } 428 else { 429 while (ruleChain->next!=NULL){ 430 ruleChain=ruleChain->next; 431 } 432 ruleChain=ruleChain->next=new RuleChain(); 433 } 434 if (ruleChain->ruleHeader != NULL) { 435 delete ruleChain->ruleHeader; 436 } 437 orNode = ruleChain->ruleHeader = new OrConstraint(); 438 curAndConstraint = orNode->add(); 439 ruleChain->keyword = token; 440 break; 441 default: 442 break; 443 } 444 prevType=type; 445 } 446 } 447 448 int32_t 449 PluralRules::getNumberValue(const UnicodeString& token) const { 450 int32_t i; 451 char digits[128]; 452 453 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); 454 digits[i]='\0'; 455 456 return((int32_t)atoi(digits)); 457 } 458 459 460 void 461 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) { 462 int32_t i=*curIndex; 463 464 localeName.remove(); 465 while (i< localeData.length()) { 466 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) { 467 break; 468 } 469 i++; 470 } 471 472 while (i< localeData.length()) { 473 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) { 474 break; 475 } 476 localeName+=localeData.charAt(i++); 477 } 478 *curIndex=i; 479 } 480 481 482 int32_t 483 PluralRules::getRepeatLimit() const { 484 if (mRules!=NULL) { 485 return mRules->getRepeatLimit(); 486 } 487 else { 488 return 0; 489 } 490 } 491 492 int32_t 493 PluralRules::getKeywordIndex(const UnicodeString& keyword, 494 UErrorCode& status) const { 495 if (U_SUCCESS(status)) { 496 int32_t n = 0; 497 RuleChain* rc = mRules; 498 while (rc != NULL) { 499 if (rc->ruleHeader != NULL) { 500 if (rc->keyword == keyword) { 501 return n; 502 } 503 ++n; 504 } 505 rc = rc->next; 506 } 507 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { 508 return n; 509 } 510 } 511 return -1; 512 } 513 514 typedef struct SampleRecord { 515 int32_t ruleIndex; 516 double value; 517 } SampleRecord; 518 519 void 520 PluralRules::initSamples(UErrorCode& status) { 521 if (U_FAILURE(status)) { 522 return; 523 } 524 Mutex lock(&pluralMutex); 525 526 if (mSamples) { 527 return; 528 } 529 530 // Note, the original design let you have multiple rules with the same keyword. But 531 // we don't use that in our data and existing functions in this implementation don't 532 // fully support it (for example, the returned keywords is a list and not a set). 533 // 534 // So I don't support this here either. If you ask for samples, or for all values, 535 // you will get information about the first rule with that keyword, not all rules with 536 // that keyword. 537 538 int32_t maxIndex = 0; 539 int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end 540 RuleChain* rc = mRules; 541 while (rc != NULL) { 542 if (rc->ruleHeader != NULL) { 543 if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { 544 otherIndex = maxIndex; 545 } 546 ++maxIndex; 547 } 548 rc = rc->next; 549 } 550 if (otherIndex == -1) { 551 ++maxIndex; 552 } 553 554 LocalMemory<int32_t> newSampleInfo; 555 if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) { 556 status = U_MEMORY_ALLOCATION_ERROR; 557 return; 558 } 559 560 const int32_t LIMIT_MASK = 0x1 << 31; 561 562 rc = mRules; 563 int32_t n = 0; 564 while (rc != NULL) { 565 if (rc->ruleHeader != NULL) { 566 newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0; 567 } 568 rc = rc->next; 569 } 570 if (otherIndex == -1) { 571 newSampleInfo[maxIndex - 1] = 0; // unlimited 572 } 573 574 MaybeStackArray<SampleRecord, 10> newSamples; 575 int32_t sampleCount = 0; 576 577 int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2; 578 if (limit < 10) { 579 limit = 10; 580 } 581 582 for (int i = 0, keywordsRemaining = maxIndex; 583 keywordsRemaining > 0 && i < limit; 584 ++i) { 585 double val = i / 2.0; 586 587 n = 0; 588 rc = mRules; 589 int32_t found = -1; 590 while (rc != NULL) { 591 if (rc->ruleHeader != NULL) { 592 if (rc->ruleHeader->isFulfilled(val)) { 593 found = n; 594 break; 595 } 596 ++n; 597 } 598 rc = rc->next; 599 } 600 if (found == -1) { 601 // 'other'. If there is an 'other' rule, the rule set is bad since nothing 602 // should leak through, but we don't bother to report that here. 603 found = otherIndex == -1 ? maxIndex - 1 : otherIndex; 604 } 605 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set 606 continue; 607 } 608 newSampleInfo[found] += 1; // won't impact limit flag 609 610 if (sampleCount == newSamples.getCapacity()) { 611 int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2; 612 if (NULL == newSamples.resize(newCapacity, sampleCount)) { 613 status = U_MEMORY_ALLOCATION_ERROR; 614 return; 615 } 616 } 617 newSamples[sampleCount].ruleIndex = found; 618 newSamples[sampleCount].value = val; 619 ++sampleCount; 620 621 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set 622 --keywordsRemaining; 623 } 624 } 625 626 // sort the values by index, leaving order otherwise unchanged 627 // this is just a selection sort for simplicity 628 LocalMemory<double> values; 629 if (NULL == values.allocateInsteadAndCopy(sampleCount)) { 630 status = U_MEMORY_ALLOCATION_ERROR; 631 return; 632 } 633 for (int i = 0, j = 0; i < maxIndex; ++i) { 634 for (int k = 0; k < sampleCount; ++k) { 635 if (newSamples[k].ruleIndex == i) { 636 values[j++] = newSamples[k].value; 637 } 638 } 639 } 640 641 // convert array of mask/lengths to array of mask/limits 642 limit = 0; 643 for (int i = 0; i < maxIndex; ++i) { 644 int32_t info = newSampleInfo[i]; 645 int32_t len = info & ~LIMIT_MASK; 646 limit += len; 647 // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples, 648 // it's not really unlimited, so mark it as limited 649 int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK; 650 newSampleInfo[i] = limit | mask; 651 } 652 653 // ok, we've got good data 654 mSamples = values.orphan(); 655 mSampleInfo = newSampleInfo.orphan(); 656 mSampleInfoCount = maxIndex; 657 } 658 659 void 660 PluralRules::addRules(RuleChain& rules) { 661 RuleChain *newRule = new RuleChain(rules); 662 this->mRules=newRule; 663 newRule->setRepeatLimit(); 664 } 665 666 UnicodeString 667 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { 668 UnicodeString emptyStr; 669 670 if (U_FAILURE(errCode)) { 671 return emptyStr; 672 } 673 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); 674 if(U_FAILURE(errCode)) { 675 return emptyStr; 676 } 677 const char *typeKey; 678 switch (type) { 679 case UPLURAL_TYPE_CARDINAL: 680 typeKey = "locales"; 681 break; 682 case UPLURAL_TYPE_ORDINAL: 683 typeKey = "locales_ordinals"; 684 break; 685 default: 686 // Must not occur: The caller should have checked for valid types. 687 errCode = U_ILLEGAL_ARGUMENT_ERROR; 688 return emptyStr; 689 } 690 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); 691 if(U_FAILURE(errCode)) { 692 return emptyStr; 693 } 694 int32_t resLen=0; 695 const char *curLocaleName=locale.getName(); 696 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); 697 698 if (s == NULL) { 699 // Check parent locales. 700 UErrorCode status = U_ZERO_ERROR; 701 char parentLocaleName[ULOC_FULLNAME_CAPACITY]; 702 const char *curLocaleName=locale.getName(); 703 uprv_strcpy(parentLocaleName, curLocaleName); 704 705 while (uloc_getParent(parentLocaleName, parentLocaleName, 706 ULOC_FULLNAME_CAPACITY, &status) > 0) { 707 resLen=0; 708 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); 709 if (s != NULL) { 710 errCode = U_ZERO_ERROR; 711 break; 712 } 713 status = U_ZERO_ERROR; 714 } 715 } 716 if (s==NULL) { 717 return emptyStr; 718 } 719 720 char setKey[256]; 721 UChar result[256]; 722 u_UCharsToChars(s, setKey, resLen + 1); 723 // printf("\n PluralRule: %s\n", setKey); 724 725 726 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); 727 if(U_FAILURE(errCode)) { 728 return emptyStr; 729 } 730 resLen=0; 731 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); 732 if (U_FAILURE(errCode)) { 733 return emptyStr; 734 } 735 736 int32_t numberKeys = ures_getSize(setRes.getAlias()); 737 char *key=NULL; 738 int32_t len=0; 739 for(int32_t i=0; i<numberKeys; ++i) { 740 int32_t keyLen; 741 resLen=0; 742 s=ures_getNextString(setRes.getAlias(), &resLen, (const char**)&key, &errCode); 743 keyLen = (int32_t)uprv_strlen(key); 744 u_charsToUChars(key, result+len, keyLen); 745 len += keyLen; 746 result[len++]=COLON; 747 uprv_memcpy(result+len, s, resLen*sizeof(UChar)); 748 len += resLen; 749 result[len++]=SEMI_COLON; 750 } 751 result[len++]=0; 752 u_UCharsToChars(result, setKey, len); 753 // printf(" Rule: %s\n", setKey); 754 755 return UnicodeString(result); 756 } 757 758 AndConstraint::AndConstraint() { 759 op = AndConstraint::NONE; 760 opNum=-1; 761 rangeLow=-1; 762 rangeHigh=-1; 763 notIn=FALSE; 764 integerOnly=FALSE; 765 next=NULL; 766 } 767 768 769 AndConstraint::AndConstraint(const AndConstraint& other) { 770 this->op = other.op; 771 this->opNum=other.opNum; 772 this->rangeLow=other.rangeLow; 773 this->rangeHigh=other.rangeHigh; 774 this->integerOnly=other.integerOnly; 775 this->notIn=other.notIn; 776 if (other.next==NULL) { 777 this->next=NULL; 778 } 779 else { 780 this->next = new AndConstraint(*other.next); 781 } 782 } 783 784 AndConstraint::~AndConstraint() { 785 if (next!=NULL) { 786 delete next; 787 } 788 } 789 790 791 UBool 792 AndConstraint::isFulfilled(double number) { 793 UBool result=TRUE; 794 double value=number; 795 796 // arrrrrrgh 797 if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) { 798 return notIn; 799 } 800 801 if ( op == MOD ) { 802 value = (int32_t)value % opNum; 803 } 804 if ( rangeHigh == -1 ) { 805 if ( rangeLow == -1 ) { 806 result = TRUE; // empty rule 807 } 808 else { 809 if ( value == rangeLow ) { 810 result = TRUE; 811 } 812 else { 813 result = FALSE; 814 } 815 } 816 } 817 else { 818 if ((rangeLow <= value) && (value <= rangeHigh)) { 819 if (integerOnly) { 820 if ( value != (int32_t)value) { 821 result = FALSE; 822 } 823 else { 824 result = TRUE; 825 } 826 } 827 else { 828 result = TRUE; 829 } 830 } 831 else { 832 result = FALSE; 833 } 834 } 835 if (notIn) { 836 return !result; 837 } 838 else { 839 return result; 840 } 841 } 842 843 UBool 844 AndConstraint::isLimited() { 845 return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD; 846 } 847 848 int32_t 849 AndConstraint::updateRepeatLimit(int32_t maxLimit) { 850 851 if ( op == MOD ) { 852 return uprv_max(opNum, maxLimit); 853 } 854 else { 855 if ( rangeHigh == -1 ) { 856 return uprv_max(rangeLow, maxLimit); 857 } 858 else{ 859 return uprv_max(rangeHigh, maxLimit); 860 } 861 } 862 } 863 864 865 AndConstraint* 866 AndConstraint::add() 867 { 868 this->next = new AndConstraint(); 869 return this->next; 870 } 871 872 OrConstraint::OrConstraint() { 873 childNode=NULL; 874 next=NULL; 875 } 876 877 OrConstraint::OrConstraint(const OrConstraint& other) { 878 if ( other.childNode == NULL ) { 879 this->childNode = NULL; 880 } 881 else { 882 this->childNode = new AndConstraint(*(other.childNode)); 883 } 884 if (other.next == NULL ) { 885 this->next = NULL; 886 } 887 else { 888 this->next = new OrConstraint(*(other.next)); 889 } 890 } 891 892 OrConstraint::~OrConstraint() { 893 if (childNode!=NULL) { 894 delete childNode; 895 } 896 if (next!=NULL) { 897 delete next; 898 } 899 } 900 901 AndConstraint* 902 OrConstraint::add() 903 { 904 OrConstraint *curOrConstraint=this; 905 { 906 while (curOrConstraint->next!=NULL) { 907 curOrConstraint = curOrConstraint->next; 908 } 909 curOrConstraint->next = NULL; 910 curOrConstraint->childNode = new AndConstraint(); 911 } 912 return curOrConstraint->childNode; 913 } 914 915 UBool 916 OrConstraint::isFulfilled(double number) { 917 OrConstraint* orRule=this; 918 UBool result=FALSE; 919 920 while (orRule!=NULL && !result) { 921 result=TRUE; 922 AndConstraint* andRule = orRule->childNode; 923 while (andRule!=NULL && result) { 924 result = andRule->isFulfilled(number); 925 andRule=andRule->next; 926 } 927 orRule = orRule->next; 928 } 929 930 return result; 931 } 932 933 UBool 934 OrConstraint::isLimited() { 935 for (OrConstraint *orc = this; orc != NULL; orc = orc->next) { 936 UBool result = FALSE; 937 for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) { 938 if (andc->isLimited()) { 939 result = TRUE; 940 break; 941 } 942 } 943 if (result == FALSE) { 944 return FALSE; 945 } 946 } 947 return TRUE; 948 } 949 950 RuleChain::RuleChain() { 951 ruleHeader=NULL; 952 next = NULL; 953 repeatLimit=0; 954 } 955 956 RuleChain::RuleChain(const RuleChain& other) { 957 this->repeatLimit = other.repeatLimit; 958 this->keyword=other.keyword; 959 if (other.ruleHeader != NULL) { 960 this->ruleHeader = new OrConstraint(*(other.ruleHeader)); 961 } 962 else { 963 this->ruleHeader = NULL; 964 } 965 if (other.next != NULL ) { 966 this->next = new RuleChain(*other.next); 967 } 968 else 969 { 970 this->next = NULL; 971 } 972 } 973 974 RuleChain::~RuleChain() { 975 if (next != NULL) { 976 delete next; 977 } 978 if ( ruleHeader != NULL ) { 979 delete ruleHeader; 980 } 981 } 982 983 UnicodeString 984 RuleChain::select(double number) const { 985 986 if ( ruleHeader != NULL ) { 987 if (ruleHeader->isFulfilled(number)) { 988 return keyword; 989 } 990 } 991 if ( next != NULL ) { 992 return next->select(number); 993 } 994 else { 995 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); 996 } 997 998 } 999 1000 void 1001 RuleChain::dumpRules(UnicodeString& result) { 1002 UChar digitString[16]; 1003 1004 if ( ruleHeader != NULL ) { 1005 result += keyword; 1006 OrConstraint* orRule=ruleHeader; 1007 while ( orRule != NULL ) { 1008 AndConstraint* andRule=orRule->childNode; 1009 while ( andRule != NULL ) { 1010 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { 1011 result += UNICODE_STRING_SIMPLE(" n is "); 1012 if (andRule->notIn) { 1013 result += UNICODE_STRING_SIMPLE("not "); 1014 } 1015 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1016 result += UnicodeString(digitString); 1017 } 1018 else { 1019 if (andRule->op==AndConstraint::MOD) { 1020 result += UNICODE_STRING_SIMPLE(" n mod "); 1021 uprv_itou(digitString,16, andRule->opNum,10,0); 1022 result += UnicodeString(digitString); 1023 } 1024 else { 1025 result += UNICODE_STRING_SIMPLE(" n "); 1026 } 1027 if (andRule->rangeHigh==-1) { 1028 if (andRule->notIn) { 1029 result += UNICODE_STRING_SIMPLE(" is not "); 1030 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1031 result += UnicodeString(digitString); 1032 } 1033 else { 1034 result += UNICODE_STRING_SIMPLE(" is "); 1035 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1036 result += UnicodeString(digitString); 1037 } 1038 } 1039 else { 1040 if (andRule->notIn) { 1041 if ( andRule->integerOnly ) { 1042 result += UNICODE_STRING_SIMPLE(" not in "); 1043 } 1044 else { 1045 result += UNICODE_STRING_SIMPLE(" not within "); 1046 } 1047 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1048 result += UnicodeString(digitString); 1049 result += UNICODE_STRING_SIMPLE(" .. "); 1050 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 1051 result += UnicodeString(digitString); 1052 } 1053 else { 1054 if ( andRule->integerOnly ) { 1055 result += UNICODE_STRING_SIMPLE(" in "); 1056 } 1057 else { 1058 result += UNICODE_STRING_SIMPLE(" within "); 1059 } 1060 uprv_itou(digitString,16, andRule->rangeLow,10,0); 1061 result += UnicodeString(digitString); 1062 result += UNICODE_STRING_SIMPLE(" .. "); 1063 uprv_itou(digitString,16, andRule->rangeHigh,10,0); 1064 } 1065 } 1066 } 1067 if ( (andRule=andRule->next) != NULL) { 1068 result.append(PK_AND, 3); 1069 } 1070 } 1071 if ( (orRule = orRule->next) != NULL ) { 1072 result.append(PK_OR, 2); 1073 } 1074 } 1075 } 1076 if ( next != NULL ) { 1077 next->dumpRules(result); 1078 } 1079 } 1080 1081 int32_t 1082 RuleChain::getRepeatLimit () { 1083 return repeatLimit; 1084 } 1085 1086 void 1087 RuleChain::setRepeatLimit () { 1088 int32_t limit=0; 1089 1090 if ( next != NULL ) { 1091 next->setRepeatLimit(); 1092 limit = next->repeatLimit; 1093 } 1094 1095 if ( ruleHeader != NULL ) { 1096 OrConstraint* orRule=ruleHeader; 1097 while ( orRule != NULL ) { 1098 AndConstraint* andRule=orRule->childNode; 1099 while ( andRule != NULL ) { 1100 limit = andRule->updateRepeatLimit(limit); 1101 andRule = andRule->next; 1102 } 1103 orRule = orRule->next; 1104 } 1105 } 1106 repeatLimit = limit; 1107 } 1108 1109 UErrorCode 1110 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { 1111 if ( arraySize < capacityOfKeywords-1 ) { 1112 keywords[arraySize++]=keyword; 1113 } 1114 else { 1115 return U_BUFFER_OVERFLOW_ERROR; 1116 } 1117 1118 if ( next != NULL ) { 1119 return next->getKeywords(capacityOfKeywords, keywords, arraySize); 1120 } 1121 else { 1122 return U_ZERO_ERROR; 1123 } 1124 } 1125 1126 UBool 1127 RuleChain::isKeyword(const UnicodeString& keywordParam) const { 1128 if ( keyword == keywordParam ) { 1129 return TRUE; 1130 } 1131 1132 if ( next != NULL ) { 1133 return next->isKeyword(keywordParam); 1134 } 1135 else { 1136 return FALSE; 1137 } 1138 } 1139 1140 1141 RuleParser::RuleParser() { 1142 } 1143 1144 RuleParser::~RuleParser() { 1145 } 1146 1147 void 1148 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status) 1149 { 1150 if (U_FAILURE(status)) { 1151 return; 1152 } 1153 switch(prevType) { 1154 case none: 1155 case tSemiColon: 1156 if (curType!=tKeyword) { 1157 status = U_UNEXPECTED_TOKEN; 1158 } 1159 break; 1160 case tVariableN : 1161 if (curType != tIs && curType != tMod && curType != tIn && 1162 curType != tNot && curType != tWithin) { 1163 status = U_UNEXPECTED_TOKEN; 1164 } 1165 break; 1166 case tZero: 1167 case tOne: 1168 case tTwo: 1169 case tFew: 1170 case tMany: 1171 case tOther: 1172 case tKeyword: 1173 if (curType != tColon) { 1174 status = U_UNEXPECTED_TOKEN; 1175 } 1176 break; 1177 case tColon : 1178 if (curType != tVariableN) { 1179 status = U_UNEXPECTED_TOKEN; 1180 } 1181 break; 1182 case tIs: 1183 if ( curType != tNumber && curType != tNot) { 1184 status = U_UNEXPECTED_TOKEN; 1185 } 1186 break; 1187 case tNot: 1188 if (curType != tNumber && curType != tIn && curType != tWithin) { 1189 status = U_UNEXPECTED_TOKEN; 1190 } 1191 break; 1192 case tMod: 1193 case tDot: 1194 case tIn: 1195 case tWithin: 1196 case tAnd: 1197 case tOr: 1198 if (curType != tNumber && curType != tVariableN) { 1199 status = U_UNEXPECTED_TOKEN; 1200 } 1201 break; 1202 case tNumber: 1203 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot && 1204 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr) 1205 { 1206 status = U_UNEXPECTED_TOKEN; 1207 } 1208 break; 1209 default: 1210 status = U_UNEXPECTED_TOKEN; 1211 break; 1212 } 1213 } 1214 1215 void 1216 RuleParser::getNextToken(const UnicodeString& ruleData, 1217 int32_t *ruleIndex, 1218 UnicodeString& token, 1219 tokenType& type, 1220 UErrorCode &status) 1221 { 1222 int32_t curIndex= *ruleIndex; 1223 UChar ch; 1224 tokenType prevType=none; 1225 1226 if (U_FAILURE(status)) { 1227 return; 1228 } 1229 while (curIndex<ruleData.length()) { 1230 ch = ruleData.charAt(curIndex); 1231 if ( !inRange(ch, type) ) { 1232 status = U_ILLEGAL_CHARACTER; 1233 return; 1234 } 1235 switch (type) { 1236 case tSpace: 1237 if ( *ruleIndex != curIndex ) { // letter 1238 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1239 *ruleIndex=curIndex; 1240 type=prevType; 1241 getKeyType(token, type, status); 1242 return; 1243 } 1244 else { 1245 *ruleIndex=*ruleIndex+1; 1246 } 1247 break; // consective space 1248 case tColon: 1249 case tSemiColon: 1250 if ( *ruleIndex != curIndex ) { 1251 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1252 *ruleIndex=curIndex; 1253 type=prevType; 1254 getKeyType(token, type, status); 1255 return; 1256 } 1257 else { 1258 *ruleIndex=curIndex+1; 1259 return; 1260 } 1261 case tLetter: 1262 if ((type==prevType)||(prevType==none)) { 1263 prevType=type; 1264 break; 1265 } 1266 break; 1267 case tNumber: 1268 if ((type==prevType)||(prevType==none)) { 1269 prevType=type; 1270 break; 1271 } 1272 else { 1273 *ruleIndex=curIndex+1; 1274 return; 1275 } 1276 case tDot: 1277 if (prevType==none) { // first dot 1278 prevType=type; 1279 continue; 1280 } 1281 else { 1282 if ( *ruleIndex != curIndex ) { 1283 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1284 *ruleIndex=curIndex; // letter 1285 type=prevType; 1286 getKeyType(token, type, status); 1287 return; 1288 } 1289 else { // two consective dots 1290 *ruleIndex=curIndex+2; 1291 return; 1292 } 1293 } 1294 default: 1295 status = U_UNEXPECTED_TOKEN; 1296 return; 1297 } 1298 curIndex++; 1299 } 1300 if ( curIndex>=ruleData.length() ) { 1301 if ( (type == tLetter)||(type == tNumber) ) { 1302 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); 1303 getKeyType(token, type, status); 1304 if (U_FAILURE(status)) { 1305 return; 1306 } 1307 } 1308 *ruleIndex = ruleData.length(); 1309 } 1310 } 1311 1312 UBool 1313 RuleParser::inRange(UChar ch, tokenType& type) { 1314 if ((ch>=CAP_A) && (ch<=CAP_Z)) { 1315 // we assume all characters are in lower case already. 1316 return FALSE; 1317 } 1318 if ((ch>=LOW_A) && (ch<=LOW_Z)) { 1319 type = tLetter; 1320 return TRUE; 1321 } 1322 if ((ch>=U_ZERO) && (ch<=U_NINE)) { 1323 type = tNumber; 1324 return TRUE; 1325 } 1326 switch (ch) { 1327 case COLON: 1328 type = tColon; 1329 return TRUE; 1330 case SPACE: 1331 type = tSpace; 1332 return TRUE; 1333 case SEMI_COLON: 1334 type = tSemiColon; 1335 return TRUE; 1336 case DOT: 1337 type = tDot; 1338 return TRUE; 1339 default : 1340 type = none; 1341 return FALSE; 1342 } 1343 } 1344 1345 1346 void 1347 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status) 1348 { 1349 if (U_FAILURE(status)) { 1350 return; 1351 } 1352 if ( keyType==tNumber) { 1353 } 1354 else if (0 == token.compare(PK_VAR_N, 1)) { 1355 keyType = tVariableN; 1356 } 1357 else if (0 == token.compare(PK_IS, 2)) { 1358 keyType = tIs; 1359 } 1360 else if (0 == token.compare(PK_AND, 3)) { 1361 keyType = tAnd; 1362 } 1363 else if (0 == token.compare(PK_IN, 2)) { 1364 keyType = tIn; 1365 } 1366 else if (0 == token.compare(PK_WITHIN, 6)) { 1367 keyType = tWithin; 1368 } 1369 else if (0 == token.compare(PK_NOT, 3)) { 1370 keyType = tNot; 1371 } 1372 else if (0 == token.compare(PK_MOD, 3)) { 1373 keyType = tMod; 1374 } 1375 else if (0 == token.compare(PK_OR, 2)) { 1376 keyType = tOr; 1377 } 1378 else if ( isValidKeyword(token) ) { 1379 keyType = tKeyword; 1380 } 1381 else { 1382 status = U_UNEXPECTED_TOKEN; 1383 } 1384 } 1385 1386 UBool 1387 RuleParser::isValidKeyword(const UnicodeString& token) { 1388 return PatternProps::isIdentifier(token.getBuffer(), token.length()); 1389 } 1390 1391 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) 1392 : pos(0), fKeywordNames(status) { 1393 if (U_FAILURE(status)) { 1394 return; 1395 } 1396 fKeywordNames.setDeleter(uprv_deleteUObject); 1397 UBool addKeywordOther=TRUE; 1398 RuleChain *node=header; 1399 while(node!=NULL) { 1400 fKeywordNames.addElement(new UnicodeString(node->keyword), status); 1401 if (U_FAILURE(status)) { 1402 return; 1403 } 1404 if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { 1405 addKeywordOther= FALSE; 1406 } 1407 node=node->next; 1408 } 1409 1410 if (addKeywordOther) { 1411 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); 1412 } 1413 } 1414 1415 const UnicodeString* 1416 PluralKeywordEnumeration::snext(UErrorCode& status) { 1417 if (U_SUCCESS(status) && pos < fKeywordNames.size()) { 1418 return (const UnicodeString*)fKeywordNames.elementAt(pos++); 1419 } 1420 return NULL; 1421 } 1422 1423 void 1424 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { 1425 pos=0; 1426 } 1427 1428 int32_t 1429 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { 1430 return fKeywordNames.size(); 1431 } 1432 1433 PluralKeywordEnumeration::~PluralKeywordEnumeration() { 1434 } 1435 1436 U_NAMESPACE_END 1437 1438 1439 #endif /* #if !UCONFIG_NO_FORMATTING */ 1440 1441 //eof 1442