Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2011, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURRULE.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *******************************************************************************
     13 */
     14 
     15 
     16 #include "unicode/utypes.h"
     17 #include "unicode/localpointer.h"
     18 #include "unicode/plurrule.h"
     19 #include "unicode/ures.h"
     20 #include "cmemory.h"
     21 #include "cstring.h"
     22 #include "hash.h"
     23 #include "mutex.h"
     24 #include "patternprops.h"
     25 #include "plurrule_impl.h"
     26 #include "putilimp.h"
     27 #include "ucln_in.h"
     28 #include "uhash.h"
     29 #include "ustrfmt.h"
     30 #include "locutil.h"
     31 
     32 #if !UCONFIG_NO_FORMATTING
     33 
     34 U_NAMESPACE_BEGIN
     35 
// Mutex shared by all PluralRules instances; initSamples() holds it while it
// lazily builds the sample tables.
static UMTX pluralMutex;

// Element count of a true array (not a pointer), converted to int32_t.
#define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])

// NUL-terminated UChar spellings used by this file.
// PLURAL_KEYWORD_OTHER is the keyword "other"; PLURAL_DEFAULT_RULE is the rule text "other: n".
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Rule-syntax words ("in", "not", "is", "mod", "and", "or", "n", "within").
// NOTE(review): not referenced in the part of the file shown here; presumably used by the rule parser.
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};

// ICU run-time type identification boilerplate for the two UObject subclasses.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
     54 
     55 PluralRules::PluralRules(UErrorCode& status)
     56 :   UObject(),
     57     mRules(NULL),
     58     mParser(NULL),
     59     mSamples(NULL),
     60     mSampleInfo(NULL),
     61     mSampleInfoCount(0)
     62 {
     63     if (U_FAILURE(status)) {
     64         return;
     65     }
     66     mParser = new RuleParser();
     67     if (mParser==NULL) {
     68         status = U_MEMORY_ALLOCATION_ERROR;
     69     }
     70 }
     71 
     72 PluralRules::PluralRules(const PluralRules& other)
     73 : UObject(other),
     74     mRules(NULL),
     75   mParser(NULL),
     76   mSamples(NULL),
     77   mSampleInfo(NULL),
     78   mSampleInfoCount(0)
     79 {
     80     *this=other;
     81 }
     82 
     83 PluralRules::~PluralRules() {
     84     delete mRules;
     85     delete mParser;
     86     uprv_free(mSamples);
     87     uprv_free(mSampleInfo);
     88 }
     89 
     90 PluralRules*
     91 PluralRules::clone() const {
     92     return new PluralRules(*this);
     93 }
     94 
     95 PluralRules&
     96 PluralRules::operator=(const PluralRules& other) {
     97     if (this != &other) {
     98         delete mRules;
     99         if (other.mRules==NULL) {
    100             mRules = NULL;
    101         }
    102         else {
    103             mRules = new RuleChain(*other.mRules);
    104         }
    105         delete mParser;
    106         mParser = new RuleParser();
    107 
    108         uprv_free(mSamples);
    109         mSamples = NULL;
    110 
    111         uprv_free(mSampleInfo);
    112         mSampleInfo = NULL;
    113         mSampleInfoCount = 0;
    114     }
    115 
    116     return *this;
    117 }
    118 
    119 PluralRules* U_EXPORT2
    120 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
    121     RuleChain   rules;
    122 
    123     if (U_FAILURE(status)) {
    124         return NULL;
    125     }
    126     PluralRules *newRules = new PluralRules(status);
    127     if ( (newRules != NULL)&& U_SUCCESS(status) ) {
    128         newRules->parseDescription((UnicodeString &)description, rules, status);
    129         if (U_SUCCESS(status)) {
    130             newRules->addRules(rules);
    131         }
    132     }
    133     if (U_FAILURE(status)) {
    134         delete newRules;
    135         return NULL;
    136     }
    137     else {
    138         return newRules;
    139     }
    140 }
    141 
    142 PluralRules* U_EXPORT2
    143 PluralRules::createDefaultRules(UErrorCode& status) {
    144     return createRules(PLURAL_DEFAULT_RULE, status);
    145 }
    146 
    147 PluralRules* U_EXPORT2
    148 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
    149     RuleChain   rChain;
    150     if (U_FAILURE(status)) {
    151         return NULL;
    152     }
    153     PluralRules *newObj = new PluralRules(status);
    154     if (newObj==NULL || U_FAILURE(status)) {
    155         delete newObj;
    156         return NULL;
    157     }
    158     UnicodeString locRule = newObj->getRuleFromResource(locale, status);
    159     if ((locRule.length() != 0) && U_SUCCESS(status)) {
    160         newObj->parseDescription(locRule, rChain, status);
    161         if (U_SUCCESS(status)) {
    162             newObj->addRules(rChain);
    163         }
    164     }
    165     if (U_FAILURE(status)||(locRule.length() == 0)) {
    166         // use default plural rule
    167         status = U_ZERO_ERROR;
    168         UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
    169         newObj->parseDescription(defRule, rChain, status);
    170         newObj->addRules(rChain);
    171     }
    172 
    173     return newObj;
    174 }
    175 
    176 UnicodeString
    177 PluralRules::select(int32_t number) const {
    178     if (mRules == NULL) {
    179         return PLURAL_DEFAULT_RULE;
    180     }
    181     else {
    182         return mRules->select(number);
    183     }
    184 }
    185 
    186 UnicodeString
    187 PluralRules::select(double number) const {
    188     if (mRules == NULL) {
    189         return PLURAL_DEFAULT_RULE;
    190     }
    191     else {
    192         return mRules->select(number);
    193     }
    194 }
    195 
    196 StringEnumeration*
    197 PluralRules::getKeywords(UErrorCode& status) const {
    198     if (U_FAILURE(status))  return NULL;
    199     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
    200     if (U_FAILURE(status)) {
    201       delete nameEnumerator;
    202       return NULL;
    203     }
    204 
    205     return nameEnumerator;
    206 }
    207 
    208 double
    209 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
    210   double val = 0.0;
    211   UErrorCode status = U_ZERO_ERROR;
    212   int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
    213   return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
    214 }
    215 
    216 int32_t
    217 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
    218                                  int32_t destCapacity, UErrorCode& error) {
    219     return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
    220 }
    221 
    222 int32_t
    223 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
    224                         int32_t destCapacity, UErrorCode& status) {
    225     return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
    226 }
    227 
    228 int32_t
    229 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
    230                                 int32_t destCapacity, UBool includeUnlimited,
    231                                 UErrorCode& status) {
    232     initSamples(status);
    233     if (U_FAILURE(status)) {
    234         return -1;
    235     }
    236     if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
    237         status = U_ILLEGAL_ARGUMENT_ERROR;
    238         return -1;
    239     }
    240 
    241     int32_t index = getKeywordIndex(keyword, status);
    242     if (index == -1) {
    243         return 0;
    244     }
    245 
    246     const int32_t LIMIT_MASK = 0x1 << 31;
    247 
    248     if (!includeUnlimited) {
    249         if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
    250             return -1;
    251         }
    252     }
    253 
    254     int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
    255     int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
    256     int32_t len = limit - start;
    257     if (len <= destCapacity) {
    258         destCapacity = len;
    259     } else if (includeUnlimited) {
    260         len = destCapacity;  // no overflow, and don't report more than we copy
    261     } else {
    262         status = U_BUFFER_OVERFLOW_ERROR;
    263         return len;
    264     }
    265     for (int32_t i = 0; i < destCapacity; ++i, ++start) {
    266         dest[i] = mSamples[start];
    267     }
    268     return len;
    269 }
    270 
    271 
    272 UBool
    273 PluralRules::isKeyword(const UnicodeString& keyword) const {
    274     if ( keyword == PLURAL_KEYWORD_OTHER ) {
    275         return true;
    276     }
    277     else {
    278         if (mRules==NULL) {
    279             return false;
    280         }
    281         else {
    282             return mRules->isKeyword(keyword);
    283         }
    284     }
    285 }
    286 
    287 UnicodeString
    288 PluralRules::getKeywordOther() const {
    289     return PLURAL_KEYWORD_OTHER;
    290 }
    291 
    292 UBool
    293 PluralRules::operator==(const PluralRules& other) const  {
    294     int32_t limit;
    295     const UnicodeString *ptrKeyword;
    296     UErrorCode status= U_ZERO_ERROR;
    297 
    298     if ( this == &other ) {
    299         return TRUE;
    300     }
    301     LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
    302     LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
    303     if (U_FAILURE(status)) {
    304         return FALSE;
    305     }
    306 
    307     if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
    308         return FALSE;
    309     }
    310     myKeywordList->reset(status);
    311     while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
    312         if (!other.isKeyword(*ptrKeyword)) {
    313             return FALSE;
    314         }
    315     }
    316     otherKeywordList->reset(status);
    317     while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
    318         if (!this->isKeyword(*ptrKeyword)) {
    319             return FALSE;
    320         }
    321     }
    322     if (U_FAILURE(status)) {
    323         return FALSE;
    324     }
    325 
    326     if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
    327         return FALSE;
    328     }
    329     UnicodeString myKeyword, otherKeyword;
    330     for (int32_t i=0; i<limit; ++i) {
    331         myKeyword = this->select(i);
    332         otherKeyword = other.select(i);
    333         if (myKeyword!=otherKeyword) {
    334             return FALSE;
    335         }
    336     }
    337     return TRUE;
    338 }
    339 
    340 void
    341 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
    342 {
    343     int32_t ruleIndex=0;
    344     UnicodeString token;
    345     tokenType type;
    346     tokenType prevType=none;
    347     RuleChain *ruleChain=NULL;
    348     AndConstraint *curAndConstraint=NULL;
    349     OrConstraint *orNode=NULL;
    350     RuleChain *lastChain=NULL;
    351 
    352     if (U_FAILURE(status)) {
    353         return;
    354     }
    355     UnicodeString ruleData = data.toLower();
    356     while (ruleIndex< ruleData.length()) {
    357         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
    358         if (U_FAILURE(status)) {
    359             return;
    360         }
    361         mParser->checkSyntax(prevType, type, status);
    362         if (U_FAILURE(status)) {
    363             return;
    364         }
    365         switch (type) {
    366         case tAnd:
    367             curAndConstraint = curAndConstraint->add();
    368             break;
    369         case tOr:
    370             lastChain = &rules;
    371             while (lastChain->next !=NULL) {
    372                 lastChain = lastChain->next;
    373             }
    374             orNode=lastChain->ruleHeader;
    375             while (orNode->next != NULL) {
    376                 orNode = orNode->next;
    377             }
    378             orNode->next= new OrConstraint();
    379             orNode=orNode->next;
    380             orNode->next=NULL;
    381             curAndConstraint = orNode->add();
    382             break;
    383         case tIs:
    384             curAndConstraint->rangeHigh=-1;
    385             break;
    386         case tNot:
    387             curAndConstraint->notIn=TRUE;
    388             break;
    389         case tIn:
    390             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    391             curAndConstraint->integerOnly = TRUE;
    392             break;
    393         case tWithin:
    394             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    395             break;
    396         case tNumber:
    397             if ( (curAndConstraint->op==AndConstraint::MOD)&&
    398                  (curAndConstraint->opNum == -1 ) ) {
    399                 curAndConstraint->opNum=getNumberValue(token);
    400             }
    401             else {
    402                 if (curAndConstraint->rangeLow == -1) {
    403                     curAndConstraint->rangeLow=getNumberValue(token);
    404                 }
    405                 else {
    406                     curAndConstraint->rangeHigh=getNumberValue(token);
    407                 }
    408             }
    409             break;
    410         case tMod:
    411             curAndConstraint->op=AndConstraint::MOD;
    412             break;
    413         case tKeyword:
    414             if (ruleChain==NULL) {
    415                 ruleChain = &rules;
    416             }
    417             else {
    418                 while (ruleChain->next!=NULL){
    419                     ruleChain=ruleChain->next;
    420                 }
    421                 ruleChain=ruleChain->next=new RuleChain();
    422             }
    423             if (ruleChain->ruleHeader != NULL) {
    424                 delete ruleChain->ruleHeader;
    425             }
    426             orNode = ruleChain->ruleHeader = new OrConstraint();
    427             curAndConstraint = orNode->add();
    428             ruleChain->keyword = token;
    429             break;
    430         default:
    431             break;
    432         }
    433         prevType=type;
    434     }
    435 }
    436 
    437 int32_t
    438 PluralRules::getNumberValue(const UnicodeString& token) const {
    439     int32_t i;
    440     char digits[128];
    441 
    442     i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
    443     digits[i]='\0';
    444 
    445     return((int32_t)atoi(digits));
    446 }
    447 
    448 
    449 void
    450 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
    451     int32_t i=*curIndex;
    452 
    453     localeName.remove();
    454     while (i< localeData.length()) {
    455        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
    456            break;
    457        }
    458        i++;
    459     }
    460 
    461     while (i< localeData.length()) {
    462        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
    463            break;
    464        }
    465        localeName+=localeData.charAt(i++);
    466     }
    467     *curIndex=i;
    468 }
    469 
    470 
    471 int32_t
    472 PluralRules::getRepeatLimit() const {
    473     if (mRules!=NULL) {
    474         return mRules->getRepeatLimit();
    475     }
    476     else {
    477         return 0;
    478     }
    479 }
    480 
    481 int32_t
    482 PluralRules::getKeywordIndex(const UnicodeString& keyword,
    483                              UErrorCode& status) const {
    484     if (U_SUCCESS(status)) {
    485         int32_t n = 0;
    486         RuleChain* rc = mRules;
    487         while (rc != NULL) {
    488             if (rc->ruleHeader != NULL) {
    489                 if (rc->keyword == keyword) {
    490                     return n;
    491                 }
    492                 ++n;
    493             }
    494             rc = rc->next;
    495         }
    496         if (keyword == PLURAL_KEYWORD_OTHER) {
    497             return n;
    498         }
    499     }
    500     return -1;
    501 }
    502 
    503 typedef struct SampleRecord {
    504     int32_t ruleIndex;
    505     double  value;
    506 } SampleRecord;
    507 
// Lazily builds the sample tables mSamples / mSampleInfo / mSampleInfoCount.
//
// Returns at once if status already reports a failure or the tables exist.
// On allocation failure status is set to U_MEMORY_ALLOCATION_ERROR and the
// members are left untouched (they are only assigned at the very end).
//
// Layout produced: mSamples holds the sample values grouped by rule index;
// mSampleInfo[i] holds the end offset of group i in its low 31 bits and the
// "limited" flag in its top bit.
void
PluralRules::initSamples(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    // held until the function returns
    Mutex lock(&pluralMutex);

    if (mSamples) {
        return;
    }

    // Note, the original design let you have multiple rules with the same keyword.  But
    // we don't use that in our data and existing functions in this implementation don't
    // fully support it (for example, the returned keywords is a list and not a set).
    //
    // So I don't support this here either.  If you ask for samples, or for all values,
    // you will get information about the first rule with that keyword, not all rules with
    // that keyword.

    // Pass 1: count the rules that have a header and find the first explicit 'other'.
    int32_t maxIndex = 0;
    int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
    RuleChain* rc = mRules;
    while (rc != NULL) {
        if (rc->ruleHeader != NULL) {
            if (otherIndex == -1 && rc->keyword == PLURAL_KEYWORD_OTHER) {
                otherIndex = maxIndex;
            }
            ++maxIndex;
        }
        rc = rc->next;
    }
    if (otherIndex == -1) {
        // reserve one extra slot for the implicit 'other'
        ++maxIndex;
    }

    LocalMemory<int32_t> newSampleInfo;
    if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // top bit of an info word: set when the rule is "limited"
    const int32_t LIMIT_MASK = 0x1 << 31;

    // Pass 2: seed every slot with just the flag; the low bits start at 0 and
    // are used below as a per-rule sample counter.
    rc = mRules;
    int32_t n = 0;
    while (rc != NULL) {
        if (rc->ruleHeader != NULL) {
            newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
        }
        rc = rc->next;
    }
    if (otherIndex == -1) {
        newSampleInfo[maxIndex - 1] = 0; // unlimited
    }

    MaybeStackArray<SampleRecord, 10> newSamples;
    int32_t sampleCount = 0;

    // number of candidate values to try; never fewer than 10
    int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
    if (limit < 10) {
        limit = 10;
    }

    // Pass 3: try the values 0, 0.5, 1, 1.5, ... and record which rule each one selects,
    // stopping early once no keyword is still waiting for samples.
    for (int i = 0, keywordsRemaining = maxIndex;
          keywordsRemaining > 0 && i < limit;
          ++i) {
        double val = i / 2.0;

        // first rule (in chain order) fulfilled by val
        n = 0;
        rc = mRules;
        int32_t found = -1;
        while (rc != NULL) {
            if (rc->ruleHeader != NULL) {
                if (rc->ruleHeader->isFulfilled(val)) {
                    found = n;
                    break;
                }
                ++n;
            }
            rc = rc->next;
        }
        if (found == -1) {
            // 'other'.  If there is an 'other' rule, the rule set is bad since nothing
            // should leak through, but we don't bother to report that here.
            found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
        }
        // The whole word is compared, so this can only be true when the flag bit is
        // clear: unflagged rules stop collecting at MAX_SAMPLES, flagged ones never
        // hit this cap.
        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
            continue;
        }
        newSampleInfo[found] += 1; // won't impact limit flag

        // grow the record array when it is full
        if (sampleCount == newSamples.getCapacity()) {
            int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
            if (NULL == newSamples.resize(newCapacity, sampleCount)) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
        }
        newSamples[sampleCount].ruleIndex = found;
        newSamples[sampleCount].value = val;
        ++sampleCount;

        // an unflagged rule that just reached its cap needs no more samples
        if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
            --keywordsRemaining;
        }
    }

    // sort the values by index, leaving order otherwise unchanged
    // this is just a selection sort for simplicity
    LocalMemory<double> values;
    if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    for (int i = 0, j = 0; i < maxIndex; ++i) {
        for (int k = 0; k < sampleCount; ++k) {
            if (newSamples[k].ruleIndex == i) {
                values[j++] = newSamples[k].value;
            }
        }
    }

    // convert array of mask/lengths to array of mask/limits
    // ('limit' is reused here as the running end offset into 'values')
    limit = 0;
    for (int i = 0; i < maxIndex; ++i) {
        int32_t info = newSampleInfo[i];
        int32_t len = info & ~LIMIT_MASK;
        limit += len;
        // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
        // it's not really unlimited, so mark it as limited
        int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
        newSampleInfo[i] = limit | mask;
    }

    // ok, we've got good data
    // (orphan() hands the buffers over to the members; the destructor frees them)
    mSamples = values.orphan();
    mSampleInfo = newSampleInfo.orphan();
    mSampleInfoCount = maxIndex;
}
    647 
    648 void
    649 PluralRules::addRules(RuleChain& rules) {
    650     RuleChain *newRule = new RuleChain(rules);
    651     this->mRules=newRule;
    652     newRule->setRepeatLimit();
    653 }
    654 
    655 UnicodeString
    656 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
    657     UnicodeString emptyStr;
    658 
    659     if (U_FAILURE(errCode)) {
    660         return emptyStr;
    661     }
    662     UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
    663     if(U_FAILURE(errCode)) {
    664         /* total failure, not even root could be opened */
    665         return emptyStr;
    666     }
    667     UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
    668     if(U_FAILURE(errCode)) {
    669         ures_close(rb);
    670         return emptyStr;
    671     }
    672     int32_t resLen=0;
    673     const char *curLocaleName=locale.getName();
    674     const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
    675 
    676     if (s == NULL) {
    677         // Check parent locales.
    678         UErrorCode status = U_ZERO_ERROR;
    679         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
    680         const char *curLocaleName=locale.getName();
    681         int32_t localeNameLen=0;
    682         uprv_strcpy(parentLocaleName, curLocaleName);
    683 
    684         while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
    685                                        ULOC_FULLNAME_CAPACITY, &status)) > 0) {
    686             resLen=0;
    687             s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
    688             if (s != NULL) {
    689                 errCode = U_ZERO_ERROR;
    690                 break;
    691             }
    692             status = U_ZERO_ERROR;
    693         }
    694     }
    695     if (s==NULL) {
    696         ures_close(locRes);
    697         ures_close(rb);
    698         return emptyStr;
    699     }
    700 
    701     char setKey[256];
    702     UChar result[256];
    703     u_UCharsToChars(s, setKey, resLen + 1);
    704     // printf("\n PluralRule: %s\n", setKey);
    705 
    706 
    707     UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
    708     if(U_FAILURE(errCode)) {
    709         ures_close(locRes);
    710         ures_close(rb);
    711         return emptyStr;
    712     }
    713     resLen=0;
    714     UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
    715     if (U_FAILURE(errCode)) {
    716         ures_close(ruleRes);
    717         ures_close(locRes);
    718         ures_close(rb);
    719         return emptyStr;
    720     }
    721 
    722     int32_t numberKeys = ures_getSize(setRes);
    723     char *key=NULL;
    724     int32_t len=0;
    725     for(int32_t i=0; i<numberKeys; ++i) {
    726         int32_t keyLen;
    727         resLen=0;
    728         s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
    729         keyLen = (int32_t)uprv_strlen(key);
    730         u_charsToUChars(key, result+len, keyLen);
    731         len += keyLen;
    732         result[len++]=COLON;
    733         uprv_memcpy(result+len, s, resLen*sizeof(UChar));
    734         len += resLen;
    735         result[len++]=SEMI_COLON;
    736     }
    737     result[len++]=0;
    738     u_UCharsToChars(result, setKey, len);
    739     // printf(" Rule: %s\n", setKey);
    740 
    741     ures_close(setRes);
    742     ures_close(ruleRes);
    743     ures_close(locRes);
    744     ures_close(rb);
    745     return UnicodeString(result);
    746 }
    747 
    748 AndConstraint::AndConstraint() {
    749     op = AndConstraint::NONE;
    750     opNum=-1;
    751     rangeLow=-1;
    752     rangeHigh=-1;
    753     notIn=FALSE;
    754     integerOnly=FALSE;
    755     next=NULL;
    756 }
    757 
    758 
    759 AndConstraint::AndConstraint(const AndConstraint& other) {
    760     this->op = other.op;
    761     this->opNum=other.opNum;
    762     this->rangeLow=other.rangeLow;
    763     this->rangeHigh=other.rangeHigh;
    764     this->integerOnly=other.integerOnly;
    765     this->notIn=other.notIn;
    766     if (other.next==NULL) {
    767         this->next=NULL;
    768     }
    769     else {
    770         this->next = new AndConstraint(*other.next);
    771     }
    772 }
    773 
    774 AndConstraint::~AndConstraint() {
    775     if (next!=NULL) {
    776         delete next;
    777     }
    778 }
    779 
    780 
    781 UBool
    782 AndConstraint::isFulfilled(double number) {
    783     UBool result=TRUE;
    784     double value=number;
    785 
    786     // arrrrrrgh
    787     if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
    788       return notIn;
    789     }
    790 
    791     if ( op == MOD ) {
    792         value = (int32_t)value % opNum;
    793     }
    794     if ( rangeHigh == -1 ) {
    795         if ( rangeLow == -1 ) {
    796             result = TRUE; // empty rule
    797         }
    798         else {
    799             if ( value == rangeLow ) {
    800                 result = TRUE;
    801             }
    802             else {
    803                 result = FALSE;
    804             }
    805         }
    806     }
    807     else {
    808         if ((rangeLow <= value) && (value <= rangeHigh)) {
    809             if (integerOnly) {
    810                 if ( value != (int32_t)value) {
    811                     result = FALSE;
    812                 }
    813                 else {
    814                     result = TRUE;
    815                 }
    816             }
    817             else {
    818                 result = TRUE;
    819             }
    820         }
    821         else {
    822             result = FALSE;
    823         }
    824     }
    825     if (notIn) {
    826         return !result;
    827     }
    828     else {
    829         return result;
    830     }
    831 }
    832 
    833 UBool
    834 AndConstraint::isLimited() {
    835     return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
    836 }
    837 
    838 int32_t
    839 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
    840 
    841     if ( op == MOD ) {
    842         return uprv_max(opNum, maxLimit);
    843     }
    844     else {
    845         if ( rangeHigh == -1 ) {
    846             return uprv_max(rangeLow, maxLimit);
    847         }
    848         else{
    849             return uprv_max(rangeHigh, maxLimit);
    850         }
    851     }
    852 }
    853 
    854 
    855 AndConstraint*
    856 AndConstraint::add()
    857 {
    858     this->next = new AndConstraint();
    859     return this->next;
    860 }
    861 
    862 OrConstraint::OrConstraint() {
    863     childNode=NULL;
    864     next=NULL;
    865 }
    866 
    867 OrConstraint::OrConstraint(const OrConstraint& other) {
    868     if ( other.childNode == NULL ) {
    869         this->childNode = NULL;
    870     }
    871     else {
    872         this->childNode = new AndConstraint(*(other.childNode));
    873     }
    874     if (other.next == NULL ) {
    875         this->next = NULL;
    876     }
    877     else {
    878         this->next = new OrConstraint(*(other.next));
    879     }
    880 }
    881 
    882 OrConstraint::~OrConstraint() {
    883     if (childNode!=NULL) {
    884         delete childNode;
    885     }
    886     if (next!=NULL) {
    887         delete next;
    888     }
    889 }
    890 
    891 AndConstraint*
    892 OrConstraint::add()
    893 {
    894     OrConstraint *curOrConstraint=this;
    895     {
    896         while (curOrConstraint->next!=NULL) {
    897             curOrConstraint = curOrConstraint->next;
    898         }
    899         curOrConstraint->next = NULL;
    900         curOrConstraint->childNode = new AndConstraint();
    901     }
    902     return curOrConstraint->childNode;
    903 }
    904 
    905 UBool
    906 OrConstraint::isFulfilled(double number) {
    907     OrConstraint* orRule=this;
    908     UBool result=FALSE;
    909 
    910     while (orRule!=NULL && !result) {
    911         result=TRUE;
    912         AndConstraint* andRule = orRule->childNode;
    913         while (andRule!=NULL && result) {
    914             result = andRule->isFulfilled(number);
    915             andRule=andRule->next;
    916         }
    917         orRule = orRule->next;
    918     }
    919 
    920     return result;
    921 }
    922 
    923 UBool
    924 OrConstraint::isLimited() {
    925     for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
    926         UBool result = FALSE;
    927         for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
    928             if (andc->isLimited()) {
    929                 result = TRUE;
    930                 break;
    931             }
    932         }
    933         if (result == FALSE) {
    934             return FALSE;
    935         }
    936     }
    937     return TRUE;
    938 }
    939 
    940 RuleChain::RuleChain() {
    941     ruleHeader=NULL;
    942     next = NULL;
    943     repeatLimit=0;
    944 }
    945 
    946 RuleChain::RuleChain(const RuleChain& other) {
    947     this->repeatLimit = other.repeatLimit;
    948     this->keyword=other.keyword;
    949     if (other.ruleHeader != NULL) {
    950         this->ruleHeader = new OrConstraint(*(other.ruleHeader));
    951     }
    952     else {
    953         this->ruleHeader = NULL;
    954     }
    955     if (other.next != NULL ) {
    956         this->next = new RuleChain(*other.next);
    957     }
    958     else
    959     {
    960         this->next = NULL;
    961     }
    962 }
    963 
    964 RuleChain::~RuleChain() {
    965     if (next != NULL) {
    966         delete next;
    967     }
    968     if ( ruleHeader != NULL ) {
    969         delete ruleHeader;
    970     }
    971 }
    972 
    973 UnicodeString
    974 RuleChain::select(double number) const {
    975 
    976    if ( ruleHeader != NULL ) {
    977        if (ruleHeader->isFulfilled(number)) {
    978            return keyword;
    979        }
    980    }
    981    if ( next != NULL ) {
    982        return next->select(number);
    983    }
    984    else {
    985        return PLURAL_KEYWORD_OTHER;
    986    }
    987 
    988 }
    989 
    990 void
    991 RuleChain::dumpRules(UnicodeString& result) {
    992     UChar digitString[16];
    993 
    994     if ( ruleHeader != NULL ) {
    995         result +=  keyword;
    996         OrConstraint* orRule=ruleHeader;
    997         while ( orRule != NULL ) {
    998             AndConstraint* andRule=orRule->childNode;
    999             while ( andRule != NULL ) {
   1000                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
   1001                     result += UNICODE_STRING_SIMPLE(" n is ");
   1002                     if (andRule->notIn) {
   1003                         result += UNICODE_STRING_SIMPLE("not ");
   1004                     }
   1005                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
   1006                     result += UnicodeString(digitString);
   1007                 }
   1008                 else {
   1009                     if (andRule->op==AndConstraint::MOD) {
   1010                         result += UNICODE_STRING_SIMPLE("  n mod ");
   1011                         uprv_itou(digitString,16, andRule->opNum,10,0);
   1012                         result += UnicodeString(digitString);
   1013                     }
   1014                     else {
   1015                         result += UNICODE_STRING_SIMPLE("  n ");
   1016                     }
   1017                     if (andRule->rangeHigh==-1) {
   1018                         if (andRule->notIn) {
   1019                             result += UNICODE_STRING_SIMPLE(" is not ");
   1020                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
   1021                             result += UnicodeString(digitString);
   1022                         }
   1023                         else {
   1024                             result += UNICODE_STRING_SIMPLE(" is ");
   1025                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
   1026                             result += UnicodeString(digitString);
   1027                         }
   1028                     }
   1029                     else {
   1030                         if (andRule->notIn) {
   1031                             if ( andRule->integerOnly ) {
   1032                                 result += UNICODE_STRING_SIMPLE("  not in ");
   1033                             }
   1034                             else {
   1035                                 result += UNICODE_STRING_SIMPLE("  not within ");
   1036                             }
   1037                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
   1038                             result += UnicodeString(digitString);
   1039                             result += UNICODE_STRING_SIMPLE(" .. ");
   1040                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
   1041                             result += UnicodeString(digitString);
   1042                         }
   1043                         else {
   1044                             if ( andRule->integerOnly ) {
   1045                                 result += UNICODE_STRING_SIMPLE(" in ");
   1046                             }
   1047                             else {
   1048                                 result += UNICODE_STRING_SIMPLE(" within ");
   1049                             }
   1050                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
   1051                             result += UnicodeString(digitString);
   1052                             result += UNICODE_STRING_SIMPLE(" .. ");
   1053                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
   1054                         }
   1055                     }
   1056                 }
   1057                 if ( (andRule=andRule->next) != NULL) {
   1058                     result += PK_AND;
   1059                 }
   1060             }
   1061             if ( (orRule = orRule->next) != NULL ) {
   1062                 result += PK_OR;
   1063             }
   1064         }
   1065     }
   1066     if ( next != NULL ) {
   1067         next->dumpRules(result);
   1068     }
   1069 }
   1070 
   1071 int32_t
   1072 RuleChain::getRepeatLimit () {
   1073     return repeatLimit;
   1074 }
   1075 
   1076 void
   1077 RuleChain::setRepeatLimit () {
   1078     int32_t limit=0;
   1079 
   1080     if ( next != NULL ) {
   1081         next->setRepeatLimit();
   1082         limit = next->repeatLimit;
   1083     }
   1084 
   1085     if ( ruleHeader != NULL ) {
   1086         OrConstraint* orRule=ruleHeader;
   1087         while ( orRule != NULL ) {
   1088             AndConstraint* andRule=orRule->childNode;
   1089             while ( andRule != NULL ) {
   1090                 limit = andRule->updateRepeatLimit(limit);
   1091                 andRule = andRule->next;
   1092             }
   1093             orRule = orRule->next;
   1094         }
   1095     }
   1096     repeatLimit = limit;
   1097 }
   1098 
   1099 UErrorCode
   1100 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
   1101     if ( arraySize < capacityOfKeywords-1 ) {
   1102         keywords[arraySize++]=keyword;
   1103     }
   1104     else {
   1105         return U_BUFFER_OVERFLOW_ERROR;
   1106     }
   1107 
   1108     if ( next != NULL ) {
   1109         return next->getKeywords(capacityOfKeywords, keywords, arraySize);
   1110     }
   1111     else {
   1112         return U_ZERO_ERROR;
   1113     }
   1114 }
   1115 
   1116 UBool
   1117 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
   1118     if ( keyword == keywordParam ) {
   1119         return TRUE;
   1120     }
   1121 
   1122     if ( next != NULL ) {
   1123         return next->isKeyword(keywordParam);
   1124     }
   1125     else {
   1126         return FALSE;
   1127     }
   1128 }
   1129 
   1130 
   1131 RuleParser::RuleParser() {
   1132 }
   1133 
   1134 RuleParser::~RuleParser() {
   1135 }
   1136 
   1137 void
   1138 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
   1139 {
   1140     if (U_FAILURE(status)) {
   1141         return;
   1142     }
   1143     switch(prevType) {
   1144     case none:
   1145     case tSemiColon:
   1146         if (curType!=tKeyword) {
   1147             status = U_UNEXPECTED_TOKEN;
   1148         }
   1149         break;
   1150     case tVariableN :
   1151         if (curType != tIs && curType != tMod && curType != tIn &&
   1152             curType != tNot && curType != tWithin) {
   1153             status = U_UNEXPECTED_TOKEN;
   1154         }
   1155         break;
   1156     case tZero:
   1157     case tOne:
   1158     case tTwo:
   1159     case tFew:
   1160     case tMany:
   1161     case tOther:
   1162     case tKeyword:
   1163         if (curType != tColon) {
   1164             status = U_UNEXPECTED_TOKEN;
   1165         }
   1166         break;
   1167     case tColon :
   1168         if (curType != tVariableN) {
   1169             status = U_UNEXPECTED_TOKEN;
   1170         }
   1171         break;
   1172     case tIs:
   1173         if ( curType != tNumber && curType != tNot) {
   1174             status = U_UNEXPECTED_TOKEN;
   1175         }
   1176         break;
   1177     case tNot:
   1178         if (curType != tNumber && curType != tIn && curType != tWithin) {
   1179             status = U_UNEXPECTED_TOKEN;
   1180         }
   1181         break;
   1182     case tMod:
   1183     case tDot:
   1184     case tIn:
   1185     case tWithin:
   1186     case tAnd:
   1187     case tOr:
   1188         if (curType != tNumber && curType != tVariableN) {
   1189             status = U_UNEXPECTED_TOKEN;
   1190         }
   1191         break;
   1192     case tNumber:
   1193         if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
   1194             curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
   1195         {
   1196             status = U_UNEXPECTED_TOKEN;
   1197         }
   1198         break;
   1199     default:
   1200         status = U_UNEXPECTED_TOKEN;
   1201         break;
   1202     }
   1203 }
   1204 
   1205 void
   1206 RuleParser::getNextToken(const UnicodeString& ruleData,
   1207                          int32_t *ruleIndex,
   1208                          UnicodeString& token,
   1209                          tokenType& type,
   1210                          UErrorCode &status)
   1211 {
   1212     int32_t curIndex= *ruleIndex;
   1213     UChar ch;
   1214     tokenType prevType=none;
   1215 
   1216     if (U_FAILURE(status)) {
   1217         return;
   1218     }
   1219     while (curIndex<ruleData.length()) {
   1220         ch = ruleData.charAt(curIndex);
   1221         if ( !inRange(ch, type) ) {
   1222             status = U_ILLEGAL_CHARACTER;
   1223             return;
   1224         }
   1225         switch (type) {
   1226         case tSpace:
   1227             if ( *ruleIndex != curIndex ) { // letter
   1228                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1229                 *ruleIndex=curIndex;
   1230                 type=prevType;
   1231                 getKeyType(token, type, status);
   1232                 return;
   1233             }
   1234             else {
   1235                 *ruleIndex=*ruleIndex+1;
   1236             }
   1237             break; // consective space
   1238         case tColon:
   1239         case tSemiColon:
   1240             if ( *ruleIndex != curIndex ) {
   1241                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1242                 *ruleIndex=curIndex;
   1243                 type=prevType;
   1244                 getKeyType(token, type, status);
   1245                 return;
   1246             }
   1247             else {
   1248                 *ruleIndex=curIndex+1;
   1249                 return;
   1250             }
   1251         case tLetter:
   1252              if ((type==prevType)||(prevType==none)) {
   1253                 prevType=type;
   1254                 break;
   1255              }
   1256              break;
   1257         case tNumber:
   1258              if ((type==prevType)||(prevType==none)) {
   1259                 prevType=type;
   1260                 break;
   1261              }
   1262              else {
   1263                 *ruleIndex=curIndex+1;
   1264                 return;
   1265              }
   1266          case tDot:
   1267              if (prevType==none) {  // first dot
   1268                 prevType=type;
   1269                 continue;
   1270              }
   1271              else {
   1272                  if ( *ruleIndex != curIndex ) {
   1273                     token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1274                     *ruleIndex=curIndex;  // letter
   1275                     type=prevType;
   1276                     getKeyType(token, type, status);
   1277                     return;
   1278                  }
   1279                  else {  // two consective dots
   1280                     *ruleIndex=curIndex+2;
   1281                     return;
   1282                  }
   1283              }
   1284              break;
   1285          default:
   1286              status = U_UNEXPECTED_TOKEN;
   1287              return;
   1288         }
   1289         curIndex++;
   1290     }
   1291     if ( curIndex>=ruleData.length() ) {
   1292         if ( (type == tLetter)||(type == tNumber) ) {
   1293             token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1294             getKeyType(token, type, status);
   1295             if (U_FAILURE(status)) {
   1296                 return;
   1297             }
   1298         }
   1299         *ruleIndex = ruleData.length();
   1300     }
   1301 }
   1302 
   1303 UBool
   1304 RuleParser::inRange(UChar ch, tokenType& type) {
   1305     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
   1306         // we assume all characters are in lower case already.
   1307         return FALSE;
   1308     }
   1309     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
   1310         type = tLetter;
   1311         return TRUE;
   1312     }
   1313     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
   1314         type = tNumber;
   1315         return TRUE;
   1316     }
   1317     switch (ch) {
   1318     case COLON:
   1319         type = tColon;
   1320         return TRUE;
   1321     case SPACE:
   1322         type = tSpace;
   1323         return TRUE;
   1324     case SEMI_COLON:
   1325         type = tSemiColon;
   1326         return TRUE;
   1327     case DOT:
   1328         type = tDot;
   1329         return TRUE;
   1330     default :
   1331         type = none;
   1332         return FALSE;
   1333     }
   1334 }
   1335 
   1336 
   1337 void
   1338 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
   1339 {
   1340     if (U_FAILURE(status)) {
   1341         return;
   1342     }
   1343     if ( keyType==tNumber) {
   1344     }
   1345     else if (token==PK_VAR_N) {
   1346         keyType = tVariableN;
   1347     }
   1348     else if (token==PK_IS) {
   1349         keyType = tIs;
   1350     }
   1351     else if (token==PK_AND) {
   1352         keyType = tAnd;
   1353     }
   1354     else if (token==PK_IN) {
   1355         keyType = tIn;
   1356     }
   1357     else if (token==PK_WITHIN) {
   1358         keyType = tWithin;
   1359     }
   1360     else if (token==PK_NOT) {
   1361         keyType = tNot;
   1362     }
   1363     else if (token==PK_MOD) {
   1364         keyType = tMod;
   1365     }
   1366     else if (token==PK_OR) {
   1367         keyType = tOr;
   1368     }
   1369     else if ( isValidKeyword(token) ) {
   1370         keyType = tKeyword;
   1371     }
   1372     else {
   1373         status = U_UNEXPECTED_TOKEN;
   1374     }
   1375 }
   1376 
   1377 UBool
   1378 RuleParser::isValidKeyword(const UnicodeString& token) {
   1379     return PatternProps::isIdentifier(token.getBuffer(), token.length());
   1380 }
   1381 
   1382 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
   1383         : pos(0), fKeywordNames(status) {
   1384     if (U_FAILURE(status)) {
   1385         return;
   1386     }
   1387     fKeywordNames.setDeleter(uhash_deleteUObject);
   1388     UBool  addKeywordOther=TRUE;
   1389     RuleChain *node=header;
   1390     while(node!=NULL) {
   1391         fKeywordNames.addElement(new UnicodeString(node->keyword), status);
   1392         if (U_FAILURE(status)) {
   1393             return;
   1394         }
   1395         if (node->keyword == PLURAL_KEYWORD_OTHER) {
   1396             addKeywordOther= FALSE;
   1397         }
   1398         node=node->next;
   1399     }
   1400 
   1401     if (addKeywordOther) {
   1402         fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
   1403     }
   1404 }
   1405 
   1406 const UnicodeString*
   1407 PluralKeywordEnumeration::snext(UErrorCode& status) {
   1408     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
   1409         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
   1410     }
   1411     return NULL;
   1412 }
   1413 
   1414 void
   1415 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
   1416     pos=0;
   1417 }
   1418 
   1419 int32_t
   1420 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
   1421        return fKeywordNames.size();
   1422 }
   1423 
   1424 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
   1425 }
   1426 
   1427 U_NAMESPACE_END
   1428 
   1429 
   1430 #endif /* #if !UCONFIG_NO_FORMATTING */
   1431 
   1432 //eof
   1433