Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2010, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURRULE.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *******************************************************************************
     13 */
     14 
     15 
     16 #include "unicode/uniset.h"
     17 #include "unicode/utypes.h"
     18 #include "unicode/ures.h"
     19 #include "unicode/plurrule.h"
     20 #include "cmemory.h"
     21 #include "cstring.h"
     22 #include "hash.h"
     23 #include "mutex.h"
     24 #include "plurrule_impl.h"
     25 #include "putilimp.h"
     26 #include "ucln_in.h"
     27 #include "ustrfmt.h"
     28 #include "locutil.h"
     29 
     30 /*
     31 // TODO(claireho): remove stdio
     32 #include "stdio.h"
     33 */
     34 
     35 #if !UCONFIG_NO_FORMATTING
     36 
     37 U_NAMESPACE_BEGIN
     38 
     39 
     // Element count of a true array (not a pointer), as an int32_t.
     // Both the argument and the whole expansion are parenthesized so the macro
     // stays correct inside larger expressions and with non-trivial arguments.
     #define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
     41 
     42 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
     43 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
     44 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
     45 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
     46 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
     47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
     48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
     49 static const UChar PK_IN[]={LOW_I,LOW_N,0};
     50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
     51 static const UChar PK_IS[]={LOW_I,LOW_S,0};
     52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
     53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
     54 static const UChar PK_OR[]={LOW_O,LOW_R,0};
     55 static const UChar PK_VAR_N[]={LOW_N,0};
     56 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
     57 
     58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
     59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
     60 
     61 PluralRules::PluralRules(UErrorCode& status)
     62 :   UObject(),
     63     mRules(NULL)
     64 {
     65     if (U_FAILURE(status)) {
     66         return;
     67     }
     68     mParser = new RuleParser();
     69     if (mParser==NULL) {
     70         status = U_MEMORY_ALLOCATION_ERROR;
     71     }
     72 }
     73 
     74 PluralRules::PluralRules(const PluralRules& other)
     75 : UObject(other),
     76     mRules(NULL),
     77     mParser(new RuleParser())
     78 {
     79     *this=other;
     80 }
     81 
     82 PluralRules::~PluralRules() {
     83     delete mRules;
     84     delete mParser;
     85 }
     86 
     87 PluralRules*
     88 PluralRules::clone() const {
     89     return new PluralRules(*this);
     90 }
     91 
     92 PluralRules&
     93 PluralRules::operator=(const PluralRules& other) {
     94     if (this != &other) {
     95         delete mRules;
     96         if (other.mRules==NULL) {
     97             mRules = NULL;
     98         }
     99         else {
    100             mRules = new RuleChain(*other.mRules);
    101         }
    102         delete mParser;
    103         mParser = new RuleParser();
    104     }
    105 
    106     return *this;
    107 }
    108 
    109 PluralRules* U_EXPORT2
    110 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
    111     RuleChain   rules;
    112 
    113     if (U_FAILURE(status)) {
    114         return NULL;
    115     }
    116     PluralRules *newRules = new PluralRules(status);
    117     if ( (newRules != NULL)&& U_SUCCESS(status) ) {
    118         newRules->parseDescription((UnicodeString &)description, rules, status);
    119         if (U_SUCCESS(status)) {
    120             newRules->addRules(rules);
    121         }
    122     }
    123     if (U_FAILURE(status)) {
    124         delete newRules;
    125         return NULL;
    126     }
    127     else {
    128         return newRules;
    129     }
    130 }
    131 
    132 PluralRules* U_EXPORT2
    133 PluralRules::createDefaultRules(UErrorCode& status) {
    134     return createRules(PLURAL_DEFAULT_RULE, status);
    135 }
    136 
    137 PluralRules* U_EXPORT2
    138 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
    139     RuleChain   rChain;
    140     if (U_FAILURE(status)) {
    141         return NULL;
    142     }
    143     PluralRules *newObj = new PluralRules(status);
    144     if (newObj==NULL || U_FAILURE(status)) {
    145         return NULL;
    146     }
    147     UnicodeString locRule = newObj->getRuleFromResource(locale, status);
    148     if ((locRule.length() != 0) && U_SUCCESS(status)) {
    149         newObj->parseDescription(locRule, rChain, status);
    150         if (U_SUCCESS(status)) {
    151             newObj->addRules(rChain);
    152         }
    153     }
    154     if (U_FAILURE(status)||(locRule.length() == 0)) {
    155         // use default plural rule
    156         status = U_ZERO_ERROR;
    157         UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
    158         newObj->parseDescription(defRule, rChain, status);
    159         newObj->addRules(rChain);
    160     }
    161 
    162     return newObj;
    163 }
    164 
    165 UnicodeString
    166 PluralRules::select(int32_t number) const {
    167     if (mRules == NULL) {
    168         return PLURAL_DEFAULT_RULE;
    169     }
    170     else {
    171         return mRules->select(number);
    172     }
    173 }
    174 
    175 UnicodeString
    176 PluralRules::select(double number) const {
    177     if (mRules == NULL) {
    178         return PLURAL_DEFAULT_RULE;
    179     }
    180     else {
    181         return mRules->select(number);
    182     }
    183 }
    184 
    185 StringEnumeration*
    186 PluralRules::getKeywords(UErrorCode& status) const {
    187     if (U_FAILURE(status))  return NULL;
    188     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
    189     if (U_FAILURE(status))  return NULL;
    190 
    191     return nameEnumerator;
    192 }
    193 
    194 
    195 UBool
    196 PluralRules::isKeyword(const UnicodeString& keyword) const {
    197     if ( keyword == PLURAL_KEYWORD_OTHER ) {
    198         return true;
    199     }
    200     else {
    201         if (mRules==NULL) {
    202             return false;
    203         }
    204         else {
    205             return mRules->isKeyword(keyword);
    206         }
    207     }
    208 }
    209 
    210 UnicodeString
    211 PluralRules::getKeywordOther() const {
    212     return PLURAL_KEYWORD_OTHER;
    213 }
    214 
    215 UBool
    216 PluralRules::operator==(const PluralRules& other) const  {
    217     int32_t limit;
    218     UBool sameList = TRUE;
    219     const UnicodeString *ptrKeyword;
    220     UErrorCode status= U_ZERO_ERROR;
    221 
    222     if ( this == &other ) {
    223         return TRUE;
    224     }
    225     StringEnumeration* myKeywordList = getKeywords(status);
    226     if (U_FAILURE(status)) {
    227         return FALSE;
    228     }
    229     StringEnumeration* otherKeywordList =other.getKeywords(status);
    230     if (U_FAILURE(status)) {
    231         return FALSE;
    232     }
    233 
    234     if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
    235         U_FAILURE(status)) {
    236         sameList = FALSE;
    237     }
    238     else {
    239         myKeywordList->reset(status);
    240         if (U_FAILURE(status)) {
    241             return FALSE;
    242         }
    243         while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
    244             if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
    245                 sameList = FALSE;
    246             }
    247         }
    248         otherKeywordList->reset(status);
    249         if (U_FAILURE(status)) {
    250             return FALSE;
    251         }
    252         while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
    253             if (U_FAILURE(status)) {
    254                 return FALSE;
    255             }
    256             if (!this->isKeyword(*ptrKeyword))  {
    257                 sameList = FALSE;
    258             }
    259         }
    260         delete myKeywordList;
    261         delete otherKeywordList;
    262         if (!sameList) {
    263             return FALSE;
    264         }
    265     }
    266 
    267     if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
    268         return FALSE;
    269     }
    270     UnicodeString myKeyword, otherKeyword;
    271     for (int32_t i=0; i<limit; ++i) {
    272         myKeyword = this->select(i);
    273         otherKeyword = other.select(i);
    274         if (myKeyword!=otherKeyword) {
    275             return FALSE;
    276         }
    277     }
    278     return TRUE;
    279 }
    280 
    281 void
    282 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
    283 {
    284     int32_t ruleIndex=0;
    285     UnicodeString token;
    286     tokenType type;
    287     tokenType prevType=none;
    288     RuleChain *ruleChain=NULL;
    289     AndConstraint *curAndConstraint=NULL;
    290     OrConstraint *orNode=NULL;
    291     RuleChain *lastChain=NULL;
    292 
    293     if (U_FAILURE(status)) {
    294         return;
    295     }
    296     UnicodeString ruleData = data.toLower();
    297     while (ruleIndex< ruleData.length()) {
    298         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
    299         if (U_FAILURE(status)) {
    300             return;
    301         }
    302         mParser->checkSyntax(prevType, type, status);
    303         if (U_FAILURE(status)) {
    304             return;
    305         }
    306         switch (type) {
    307         case tAnd:
    308             curAndConstraint = curAndConstraint->add();
    309             break;
    310         case tOr:
    311             lastChain = &rules;
    312             while (lastChain->next !=NULL) {
    313                 lastChain = lastChain->next;
    314             }
    315             orNode=lastChain->ruleHeader;
    316             while (orNode->next != NULL) {
    317                 orNode = orNode->next;
    318             }
    319             orNode->next= new OrConstraint();
    320             orNode=orNode->next;
    321             orNode->next=NULL;
    322             curAndConstraint = orNode->add();
    323             break;
    324         case tIs:
    325             curAndConstraint->rangeHigh=-1;
    326             break;
    327         case tNot:
    328             curAndConstraint->notIn=TRUE;
    329             break;
    330         case tIn:
    331             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    332             curAndConstraint->integerOnly = TRUE;
    333             break;
    334         case tWithin:
    335             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    336             break;
    337         case tNumber:
    338             if ( (curAndConstraint->op==AndConstraint::MOD)&&
    339                  (curAndConstraint->opNum == -1 ) ) {
    340                 curAndConstraint->opNum=getNumberValue(token);
    341             }
    342             else {
    343                 if (curAndConstraint->rangeLow == -1) {
    344                     curAndConstraint->rangeLow=getNumberValue(token);
    345                 }
    346                 else {
    347                     curAndConstraint->rangeHigh=getNumberValue(token);
    348                 }
    349             }
    350             break;
    351         case tMod:
    352             curAndConstraint->op=AndConstraint::MOD;
    353             break;
    354         case tKeyword:
    355             if (ruleChain==NULL) {
    356                 ruleChain = &rules;
    357             }
    358             else {
    359                 while (ruleChain->next!=NULL){
    360                     ruleChain=ruleChain->next;
    361                 }
    362                 ruleChain=ruleChain->next=new RuleChain();
    363             }
    364             orNode = ruleChain->ruleHeader = new OrConstraint();
    365             curAndConstraint = orNode->add();
    366             ruleChain->keyword = token;
    367             break;
    368         default:
    369             break;
    370         }
    371         prevType=type;
    372     }
    373 }
    374 
    375 int32_t
    376 PluralRules::getNumberValue(const UnicodeString& token) const {
    377     int32_t i;
    378     char digits[128];
    379 
    380     i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
    381     digits[i]='\0';
    382 
    383     return((int32_t)atoi(digits));
    384 }
    385 
    386 
    387 void
    388 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
    389     int32_t i=*curIndex;
    390 
    391     localeName.remove();
    392     while (i< localeData.length()) {
    393        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
    394            break;
    395        }
    396        i++;
    397     }
    398 
    399     while (i< localeData.length()) {
    400        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
    401            break;
    402        }
    403        localeName+=localeData.charAt(i++);
    404     }
    405     *curIndex=i;
    406 }
    407 
    408 
    409 int32_t
    410 PluralRules::getRepeatLimit() const {
    411     if (mRules!=NULL) {
    412         return mRules->getRepeatLimit();
    413     }
    414     else {
    415         return 0;
    416     }
    417 }
    418 
    419 
    420 void
    421 PluralRules::addRules(RuleChain& rules) {
    422     RuleChain *newRule = new RuleChain(rules);
    423     this->mRules=newRule;
    424     newRule->setRepeatLimit();
    425 }
    426 
    427 UnicodeString
    428 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
    429     UnicodeString emptyStr;
    430 
    431     if (U_FAILURE(errCode)) {
    432         return emptyStr;
    433     }
    434     UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
    435     if(U_FAILURE(errCode)) {
    436         /* total failure, not even root could be opened */
    437         return emptyStr;
    438     }
    439     UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
    440     if(U_FAILURE(errCode)) {
    441         ures_close(rb);
    442         return emptyStr;
    443     }
    444     int32_t resLen=0;
    445     const char *curLocaleName=locale.getName();
    446     const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
    447 
    448     if (s == NULL) {
    449         // Check parent locales.
    450         UErrorCode status = U_ZERO_ERROR;
    451         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
    452         const char *curLocaleName=locale.getName();
    453         int32_t localeNameLen=0;
    454         uprv_strcpy(parentLocaleName, curLocaleName);
    455 
    456         while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
    457                                        ULOC_FULLNAME_CAPACITY, &status)) > 0) {
    458             resLen=0;
    459             s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
    460             if (s != NULL) {
    461                 errCode = U_ZERO_ERROR;
    462                 break;
    463             }
    464             status = U_ZERO_ERROR;
    465         }
    466     }
    467     if (s==NULL) {
    468         ures_close(locRes);
    469         ures_close(rb);
    470         return emptyStr;
    471     }
    472 
    473     char setKey[256];
    474     UChar result[256];
    475     u_UCharsToChars(s, setKey, resLen + 1);
    476     // printf("\n PluralRule: %s\n", setKey);
    477 
    478 
    479     UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
    480     if(U_FAILURE(errCode)) {
    481         ures_close(locRes);
    482         ures_close(rb);
    483         return emptyStr;
    484     }
    485     resLen=0;
    486     UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
    487     if (U_FAILURE(errCode)) {
    488         ures_close(ruleRes);
    489         ures_close(locRes);
    490         ures_close(rb);
    491         return emptyStr;
    492     }
    493 
    494     int32_t numberKeys = ures_getSize(setRes);
    495     char *key=NULL;
    496     int32_t len=0;
    497     for(int32_t i=0; i<numberKeys; ++i) {
    498         int32_t keyLen;
    499         resLen=0;
    500         s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
    501         keyLen = (int32_t)uprv_strlen(key);
    502         u_charsToUChars(key, result+len, keyLen);
    503         len += keyLen;
    504         result[len++]=COLON;
    505         uprv_memcpy(result+len, s, resLen*sizeof(UChar));
    506         len += resLen;
    507         result[len++]=SEMI_COLON;
    508     }
    509     result[len++]=0;
    510     u_UCharsToChars(result, setKey, len);
    511     // printf(" Rule: %s\n", setKey);
    512 
    513     ures_close(setRes);
    514     ures_close(ruleRes);
    515     ures_close(locRes);
    516     ures_close(rb);
    517     return UnicodeString(result);
    518 
    519 }
    520 
    521 AndConstraint::AndConstraint() {
    522     op = AndConstraint::NONE;
    523     opNum=-1;
    524     rangeLow=-1;
    525     rangeHigh=-1;
    526     notIn=FALSE;
    527     integerOnly=FALSE;
    528     next=NULL;
    529 }
    530 
    531 
    532 AndConstraint::AndConstraint(const AndConstraint& other) {
    533     this->op = other.op;
    534     this->opNum=other.opNum;
    535     this->rangeLow=other.rangeLow;
    536     this->rangeHigh=other.rangeHigh;
    537     this->integerOnly=other.integerOnly;
    538     this->notIn=other.notIn;
    539     if (other.next==NULL) {
    540         this->next=NULL;
    541     }
    542     else {
    543         this->next = new AndConstraint(*other.next);
    544     }
    545 }
    546 
    547 AndConstraint::~AndConstraint() {
    548     if (next!=NULL) {
    549         delete next;
    550     }
    551 }
    552 
    553 
    554 UBool
    555 AndConstraint::isFulfilled(double number) {
    556     UBool result=TRUE;
    557     double value=number;
    558 
    559     if ( op == MOD ) {
    560         value = (int32_t)value % opNum;
    561     }
    562     if ( rangeHigh == -1 ) {
    563         if ( rangeLow == -1 ) {
    564             result = TRUE; // empty rule
    565         }
    566         else {
    567             if ( value == rangeLow ) {
    568                 result = TRUE;
    569             }
    570             else {
    571                 result = FALSE;
    572             }
    573         }
    574     }
    575     else {
    576         if ((rangeLow <= value) && (value <= rangeHigh)) {
    577             if (integerOnly) {
    578                 if ( value != (int32_t)value) {
    579                     result = FALSE;
    580                 }
    581                 else {
    582                     result = TRUE;
    583                 }
    584             }
    585             else {
    586                 result = TRUE;
    587             }
    588         }
    589         else {
    590             result = FALSE;
    591         }
    592     }
    593     if (notIn) {
    594         return !result;
    595     }
    596     else {
    597         return result;
    598     }
    599 }
    600 
    601 int32_t
    602 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
    603 
    604     if ( op == MOD ) {
    605         return uprv_max(opNum, maxLimit);
    606     }
    607     else {
    608         if ( rangeHigh == -1 ) {
    609             return uprv_max(rangeLow, maxLimit);
    610         }
    611         else{
    612             return uprv_max(rangeHigh, maxLimit);
    613         }
    614     }
    615 }
    616 
    617 
    618 AndConstraint*
    619 AndConstraint::add()
    620 {
    621     this->next = new AndConstraint();
    622     return this->next;
    623 }
    624 
    625 OrConstraint::OrConstraint() {
    626     childNode=NULL;
    627     next=NULL;
    628 }
    629 
    630 OrConstraint::OrConstraint(const OrConstraint& other) {
    631     if ( other.childNode == NULL ) {
    632         this->childNode = NULL;
    633     }
    634     else {
    635         this->childNode = new AndConstraint(*(other.childNode));
    636     }
    637     if (other.next == NULL ) {
    638         this->next = NULL;
    639     }
    640     else {
    641         this->next = new OrConstraint(*(other.next));
    642     }
    643 }
    644 
    645 OrConstraint::~OrConstraint() {
    646     if (childNode!=NULL) {
    647         delete childNode;
    648     }
    649     if (next!=NULL) {
    650         delete next;
    651     }
    652 }
    653 
    654 AndConstraint*
    655 OrConstraint::add()
    656 {
    657     OrConstraint *curOrConstraint=this;
    658     {
    659         while (curOrConstraint->next!=NULL) {
    660             curOrConstraint = curOrConstraint->next;
    661         }
    662         curOrConstraint->next = NULL;
    663         curOrConstraint->childNode = new AndConstraint();
    664     }
    665     return curOrConstraint->childNode;
    666 }
    667 
    668 UBool
    669 OrConstraint::isFulfilled(double number) {
    670     OrConstraint* orRule=this;
    671     UBool result=FALSE;
    672 
    673     while (orRule!=NULL && !result) {
    674         result=TRUE;
    675         AndConstraint* andRule = orRule->childNode;
    676         while (andRule!=NULL && result) {
    677             result = andRule->isFulfilled(number);
    678             andRule=andRule->next;
    679         }
    680         orRule = orRule->next;
    681     }
    682 
    683     return result;
    684 }
    685 
    686 
    687 RuleChain::RuleChain() {
    688     ruleHeader=NULL;
    689     next = NULL;
    690     repeatLimit=0;
    691 }
    692 
    693 RuleChain::RuleChain(const RuleChain& other) {
    694     this->repeatLimit = other.repeatLimit;
    695     this->keyword=other.keyword;
    696     if (other.ruleHeader != NULL) {
    697         this->ruleHeader = new OrConstraint(*(other.ruleHeader));
    698     }
    699     else {
    700         this->ruleHeader = NULL;
    701     }
    702     if (other.next != NULL ) {
    703         this->next = new RuleChain(*other.next);
    704     }
    705     else
    706     {
    707         this->next = NULL;
    708     }
    709 }
    710 
    711 RuleChain::~RuleChain() {
    712     if (next != NULL) {
    713         delete next;
    714     }
    715     if ( ruleHeader != NULL ) {
    716         delete ruleHeader;
    717     }
    718 }
    719 
    720 UnicodeString
    721 RuleChain::select(double number) const {
    722 
    723    if ( ruleHeader != NULL ) {
    724        if (ruleHeader->isFulfilled(number)) {
    725            return keyword;
    726        }
    727    }
    728    if ( next != NULL ) {
    729        return next->select(number);
    730    }
    731    else {
    732        return PLURAL_KEYWORD_OTHER;
    733    }
    734 
    735 }
    736 
    737 void
    738 RuleChain::dumpRules(UnicodeString& result) {
    739     UChar digitString[16];
    740 
    741     if ( ruleHeader != NULL ) {
    742         result +=  keyword;
    743         OrConstraint* orRule=ruleHeader;
    744         while ( orRule != NULL ) {
    745             AndConstraint* andRule=orRule->childNode;
    746             while ( andRule != NULL ) {
    747                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
    748                     result += UNICODE_STRING_SIMPLE(" n is ");
    749                     if (andRule->notIn) {
    750                         result += UNICODE_STRING_SIMPLE("not ");
    751                     }
    752                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
    753                     result += UnicodeString(digitString);
    754                 }
    755                 else {
    756                     if (andRule->op==AndConstraint::MOD) {
    757                         result += UNICODE_STRING_SIMPLE("  n mod ");
    758                         uprv_itou(digitString,16, andRule->opNum,10,0);
    759                         result += UnicodeString(digitString);
    760                     }
    761                     else {
    762                         result += UNICODE_STRING_SIMPLE("  n ");
    763                     }
    764                     if (andRule->rangeHigh==-1) {
    765                         if (andRule->notIn) {
    766                             result += UNICODE_STRING_SIMPLE(" is not ");
    767                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    768                             result += UnicodeString(digitString);
    769                         }
    770                         else {
    771                             result += UNICODE_STRING_SIMPLE(" is ");
    772                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    773                             result += UnicodeString(digitString);
    774                         }
    775                     }
    776                     else {
    777                         if (andRule->notIn) {
    778                             if ( andRule->integerOnly ) {
    779                                 result += UNICODE_STRING_SIMPLE("  not in ");
    780                             }
    781                             else {
    782                                 result += UNICODE_STRING_SIMPLE("  not within ");
    783                             }
    784                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    785                             result += UnicodeString(digitString);
    786                             result += UNICODE_STRING_SIMPLE(" .. ");
    787                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
    788                             result += UnicodeString(digitString);
    789                         }
    790                         else {
    791                             if ( andRule->integerOnly ) {
    792                                 result += UNICODE_STRING_SIMPLE(" in ");
    793                             }
    794                             else {
    795                                 result += UNICODE_STRING_SIMPLE(" within ");
    796                             }
    797                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    798                             result += UnicodeString(digitString);
    799                             result += UNICODE_STRING_SIMPLE(" .. ");
    800                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
    801                         }
    802                     }
    803                 }
    804                 if ( (andRule=andRule->next) != NULL) {
    805                     result += PK_AND;
    806                 }
    807             }
    808             if ( (orRule = orRule->next) != NULL ) {
    809                 result += PK_OR;
    810             }
    811         }
    812     }
    813     if ( next != NULL ) {
    814         next->dumpRules(result);
    815     }
    816 }
    817 
    818 int32_t
    819 RuleChain::getRepeatLimit () {
    820     return repeatLimit;
    821 }
    822 
    823 void
    824 RuleChain::setRepeatLimit () {
    825     int32_t limit=0;
    826 
    827     if ( next != NULL ) {
    828         next->setRepeatLimit();
    829         limit = next->repeatLimit;
    830     }
    831 
    832     if ( ruleHeader != NULL ) {
    833         OrConstraint* orRule=ruleHeader;
    834         while ( orRule != NULL ) {
    835             AndConstraint* andRule=orRule->childNode;
    836             while ( andRule != NULL ) {
    837                 limit = andRule->updateRepeatLimit(limit);
    838                 andRule = andRule->next;
    839             }
    840             orRule = orRule->next;
    841         }
    842     }
    843     repeatLimit = limit;
    844 }
    845 
    846 UErrorCode
    847 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
    848     if ( arraySize < capacityOfKeywords-1 ) {
    849         keywords[arraySize++]=keyword;
    850     }
    851     else {
    852         return U_BUFFER_OVERFLOW_ERROR;
    853     }
    854 
    855     if ( next != NULL ) {
    856         return next->getKeywords(capacityOfKeywords, keywords, arraySize);
    857     }
    858     else {
    859         return U_ZERO_ERROR;
    860     }
    861 }
    862 
    863 UBool
    864 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
    865     if ( keyword == keywordParam ) {
    866         return TRUE;
    867     }
    868 
    869     if ( next != NULL ) {
    870         return next->isKeyword(keywordParam);
    871     }
    872     else {
    873         return FALSE;
    874     }
    875 }
    876 
    877 
    878 RuleParser::RuleParser() {
    879     UErrorCode err=U_ZERO_ERROR;
    880     const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
    881     const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
    882     idStartFilter = new UnicodeSet(idStart, err);
    883     idContinueFilter = new UnicodeSet(idContinue, err);
    884 }
    885 
    886 RuleParser::~RuleParser() {
    887     delete idStartFilter;
    888     delete idContinueFilter;
    889 }
    890 
    891 void
    892 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
    893 {
    894     if (U_FAILURE(status)) {
    895         return;
    896     }
    897     switch(prevType) {
    898     case none:
    899     case tSemiColon:
    900         if (curType!=tKeyword) {
    901             status = U_UNEXPECTED_TOKEN;
    902         }
    903         break;
    904     case tVariableN :
    905         if (curType != tIs && curType != tMod && curType != tIn &&
    906             curType != tNot && curType != tWithin) {
    907             status = U_UNEXPECTED_TOKEN;
    908         }
    909         break;
    910     case tZero:
    911     case tOne:
    912     case tTwo:
    913     case tFew:
    914     case tMany:
    915     case tOther:
    916     case tKeyword:
    917         if (curType != tColon) {
    918             status = U_UNEXPECTED_TOKEN;
    919         }
    920         break;
    921     case tColon :
    922         if (curType != tVariableN) {
    923             status = U_UNEXPECTED_TOKEN;
    924         }
    925         break;
    926     case tIs:
    927         if ( curType != tNumber && curType != tNot) {
    928             status = U_UNEXPECTED_TOKEN;
    929         }
    930         break;
    931     case tNot:
    932         if (curType != tNumber && curType != tIn && curType != tWithin) {
    933             status = U_UNEXPECTED_TOKEN;
    934         }
    935         break;
    936     case tMod:
    937     case tDot:
    938     case tIn:
    939     case tWithin:
    940     case tAnd:
    941     case tOr:
    942         if (curType != tNumber && curType != tVariableN) {
    943             status = U_UNEXPECTED_TOKEN;
    944         }
    945         break;
    946     case tNumber:
    947         if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
    948             curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
    949         {
    950             status = U_UNEXPECTED_TOKEN;
    951         }
    952         break;
    953     default:
    954         status = U_UNEXPECTED_TOKEN;
    955         break;
    956     }
    957 }
    958 
    959 void
    960 RuleParser::getNextToken(const UnicodeString& ruleData,
    961                          int32_t *ruleIndex,
    962                          UnicodeString& token,
    963                          tokenType& type,
    964                          UErrorCode &status)
    965 {
    966     int32_t curIndex= *ruleIndex;
    967     UChar ch;
    968     tokenType prevType=none;
    969 
    970     if (U_FAILURE(status)) {
    971         return;
    972     }
    973     while (curIndex<ruleData.length()) {
    974         ch = ruleData.charAt(curIndex);
    975         if ( !inRange(ch, type) ) {
    976             status = U_ILLEGAL_CHARACTER;
    977             return;
    978         }
    979         switch (type) {
    980         case tSpace:
    981             if ( *ruleIndex != curIndex ) { // letter
    982                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
    983                 *ruleIndex=curIndex;
    984                 type=prevType;
    985                 getKeyType(token, type, status);
    986                 return;
    987             }
    988             else {
    989                 *ruleIndex=*ruleIndex+1;
    990             }
    991             break; // consective space
    992         case tColon:
    993         case tSemiColon:
    994             if ( *ruleIndex != curIndex ) {
    995                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
    996                 *ruleIndex=curIndex;
    997                 type=prevType;
    998                 getKeyType(token, type, status);
    999                 return;
   1000             }
   1001             else {
   1002                 *ruleIndex=curIndex+1;
   1003                 return;
   1004             }
   1005         case tLetter:
   1006              if ((type==prevType)||(prevType==none)) {
   1007                 prevType=type;
   1008                 break;
   1009              }
   1010              break;
   1011         case tNumber:
   1012              if ((type==prevType)||(prevType==none)) {
   1013                 prevType=type;
   1014                 break;
   1015              }
   1016              else {
   1017                 *ruleIndex=curIndex+1;
   1018                 return;
   1019              }
   1020          case tDot:
   1021              if (prevType==none) {  // first dot
   1022                 prevType=type;
   1023                 continue;
   1024              }
   1025              else {
   1026                  if ( *ruleIndex != curIndex ) {
   1027                     token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1028                     *ruleIndex=curIndex;  // letter
   1029                     type=prevType;
   1030                     getKeyType(token, type, status);
   1031                     return;
   1032                  }
   1033                  else {  // two consective dots
   1034                     *ruleIndex=curIndex+2;
   1035                     return;
   1036                  }
   1037              }
   1038              break;
   1039          default:
   1040              status = U_UNEXPECTED_TOKEN;
   1041              return;
   1042         }
   1043         curIndex++;
   1044     }
   1045     if ( curIndex>=ruleData.length() ) {
   1046         if ( (type == tLetter)||(type == tNumber) ) {
   1047             token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1048             getKeyType(token, type, status);
   1049             if (U_FAILURE(status)) {
   1050                 return;
   1051             }
   1052         }
   1053         *ruleIndex = ruleData.length();
   1054     }
   1055 }
   1056 
   1057 UBool
   1058 RuleParser::inRange(UChar ch, tokenType& type) {
   1059     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
   1060         // we assume all characters are in lower case already.
   1061         return FALSE;
   1062     }
   1063     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
   1064         type = tLetter;
   1065         return TRUE;
   1066     }
   1067     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
   1068         type = tNumber;
   1069         return TRUE;
   1070     }
   1071     switch (ch) {
   1072     case COLON:
   1073         type = tColon;
   1074         return TRUE;
   1075     case SPACE:
   1076         type = tSpace;
   1077         return TRUE;
   1078     case SEMI_COLON:
   1079         type = tSemiColon;
   1080         return TRUE;
   1081     case DOT:
   1082         type = tDot;
   1083         return TRUE;
   1084     default :
   1085         type = none;
   1086         return FALSE;
   1087     }
   1088 }
   1089 
   1090 
   1091 void
   1092 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
   1093 {
   1094     if (U_FAILURE(status)) {
   1095         return;
   1096     }
   1097     if ( keyType==tNumber) {
   1098     }
   1099     else if (token==PK_VAR_N) {
   1100         keyType = tVariableN;
   1101     }
   1102     else if (token==PK_IS) {
   1103         keyType = tIs;
   1104     }
   1105     else if (token==PK_AND) {
   1106         keyType = tAnd;
   1107     }
   1108     else if (token==PK_IN) {
   1109         keyType = tIn;
   1110     }
   1111     else if (token==PK_WITHIN) {
   1112         keyType = tWithin;
   1113     }
   1114     else if (token==PK_NOT) {
   1115         keyType = tNot;
   1116     }
   1117     else if (token==PK_MOD) {
   1118         keyType = tMod;
   1119     }
   1120     else if (token==PK_OR) {
   1121         keyType = tOr;
   1122     }
   1123     else if ( isValidKeyword(token) ) {
   1124         keyType = tKeyword;
   1125     }
   1126     else {
   1127         status = U_UNEXPECTED_TOKEN;
   1128     }
   1129 }
   1130 
   1131 UBool
   1132 RuleParser::isValidKeyword(const UnicodeString& token) {
   1133     if ( token.length()==0 ) {
   1134         return FALSE;
   1135     }
   1136     if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
   1137         int32_t i;
   1138         for (i=1; i< token.length(); i++) {
   1139             if (idContinueFilter->contains(token.charAt(i))== FALSE) {
   1140                 return FALSE;
   1141             }
   1142         }
   1143         return TRUE;
   1144     }
   1145     else {
   1146         return FALSE;
   1147     }
   1148 }
   1149 
   1150 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
   1151 fKeywordNames(status)
   1152 {
   1153     RuleChain *node=header;
   1154     UBool  addKeywordOther=true;
   1155 
   1156     if (U_FAILURE(status)) {
   1157         return;
   1158     }
   1159     pos=0;
   1160     fKeywordNames.removeAllElements();
   1161     while(node!=NULL) {
   1162         fKeywordNames.addElement(new UnicodeString(node->keyword), status);
   1163         if (U_FAILURE(status)) {
   1164             return;
   1165         }
   1166         if (node->keyword == PLURAL_KEYWORD_OTHER) {
   1167             addKeywordOther= false;
   1168         }
   1169         node=node->next;
   1170     }
   1171 
   1172     if (addKeywordOther) {
   1173         fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
   1174         if (U_FAILURE(status)) {
   1175             return;
   1176         }
   1177     }
   1178 }
   1179 
   1180 const UnicodeString*
   1181 PluralKeywordEnumeration::snext(UErrorCode& status) {
   1182     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
   1183         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
   1184     }
   1185     return NULL;
   1186 }
   1187 
   1188 void
   1189 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
   1190     pos=0;
   1191 }
   1192 
   1193 int32_t
   1194 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
   1195        return fKeywordNames.size();
   1196 }
   1197 
   1198 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
   1199     UnicodeString *s;
   1200     for (int32_t i=0; i<fKeywordNames.size(); ++i) {
   1201         if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
   1202             delete s;
   1203         }
   1204     }
   1205 }
   1206 
   1207 U_NAMESPACE_END
   1208 
   1209 
   1210 #endif /* #if !UCONFIG_NO_FORMATTING */
   1211 
   1212 //eof
   1213