Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2009, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURRULE.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *******************************************************************************
     13 */
     14 
     15 
     16 #include "unicode/uniset.h"
     17 #include "unicode/utypes.h"
     18 #include "unicode/ures.h"
     19 #include "unicode/plurrule.h"
     20 #include "cmemory.h"
     21 #include "cstring.h"
     22 #include "hash.h"
     23 #include "mutex.h"
     24 #include "plurrule_impl.h"
     25 #include "putilimp.h"
     26 #include "ucln_in.h"
     27 #include "ustrfmt.h"
     28 #include "locutil.h"
     29 
     30 /*
     31 // TODO(claireho): remove stdio
     32 #include "stdio.h"
     33 */
     34 
     35 #if !UCONFIG_NO_FORMATTING
     36 
     37 U_NAMESPACE_BEGIN
     38 
     39 
     40 #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])
     41 
     42 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
     43 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
     44 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
     45 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
     46 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
     47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
     48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
     49 static const UChar PK_IN[]={LOW_I,LOW_N,0};
     50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
     51 static const UChar PK_IS[]={LOW_I,LOW_S,0};
     52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
     53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
     54 static const UChar PK_OR[]={LOW_O,LOW_R,0};
     55 static const UChar PK_VAR_N[]={LOW_N,0};
     56 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
     57 
     58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
     59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
     60 
     61 PluralRules::PluralRules(UErrorCode& status)
     62 :   UObject(),
     63     mRules(NULL)
     64 {
     65     if (U_FAILURE(status)) {
     66         return;
     67     }
     68     mParser = new RuleParser();
     69     if (mParser==NULL) {
     70         status = U_MEMORY_ALLOCATION_ERROR;
     71     }
     72 }
     73 
     74 PluralRules::PluralRules(const PluralRules& other)
     75 : UObject(other),
     76     mRules(NULL),
     77     mParser(new RuleParser())
     78 {
     79     *this=other;
     80 }
     81 
     82 PluralRules::~PluralRules() {
     83     delete mRules;
     84     delete mParser;
     85 }
     86 
     87 PluralRules*
     88 PluralRules::clone() const {
     89     return new PluralRules(*this);
     90 }
     91 
     92 PluralRules&
     93 PluralRules::operator=(const PluralRules& other) {
     94     if (this != &other) {
     95         delete mRules;
     96         if (other.mRules==NULL) {
     97             mRules = NULL;
     98         }
     99         else {
    100             mRules = new RuleChain(*other.mRules);
    101         }
    102         delete mParser;
    103         mParser = new RuleParser();
    104     }
    105 
    106     return *this;
    107 }
    108 
    109 PluralRules* U_EXPORT2
    110 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
    111     RuleChain   rules;
    112 
    113     if (U_FAILURE(status)) {
    114         return NULL;
    115     }
    116     PluralRules *newRules = new PluralRules(status);
    117     if ( (newRules != NULL)&& U_SUCCESS(status) ) {
    118         newRules->parseDescription((UnicodeString &)description, rules, status);
    119         if (U_SUCCESS(status)) {
    120             newRules->addRules(rules);
    121         }
    122     }
    123     if (U_FAILURE(status)) {
    124         delete newRules;
    125         return NULL;
    126     }
    127     else {
    128         return newRules;
    129     }
    130 }
    131 
    132 PluralRules* U_EXPORT2
    133 PluralRules::createDefaultRules(UErrorCode& status) {
    134     return createRules(PLURAL_DEFAULT_RULE, status);
    135 }
    136 
    137 PluralRules* U_EXPORT2
    138 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
    139     RuleChain   rChain;
    140     if (U_FAILURE(status)) {
    141         return NULL;
    142     }
    143     PluralRules *newObj = new PluralRules(status);
    144     if (newObj==NULL || U_FAILURE(status)) {
    145         return NULL;
    146     }
    147     UnicodeString locRule = newObj->getRuleFromResource(locale, status);
    148     if ((locRule.length() != 0) && U_SUCCESS(status)) {
    149         newObj->parseDescription(locRule, rChain, status);
    150         if (U_SUCCESS(status)) {
    151             newObj->addRules(rChain);
    152         }
    153     }
    154     if (U_FAILURE(status)||(locRule.length() == 0)) {
    155         // use default plural rule
    156         status = U_ZERO_ERROR;
    157         UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
    158         newObj->parseDescription(defRule, rChain, status);
    159         newObj->addRules(rChain);
    160     }
    161 
    162     return newObj;
    163 }
    164 
    165 UnicodeString
    166 PluralRules::select(int32_t number) const {
    167     if (mRules == NULL) {
    168         return PLURAL_DEFAULT_RULE;
    169     }
    170     else {
    171         return mRules->select(number);
    172     }
    173 }
    174 
    175 UnicodeString
    176 PluralRules::select(double number) const {
    177     if (mRules == NULL) {
    178         return PLURAL_DEFAULT_RULE;
    179     }
    180     else {
    181         return mRules->select(number);
    182     }
    183 }
    184 
    185 StringEnumeration*
    186 PluralRules::getKeywords(UErrorCode& status) const {
    187     if (U_FAILURE(status))  return NULL;
    188     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
    189     if (U_FAILURE(status))  return NULL;
    190 
    191     return nameEnumerator;
    192 }
    193 
    194 
    195 UBool
    196 PluralRules::isKeyword(const UnicodeString& keyword) const {
    197     if ( keyword == PLURAL_KEYWORD_OTHER ) {
    198         return true;
    199     }
    200     else {
    201         if (mRules==NULL) {
    202             return false;
    203         }
    204         else {
    205             return mRules->isKeyword(keyword);
    206         }
    207     }
    208 }
    209 
    210 UnicodeString
    211 PluralRules::getKeywordOther() const {
    212     return PLURAL_KEYWORD_OTHER;
    213 }
    214 
    215 UBool
    216 PluralRules::operator==(const PluralRules& other) const  {
    217     int32_t limit;
    218     UBool sameList = TRUE;
    219     const UnicodeString *ptrKeyword;
    220     UErrorCode status= U_ZERO_ERROR;
    221 
    222     if ( this == &other ) {
    223         return TRUE;
    224     }
    225     StringEnumeration* myKeywordList = getKeywords(status);
    226     if (U_FAILURE(status)) {
    227         return FALSE;
    228     }
    229     StringEnumeration* otherKeywordList =other.getKeywords(status);
    230     if (U_FAILURE(status)) {
    231         return FALSE;
    232     }
    233 
    234     if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
    235         U_FAILURE(status)) {
    236         sameList = FALSE;
    237     }
    238     else {
    239         myKeywordList->reset(status);
    240         if (U_FAILURE(status)) {
    241             return FALSE;
    242         }
    243         while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
    244             if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
    245                 sameList = FALSE;
    246             }
    247         }
    248         otherKeywordList->reset(status);
    249         if (U_FAILURE(status)) {
    250             return FALSE;
    251         }
    252         while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
    253             if (U_FAILURE(status)) {
    254                 return FALSE;
    255             }
    256             if (!this->isKeyword(*ptrKeyword))  {
    257                 sameList = FALSE;
    258             }
    259         }
    260         delete myKeywordList;
    261         delete otherKeywordList;
    262         if (!sameList) {
    263             return FALSE;
    264         }
    265     }
    266 
    267     if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
    268         return FALSE;
    269     }
    270     UnicodeString myKeyword, otherKeyword;
    271     for (int32_t i=0; i<limit; ++i) {
    272         myKeyword = this->select(i);
    273         otherKeyword = other.select(i);
    274         if (myKeyword!=otherKeyword) {
    275             return FALSE;
    276         }
    277     }
    278     return TRUE;
    279 }
    280 
    281 void
    282 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
    283 {
    284     int32_t ruleIndex=0;
    285     UnicodeString token;
    286     tokenType type;
    287     tokenType prevType=none;
    288     RuleChain *ruleChain=NULL;
    289     AndConstraint *curAndConstraint=NULL;
    290     OrConstraint *orNode=NULL;
    291     RuleChain *lastChain=NULL;
    292 
    293     if (U_FAILURE(status)) {
    294         return;
    295     }
    296     UnicodeString ruleData = data.toLower();
    297     while (ruleIndex< ruleData.length()) {
    298         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
    299         if (U_FAILURE(status)) {
    300             return;
    301         }
    302         mParser->checkSyntax(prevType, type, status);
    303         if (U_FAILURE(status)) {
    304             return;
    305         }
    306         switch (type) {
    307         case tAnd:
    308             curAndConstraint = curAndConstraint->add();
    309             break;
    310         case tOr:
    311             lastChain = &rules;
    312             while (lastChain->next !=NULL) {
    313                 lastChain = lastChain->next;
    314             }
    315             orNode=lastChain->ruleHeader;
    316             while (orNode->next != NULL) {
    317                 orNode = orNode->next;
    318             }
    319             orNode->next= new OrConstraint();
    320             orNode=orNode->next;
    321             orNode->next=NULL;
    322             curAndConstraint = orNode->add();
    323             break;
    324         case tIs:
    325             curAndConstraint->rangeHigh=-1;
    326             break;
    327         case tNot:
    328             curAndConstraint->notIn=TRUE;
    329             break;
    330         case tIn:
    331             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    332             curAndConstraint->integerOnly = TRUE;
    333             break;
    334         case tWithin:
    335             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
    336             break;
    337         case tNumber:
    338             if ( (curAndConstraint->op==AndConstraint::MOD)&&
    339                  (curAndConstraint->opNum == -1 ) ) {
    340                 curAndConstraint->opNum=getNumberValue(token);
    341             }
    342             else {
    343                 if (curAndConstraint->rangeLow == -1) {
    344                     curAndConstraint->rangeLow=getNumberValue(token);
    345                 }
    346                 else {
    347                     curAndConstraint->rangeHigh=getNumberValue(token);
    348                 }
    349             }
    350             break;
    351         case tMod:
    352             curAndConstraint->op=AndConstraint::MOD;
    353             break;
    354         case tKeyword:
    355             if (ruleChain==NULL) {
    356                 ruleChain = &rules;
    357             }
    358             else {
    359                 while (ruleChain->next!=NULL){
    360                     ruleChain=ruleChain->next;
    361                 }
    362                 ruleChain=ruleChain->next=new RuleChain();
    363             }
    364             orNode = ruleChain->ruleHeader = new OrConstraint();
    365             curAndConstraint = orNode->add();
    366             ruleChain->keyword = token;
    367             break;
    368         default:
    369             break;
    370         }
    371         prevType=type;
    372     }
    373 }
    374 
    375 int32_t
    376 PluralRules::getNumberValue(const UnicodeString& token) const {
    377     int32_t i;
    378     char digits[128];
    379 
    380     i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
    381     digits[i]='\0';
    382 
    383     return((int32_t)atoi(digits));
    384 }
    385 
    386 
    387 void
    388 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
    389     int32_t i=*curIndex;
    390 
    391     localeName.remove();
    392     while (i< localeData.length()) {
    393        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
    394            break;
    395        }
    396        i++;
    397     }
    398 
    399     while (i< localeData.length()) {
    400        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
    401            break;
    402        }
    403        localeName+=localeData.charAt(i++);
    404     }
    405     *curIndex=i;
    406 }
    407 
    408 
    409 int32_t
    410 PluralRules::getRepeatLimit() const {
    411     if (mRules!=NULL) {
    412         return mRules->getRepeatLimit();
    413     }
    414     else {
    415         return 0;
    416     }
    417 }
    418 
    419 
    420 void
    421 PluralRules::addRules(RuleChain& rules) {
    422     RuleChain *newRule = new RuleChain(rules);
    423     this->mRules=newRule;
    424     newRule->setRepeatLimit();
    425 }
    426 
    427 UnicodeString
    428 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
    429     UnicodeString emptyStr;
    430 
    431     if (U_FAILURE(errCode)) {
    432         return emptyStr;
    433     }
    434     UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
    435     if(U_FAILURE(errCode)) {
    436         /* total failure, not even root could be opened */
    437         return emptyStr;
    438     }
    439     UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
    440     if(U_FAILURE(errCode)) {
    441         ures_close(rb);
    442         return emptyStr;
    443     }
    444     int32_t resLen=0;
    445     const char *curLocaleName=locale.getName();
    446     const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
    447 
    448     if (s == NULL) {
    449         // Check parent locales.
    450         UErrorCode status = U_ZERO_ERROR;
    451         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
    452         const char *curLocaleName=locale.getName();
    453         int32_t localeNameLen=0;
    454         uprv_strcpy(parentLocaleName, curLocaleName);
    455 
    456         while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
    457                                        ULOC_FULLNAME_CAPACITY, &status)) > 0) {
    458             resLen=0;
    459             s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
    460             if (s != NULL) {
    461                 errCode = U_ZERO_ERROR;
    462                 break;
    463             }
    464             status = U_ZERO_ERROR;
    465         }
    466     }
    467     if (s==NULL) {
    468         ures_close(locRes);
    469         ures_close(rb);
    470         return emptyStr;
    471     }
    472 
    473     char setKey[256];
    474     UChar result[256];
    475     u_UCharsToChars(s, setKey, resLen + 1);
    476     // printf("\n PluralRule: %s\n", setKey);
    477 
    478 
    479     UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
    480     if(U_FAILURE(errCode)) {
    481         ures_close(locRes);
    482         ures_close(rb);
    483         return emptyStr;
    484     }
    485     resLen=0;
    486     UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
    487     if (U_FAILURE(errCode)) {
    488         ures_close(ruleRes);
    489         ures_close(locRes);
    490         ures_close(rb);
    491         return emptyStr;
    492     }
    493 
    494     int32_t numberKeys = ures_getSize(setRes);
    495     char *key=NULL;
    496     int32_t len=0;
    497     for(int32_t i=0; i<numberKeys; ++i) {
    498         int32_t keyLen;
    499         resLen=0;
    500         s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
    501         keyLen = (int32_t)uprv_strlen(key);
    502         u_charsToUChars(key, result+len, keyLen);
    503         len += keyLen;
    504         result[len++]=COLON;
    505         uprv_memcpy(result+len, s, resLen*sizeof(UChar));
    506         len += resLen;
    507         result[len++]=SEMI_COLON;
    508     }
    509     result[len++]=0;
    510     u_UCharsToChars(result, setKey, len);
    511     // printf(" Rule: %s\n", setKey);
    512 
    513     ures_close(setRes);
    514     ures_close(ruleRes);
    515     ures_close(locRes);
    516     ures_close(rb);
    517     return UnicodeString(result);
    518 
    519 }
    520 
    521 AndConstraint::AndConstraint() {
    522     op = AndConstraint::NONE;
    523     opNum=-1;
    524     rangeLow=-1;
    525     rangeHigh=-1;
    526     notIn=FALSE;
    527     integerOnly=FALSE;
    528     next=NULL;
    529 }
    530 
    531 
    532 AndConstraint::AndConstraint(const AndConstraint& other) {
    533     this->op = other.op;
    534     this->opNum=other.opNum;
    535     this->rangeLow=other.rangeLow;
    536     this->rangeHigh=other.rangeHigh;
    537     this->integerOnly=other.integerOnly;
    538     this->notIn=other.notIn;
    539     if (other.next==NULL) {
    540         this->next=NULL;
    541     }
    542     else {
    543         this->next = new AndConstraint(*other.next);
    544     }
    545 }
    546 
    547 AndConstraint::~AndConstraint() {
    548     if (next!=NULL) {
    549         delete next;
    550     }
    551 }
    552 
    553 
    554 UBool
    555 AndConstraint::isFulfilled(double number) {
    556     UBool result=TRUE;
    557     double value=number;
    558 
    559     if ( op == MOD ) {
    560         value = (int32_t)value % opNum;
    561     }
    562     if ( rangeHigh == -1 ) {
    563         if ( rangeLow == -1 ) {
    564             result = TRUE; // empty rule
    565         }
    566         else {
    567             if ( value == rangeLow ) {
    568                 result = TRUE;
    569             }
    570             else {
    571                 result = FALSE;
    572             }
    573         }
    574     }
    575     else {
    576         if ((rangeLow <= value) && (value <= rangeHigh)) {
    577             if (integerOnly) {
    578                 if ( value != (int32_t)value) {
    579                     result = FALSE;
    580                 }
    581                 else {
    582                     result = TRUE;
    583                 }
    584             }
    585             else {
    586                 result = TRUE;
    587             }
    588         }
    589         else {
    590             result = FALSE;
    591         }
    592     }
    593     if (notIn) {
    594         return !result;
    595     }
    596     else {
    597         return result;
    598     }
    599 }
    600 
    601 int32_t
    602 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
    603 
    604     if ( op == MOD ) {
    605         return uprv_max(opNum, maxLimit);
    606     }
    607     else {
    608         if ( rangeHigh == -1 ) {
    609             return(rangeLow>maxLimit? rangeLow : maxLimit);
    610             return uprv_max(rangeLow, maxLimit);
    611         }
    612         else{
    613             return uprv_max(rangeHigh, maxLimit);
    614         }
    615     }
    616 }
    617 
    618 
    619 AndConstraint*
    620 AndConstraint::add()
    621 {
    622     this->next = new AndConstraint();
    623     return this->next;
    624 }
    625 
    626 OrConstraint::OrConstraint() {
    627     childNode=NULL;
    628     next=NULL;
    629 }
    630 
    631 OrConstraint::OrConstraint(const OrConstraint& other) {
    632     if ( other.childNode == NULL ) {
    633         this->childNode = NULL;
    634     }
    635     else {
    636         this->childNode = new AndConstraint(*(other.childNode));
    637     }
    638     if (other.next == NULL ) {
    639         this->next = NULL;
    640     }
    641     else {
    642         this->next = new OrConstraint(*(other.next));
    643     }
    644 }
    645 
    646 OrConstraint::~OrConstraint() {
    647     if (childNode!=NULL) {
    648         delete childNode;
    649     }
    650     if (next!=NULL) {
    651         delete next;
    652     }
    653 }
    654 
    655 AndConstraint*
    656 OrConstraint::add()
    657 {
    658     OrConstraint *curOrConstraint=this;
    659     {
    660         while (curOrConstraint->next!=NULL) {
    661             curOrConstraint = curOrConstraint->next;
    662         }
    663         curOrConstraint->next = NULL;
    664         curOrConstraint->childNode = new AndConstraint();
    665     }
    666     return curOrConstraint->childNode;
    667 }
    668 
    669 UBool
    670 OrConstraint::isFulfilled(double number) {
    671     OrConstraint* orRule=this;
    672     UBool result=FALSE;
    673 
    674     while (orRule!=NULL && !result) {
    675         result=TRUE;
    676         AndConstraint* andRule = orRule->childNode;
    677         while (andRule!=NULL && result) {
    678             result = andRule->isFulfilled(number);
    679             andRule=andRule->next;
    680         }
    681         orRule = orRule->next;
    682     }
    683 
    684     return result;
    685 }
    686 
    687 
    688 RuleChain::RuleChain() {
    689     ruleHeader=NULL;
    690     next = NULL;
    691     repeatLimit=0;
    692 }
    693 
    694 RuleChain::RuleChain(const RuleChain& other) {
    695     this->repeatLimit = other.repeatLimit;
    696     this->keyword=other.keyword;
    697     if (other.ruleHeader != NULL) {
    698         this->ruleHeader = new OrConstraint(*(other.ruleHeader));
    699     }
    700     else {
    701         this->ruleHeader = NULL;
    702     }
    703     if (other.next != NULL ) {
    704         this->next = new RuleChain(*other.next);
    705     }
    706     else
    707     {
    708         this->next = NULL;
    709     }
    710 }
    711 
    712 RuleChain::~RuleChain() {
    713     if (next != NULL) {
    714         delete next;
    715     }
    716     if ( ruleHeader != NULL ) {
    717         delete ruleHeader;
    718     }
    719 }
    720 
    721 UnicodeString
    722 RuleChain::select(double number) const {
    723 
    724    if ( ruleHeader != NULL ) {
    725        if (ruleHeader->isFulfilled(number)) {
    726            return keyword;
    727        }
    728    }
    729    if ( next != NULL ) {
    730        return next->select(number);
    731    }
    732    else {
    733        return PLURAL_KEYWORD_OTHER;
    734    }
    735 
    736 }
    737 
    738 void
    739 RuleChain::dumpRules(UnicodeString& result) {
    740     UChar digitString[16];
    741 
    742     if ( ruleHeader != NULL ) {
    743         result +=  keyword;
    744         OrConstraint* orRule=ruleHeader;
    745         while ( orRule != NULL ) {
    746             AndConstraint* andRule=orRule->childNode;
    747             while ( andRule != NULL ) {
    748                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
    749                     result += UNICODE_STRING_SIMPLE(" n is ");
    750                     if (andRule->notIn) {
    751                         result += UNICODE_STRING_SIMPLE("not ");
    752                     }
    753                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
    754                     result += UnicodeString(digitString);
    755                 }
    756                 else {
    757                     if (andRule->op==AndConstraint::MOD) {
    758                         result += UNICODE_STRING_SIMPLE("  n mod ");
    759                         uprv_itou(digitString,16, andRule->opNum,10,0);
    760                         result += UnicodeString(digitString);
    761                     }
    762                     else {
    763                         result += UNICODE_STRING_SIMPLE("  n ");
    764                     }
    765                     if (andRule->rangeHigh==-1) {
    766                         if (andRule->notIn) {
    767                             result += UNICODE_STRING_SIMPLE(" is not ");
    768                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    769                             result += UnicodeString(digitString);
    770                         }
    771                         else {
    772                             result += UNICODE_STRING_SIMPLE(" is ");
    773                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    774                             result += UnicodeString(digitString);
    775                         }
    776                     }
    777                     else {
    778                         if (andRule->notIn) {
    779                             if ( andRule->integerOnly ) {
    780                                 result += UNICODE_STRING_SIMPLE("  not in ");
    781                             }
    782                             else {
    783                                 result += UNICODE_STRING_SIMPLE("  not within ");
    784                             }
    785                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    786                             result += UnicodeString(digitString);
    787                             result += UNICODE_STRING_SIMPLE(" .. ");
    788                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
    789                             result += UnicodeString(digitString);
    790                         }
    791                         else {
    792                             if ( andRule->integerOnly ) {
    793                                 result += UNICODE_STRING_SIMPLE(" in ");
    794                             }
    795                             else {
    796                                 result += UNICODE_STRING_SIMPLE(" within ");
    797                             }
    798                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
    799                             result += UnicodeString(digitString);
    800                             result += UNICODE_STRING_SIMPLE(" .. ");
    801                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
    802                         }
    803                     }
    804                 }
    805                 if ( (andRule=andRule->next) != NULL) {
    806                     result += PK_AND;
    807                 }
    808             }
    809             if ( (orRule = orRule->next) != NULL ) {
    810                 result += PK_OR;
    811             }
    812         }
    813     }
    814     if ( next != NULL ) {
    815         next->dumpRules(result);
    816     }
    817 }
    818 
    819 int32_t
    820 RuleChain::getRepeatLimit () {
    821     return repeatLimit;
    822 }
    823 
    824 void
    825 RuleChain::setRepeatLimit () {
    826     int32_t limit=0;
    827 
    828     if ( next != NULL ) {
    829         next->setRepeatLimit();
    830         limit = next->repeatLimit;
    831     }
    832 
    833     if ( ruleHeader != NULL ) {
    834         OrConstraint* orRule=ruleHeader;
    835         while ( orRule != NULL ) {
    836             AndConstraint* andRule=orRule->childNode;
    837             while ( andRule != NULL ) {
    838                 limit = andRule->updateRepeatLimit(limit);
    839                 andRule = andRule->next;
    840             }
    841             orRule = orRule->next;
    842         }
    843     }
    844     repeatLimit = limit;
    845 }
    846 
    847 UErrorCode
    848 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
    849     if ( arraySize < capacityOfKeywords-1 ) {
    850         keywords[arraySize++]=keyword;
    851     }
    852     else {
    853         return U_BUFFER_OVERFLOW_ERROR;
    854     }
    855 
    856     if ( next != NULL ) {
    857         return next->getKeywords(capacityOfKeywords, keywords, arraySize);
    858     }
    859     else {
    860         return U_ZERO_ERROR;
    861     }
    862 }
    863 
    864 UBool
    865 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
    866     if ( keyword == keywordParam ) {
    867         return TRUE;
    868     }
    869 
    870     if ( next != NULL ) {
    871         return next->isKeyword(keywordParam);
    872     }
    873     else {
    874         return FALSE;
    875     }
    876 }
    877 
    878 
    879 RuleParser::RuleParser() {
    880     UErrorCode err=U_ZERO_ERROR;
    881     const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
    882     const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
    883     idStartFilter = new UnicodeSet(idStart, err);
    884     idContinueFilter = new UnicodeSet(idContinue, err);
    885 }
    886 
    887 RuleParser::~RuleParser() {
    888     delete idStartFilter;
    889     delete idContinueFilter;
    890 }
    891 
    892 void
    893 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
    894 {
    895     if (U_FAILURE(status)) {
    896         return;
    897     }
    898     switch(prevType) {
    899     case none:
    900     case tSemiColon:
    901         if (curType!=tKeyword) {
    902             status = U_UNEXPECTED_TOKEN;
    903         }
    904         break;
    905     case tVariableN :
    906         if (curType != tIs && curType != tMod && curType != tIn &&
    907             curType != tNot && curType != tWithin) {
    908             status = U_UNEXPECTED_TOKEN;
    909         }
    910         break;
    911     case tZero:
    912     case tOne:
    913     case tTwo:
    914     case tFew:
    915     case tMany:
    916     case tOther:
    917     case tKeyword:
    918         if (curType != tColon) {
    919             status = U_UNEXPECTED_TOKEN;
    920         }
    921         break;
    922     case tColon :
    923         if (curType != tVariableN) {
    924             status = U_UNEXPECTED_TOKEN;
    925         }
    926         break;
    927     case tIs:
    928         if ( curType != tNumber && curType != tNot) {
    929             status = U_UNEXPECTED_TOKEN;
    930         }
    931         break;
    932     case tNot:
    933         if (curType != tNumber && curType != tIn && curType != tWithin) {
    934             status = U_UNEXPECTED_TOKEN;
    935         }
    936         break;
    937     case tMod:
    938     case tDot:
    939     case tIn:
    940     case tWithin:
    941     case tAnd:
    942     case tOr:
    943         if (curType != tNumber && curType != tVariableN) {
    944             status = U_UNEXPECTED_TOKEN;
    945         }
    946         break;
    947     case tNumber:
    948         if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
    949             curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
    950         {
    951             status = U_UNEXPECTED_TOKEN;
    952         }
    953         break;
    954     default:
    955         status = U_UNEXPECTED_TOKEN;
    956         break;
    957     }
    958 }
    959 
    960 void
    961 RuleParser::getNextToken(const UnicodeString& ruleData,
    962                          int32_t *ruleIndex,
    963                          UnicodeString& token,
    964                          tokenType& type,
    965                          UErrorCode &status)
    966 {
    967     int32_t curIndex= *ruleIndex;
    968     UChar ch;
    969     tokenType prevType=none;
    970 
    971     if (U_FAILURE(status)) {
    972         return;
    973     }
    974     while (curIndex<ruleData.length()) {
    975         ch = ruleData.charAt(curIndex);
    976         if ( !inRange(ch, type) ) {
    977             status = U_ILLEGAL_CHARACTER;
    978             return;
    979         }
    980         switch (type) {
    981         case tSpace:
    982             if ( *ruleIndex != curIndex ) { // letter
    983                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
    984                 *ruleIndex=curIndex;
    985                 type=prevType;
    986                 getKeyType(token, type, status);
    987                 return;
    988             }
    989             else {
    990                 *ruleIndex=*ruleIndex+1;
    991             }
    992             break; // consective space
    993         case tColon:
    994         case tSemiColon:
    995             if ( *ruleIndex != curIndex ) {
    996                 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
    997                 *ruleIndex=curIndex;
    998                 type=prevType;
    999                 getKeyType(token, type, status);
   1000                 return;
   1001             }
   1002             else {
   1003                 *ruleIndex=curIndex+1;
   1004                 return;
   1005             }
   1006         case tLetter:
   1007              if ((type==prevType)||(prevType==none)) {
   1008                 prevType=type;
   1009                 break;
   1010              }
   1011              break;
   1012         case tNumber:
   1013              if ((type==prevType)||(prevType==none)) {
   1014                 prevType=type;
   1015                 break;
   1016              }
   1017              else {
   1018                 *ruleIndex=curIndex+1;
   1019                 return;
   1020              }
   1021          case tDot:
   1022              if (prevType==none) {  // first dot
   1023                 prevType=type;
   1024                 continue;
   1025              }
   1026              else {
   1027                  if ( *ruleIndex != curIndex ) {
   1028                     token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1029                     *ruleIndex=curIndex;  // letter
   1030                     type=prevType;
   1031                     getKeyType(token, type, status);
   1032                     return;
   1033                  }
   1034                  else {  // two consective dots
   1035                     *ruleIndex=curIndex+2;
   1036                     return;
   1037                  }
   1038              }
   1039              break;
   1040          default:
   1041              status = U_UNEXPECTED_TOKEN;
   1042              return;
   1043         }
   1044         curIndex++;
   1045     }
   1046     if ( curIndex>=ruleData.length() ) {
   1047         if ( (type == tLetter)||(type == tNumber) ) {
   1048             token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
   1049             getKeyType(token, type, status);
   1050             if (U_FAILURE(status)) {
   1051                 return;
   1052             }
   1053         }
   1054         *ruleIndex = ruleData.length();
   1055     }
   1056 }
   1057 
   1058 UBool
   1059 RuleParser::inRange(UChar ch, tokenType& type) {
   1060     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
   1061         // we assume all characters are in lower case already.
   1062         return FALSE;
   1063     }
   1064     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
   1065         type = tLetter;
   1066         return TRUE;
   1067     }
   1068     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
   1069         type = tNumber;
   1070         return TRUE;
   1071     }
   1072     switch (ch) {
   1073     case COLON:
   1074         type = tColon;
   1075         return TRUE;
   1076     case SPACE:
   1077         type = tSpace;
   1078         return TRUE;
   1079     case SEMI_COLON:
   1080         type = tSemiColon;
   1081         return TRUE;
   1082     case DOT:
   1083         type = tDot;
   1084         return TRUE;
   1085     default :
   1086         type = none;
   1087         return FALSE;
   1088     }
   1089 }
   1090 
   1091 
   1092 void
   1093 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
   1094 {
   1095     if (U_FAILURE(status)) {
   1096         return;
   1097     }
   1098     if ( keyType==tNumber) {
   1099     }
   1100     else if (token==PK_VAR_N) {
   1101         keyType = tVariableN;
   1102     }
   1103     else if (token==PK_IS) {
   1104         keyType = tIs;
   1105     }
   1106     else if (token==PK_AND) {
   1107         keyType = tAnd;
   1108     }
   1109     else if (token==PK_IN) {
   1110         keyType = tIn;
   1111     }
   1112     else if (token==PK_WITHIN) {
   1113         keyType = tWithin;
   1114     }
   1115     else if (token==PK_NOT) {
   1116         keyType = tNot;
   1117     }
   1118     else if (token==PK_MOD) {
   1119         keyType = tMod;
   1120     }
   1121     else if (token==PK_OR) {
   1122         keyType = tOr;
   1123     }
   1124     else if ( isValidKeyword(token) ) {
   1125         keyType = tKeyword;
   1126     }
   1127     else {
   1128         status = U_UNEXPECTED_TOKEN;
   1129     }
   1130 }
   1131 
   1132 UBool
   1133 RuleParser::isValidKeyword(const UnicodeString& token) {
   1134     if ( token.length()==0 ) {
   1135         return FALSE;
   1136     }
   1137     if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
   1138         int32_t i;
   1139         for (i=1; i< token.length(); i++) {
   1140             if (idContinueFilter->contains(token.charAt(i))== FALSE) {
   1141                 return FALSE;
   1142             }
   1143         }
   1144         return TRUE;
   1145     }
   1146     else {
   1147         return FALSE;
   1148     }
   1149 }
   1150 
   1151 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
   1152 fKeywordNames(status)
   1153 {
   1154     RuleChain *node=header;
   1155     UBool  addKeywordOther=true;
   1156 
   1157     if (U_FAILURE(status)) {
   1158         return;
   1159     }
   1160     pos=0;
   1161     fKeywordNames.removeAllElements();
   1162     while(node!=NULL) {
   1163         fKeywordNames.addElement(new UnicodeString(node->keyword), status);
   1164         if (U_FAILURE(status)) {
   1165             return;
   1166         }
   1167         if (node->keyword == PLURAL_KEYWORD_OTHER) {
   1168             addKeywordOther= false;
   1169         }
   1170         node=node->next;
   1171     }
   1172 
   1173     if (addKeywordOther) {
   1174         fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
   1175         if (U_FAILURE(status)) {
   1176             return;
   1177         }
   1178     }
   1179 }
   1180 
   1181 const UnicodeString*
   1182 PluralKeywordEnumeration::snext(UErrorCode& status) {
   1183     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
   1184         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
   1185     }
   1186     return NULL;
   1187 }
   1188 
   1189 void
   1190 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
   1191     pos=0;
   1192 }
   1193 
   1194 int32_t
   1195 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
   1196        return fKeywordNames.size();
   1197 }
   1198 
   1199 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
   1200     UnicodeString *s;
   1201     for (int32_t i=0; i<fKeywordNames.size(); ++i) {
   1202         if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
   1203             delete s;
   1204         }
   1205     }
   1206 }
   1207 
   1208 U_NAMESPACE_END
   1209 
   1210 
   1211 #endif /* #if !UCONFIG_NO_FORMATTING */
   1212 
   1213 //eof
   1214