Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2014, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "utypeinfo.h"  // for 'typeid' to work
     10 
     11 #include "unicode/rbnf.h"
     12 
     13 #if U_HAVE_RBNF
     14 
     15 #include "unicode/normlzr.h"
     16 #include "unicode/plurfmt.h"
     17 #include "unicode/tblcoll.h"
     18 #include "unicode/uchar.h"
     19 #include "unicode/ucol.h"
     20 #include "unicode/uloc.h"
     21 #include "unicode/unum.h"
     22 #include "unicode/ures.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/utf16.h"
     25 #include "unicode/udata.h"
     26 #include "unicode/udisplaycontext.h"
     27 #include "unicode/brkiter.h"
     28 #include "nfrs.h"
     29 
     30 #include "cmemory.h"
     31 #include "cstring.h"
     32 #include "patternprops.h"
     33 #include "uresimp.h"
     34 
     35 // debugging
     36 // #define RBNF_DEBUG
     37 
     38 #ifdef RBNF_DEBUG
     39 #include "stdio.h"
     40 #endif
     41 
     42 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
     43 
     44 static const UChar gPercentPercent[] =
     45 {
     46     0x25, 0x25, 0
     47 }; /* "%%" */
     48 
     49 // All urbnf objects are created through openRules, so we init all of the
     50 // Unicode string constants required by rbnf, nfrs, or nfr here.
     51 static const UChar gLenientParse[] =
     52 {
     53     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
     54 }; /* "%%lenient-parse:" */
     55 static const UChar gSemiColon = 0x003B;
     56 static const UChar gSemiPercent[] =
     57 {
     58     0x3B, 0x25, 0
     59 }; /* ";%" */
     60 
     61 #define kSomeNumberOfBitsDiv2 22
     62 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
     63 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
     64 
     65 U_NAMESPACE_BEGIN
     66 
     67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     68 
     69 /*
     70 This is a utility class. It does not use ICU's RTTI.
     71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     72 Please make sure that intltest passes on Windows in Release mode,
     73 since the string pooling per compilation unit will mess up how RTTI works.
     74 The RTTI code was also removed due to lack of code coverage.
     75 */
     76 class LocalizationInfo : public UMemory {
     77 protected:
     78     virtual ~LocalizationInfo();
     79     uint32_t refcount;
     80 
     81 public:
     82     LocalizationInfo() : refcount(0) {}
     83 
     84     LocalizationInfo* ref(void) {
     85         ++refcount;
     86         return this;
     87     }
     88 
     89     LocalizationInfo* unref(void) {
     90         if (refcount && --refcount == 0) {
     91             delete this;
     92         }
     93         return NULL;
     94     }
     95 
     96     virtual UBool operator==(const LocalizationInfo* rhs) const;
     97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
     98 
     99     virtual int32_t getNumberOfRuleSets(void) const = 0;
    100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
    101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    102     virtual const UChar* getLocaleName(int32_t index) const = 0;
    103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    104 
    105     virtual int32_t indexForLocale(const UChar* locale) const;
    106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    107 
    108 //    virtual UClassID getDynamicClassID() const = 0;
    109 //    static UClassID getStaticClassID(void);
    110 };
    111 
    112 LocalizationInfo::~LocalizationInfo() {}
    113 
    114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    115 
    116 // if both strings are NULL, this returns TRUE
    117 static UBool
    118 streq(const UChar* lhs, const UChar* rhs) {
    119     if (rhs == lhs) {
    120         return TRUE;
    121     }
    122     if (lhs && rhs) {
    123         return u_strcmp(lhs, rhs) == 0;
    124     }
    125     return FALSE;
    126 }
    127 
    128 UBool
    129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    130     if (rhs) {
    131         if (this == rhs) {
    132             return TRUE;
    133         }
    134 
    135         int32_t rsc = getNumberOfRuleSets();
    136         if (rsc == rhs->getNumberOfRuleSets()) {
    137             for (int i = 0; i < rsc; ++i) {
    138                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    139                     return FALSE;
    140                 }
    141             }
    142             int32_t dlc = getNumberOfDisplayLocales();
    143             if (dlc == rhs->getNumberOfDisplayLocales()) {
    144                 for (int i = 0; i < dlc; ++i) {
    145                     const UChar* locale = getLocaleName(i);
    146                     int32_t ix = rhs->indexForLocale(locale);
    147                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    148                     if (!streq(locale, rhs->getLocaleName(ix))) {
    149                         return FALSE;
    150                     }
    151                     for (int j = 0; j < rsc; ++j) {
    152                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    153                             return FALSE;
    154                         }
    155                     }
    156                 }
    157                 return TRUE;
    158             }
    159         }
    160     }
    161     return FALSE;
    162 }
    163 
    164 int32_t
    165 LocalizationInfo::indexForLocale(const UChar* locale) const {
    166     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    167         if (streq(locale, getLocaleName(i))) {
    168             return i;
    169         }
    170     }
    171     return -1;
    172 }
    173 
    174 int32_t
    175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    176     if (ruleset) {
    177         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    178             if (streq(ruleset, getRuleSetName(i))) {
    179                 return i;
    180             }
    181         }
    182     }
    183     return -1;
    184 }
    185 
    186 
    187 typedef void (*Fn_Deleter)(void*);
    188 
    189 class VArray {
    190     void** buf;
    191     int32_t cap;
    192     int32_t size;
    193     Fn_Deleter deleter;
    194 public:
    195     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    196 
    197     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    198 
    199     ~VArray() {
    200         if (deleter) {
    201             for (int i = 0; i < size; ++i) {
    202                 (*deleter)(buf[i]);
    203             }
    204         }
    205         uprv_free(buf);
    206     }
    207 
    208     int32_t length() {
    209         return size;
    210     }
    211 
    212     void add(void* elem, UErrorCode& status) {
    213         if (U_SUCCESS(status)) {
    214             if (size == cap) {
    215                 if (cap == 0) {
    216                     cap = 1;
    217                 } else if (cap < 256) {
    218                     cap *= 2;
    219                 } else {
    220                     cap += 256;
    221                 }
    222                 if (buf == NULL) {
    223                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    224                 } else {
    225                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    226                 }
    227                 if (buf == NULL) {
    228                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    229                     status = U_MEMORY_ALLOCATION_ERROR;
    230                     return;
    231                 }
    232                 void* start = &buf[size];
    233                 size_t count = (cap - size) * sizeof(void*);
    234                 uprv_memset(start, 0, count); // fill with nulls, just because
    235             }
    236             buf[size++] = elem;
    237         }
    238     }
    239 
    240     void** release(void) {
    241         void** result = buf;
    242         buf = NULL;
    243         cap = 0;
    244         size = 0;
    245         return result;
    246     }
    247 };
    248 
    249 class LocDataParser;
    250 
    251 class StringLocalizationInfo : public LocalizationInfo {
    252     UChar* info;
    253     UChar*** data;
    254     int32_t numRuleSets;
    255     int32_t numLocales;
    256 
    257 friend class LocDataParser;
    258 
    259     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
    260         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    261     {
    262     }
    263 
    264 public:
    265     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
    266 
    267     virtual ~StringLocalizationInfo();
    268     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    269     virtual const UChar* getRuleSetName(int32_t index) const;
    270     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    271     virtual const UChar* getLocaleName(int32_t index) const;
    272     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
    273 
    274 //    virtual UClassID getDynamicClassID() const;
    275 //    static UClassID getStaticClassID(void);
    276 
    277 private:
    278     void init(UErrorCode& status) const;
    279 };
    280 
    281 
    282 enum {
    283     OPEN_ANGLE = 0x003c, /* '<' */
    284     CLOSE_ANGLE = 0x003e, /* '>' */
    285     COMMA = 0x002c,
    286     TICK = 0x0027,
    287     QUOTE = 0x0022,
    288     SPACE = 0x0020
    289 };
    290 
    291 /**
    292  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    293  */
    294 class LocDataParser {
    295     UChar* data;
    296     const UChar* e;
    297     UChar* p;
    298     UChar ch;
    299     UParseError& pe;
    300     UErrorCode& ec;
    301 
    302 public:
    303     LocDataParser(UParseError& parseError, UErrorCode& status)
    304         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    305     ~LocDataParser() {}
    306 
    307     /*
    308     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    309     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    310     */
    311     StringLocalizationInfo* parse(UChar* data, int32_t len);
    312 
    313 private:
    314 
    315     void inc(void) { ++p; ch = 0xffff; }
    316     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    317     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    318     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    319     UBool inList(UChar c, const UChar* list) const {
    320         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
    321         while (*list && *list != c) ++list; return *list == c;
    322     }
    323     void parseError(const char* msg);
    324 
    325     StringLocalizationInfo* doParse(void);
    326 
    327     UChar** nextArray(int32_t& requiredLength);
    328     UChar*  nextString(void);
    329 };
    330 
    331 #ifdef RBNF_DEBUG
    332 #define ERROR(msg) parseError(msg); return NULL;
    333 #define EXPLANATION_ARG explanationArg
    334 #else
    335 #define ERROR(msg) parseError(NULL); return NULL;
    336 #define EXPLANATION_ARG
    337 #endif
    338 
    339 
    340 static const UChar DQUOTE_STOPLIST[] = {
    341     QUOTE, 0
    342 };
    343 
    344 static const UChar SQUOTE_STOPLIST[] = {
    345     TICK, 0
    346 };
    347 
    348 static const UChar NOQUOTE_STOPLIST[] = {
    349     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
    350 };
    351 
    352 static void
    353 DeleteFn(void* p) {
    354   uprv_free(p);
    355 }
    356 
    357 StringLocalizationInfo*
    358 LocDataParser::parse(UChar* _data, int32_t len) {
    359     if (U_FAILURE(ec)) {
    360         if (_data) uprv_free(_data);
    361         return NULL;
    362     }
    363 
    364     pe.line = 0;
    365     pe.offset = -1;
    366     pe.postContext[0] = 0;
    367     pe.preContext[0] = 0;
    368 
    369     if (_data == NULL) {
    370         ec = U_ILLEGAL_ARGUMENT_ERROR;
    371         return NULL;
    372     }
    373 
    374     if (len <= 0) {
    375         ec = U_ILLEGAL_ARGUMENT_ERROR;
    376         uprv_free(_data);
    377         return NULL;
    378     }
    379 
    380     data = _data;
    381     e = data + len;
    382     p = _data;
    383     ch = 0xffff;
    384 
    385     return doParse();
    386 }
    387 
    388 
    389 StringLocalizationInfo*
    390 LocDataParser::doParse(void) {
    391     skipWhitespace();
    392     if (!checkInc(OPEN_ANGLE)) {
    393         ERROR("Missing open angle");
    394     } else {
    395         VArray array(DeleteFn);
    396         UBool mightHaveNext = TRUE;
    397         int32_t requiredLength = -1;
    398         while (mightHaveNext) {
    399             mightHaveNext = FALSE;
    400             UChar** elem = nextArray(requiredLength);
    401             skipWhitespace();
    402             UBool haveComma = check(COMMA);
    403             if (elem) {
    404                 array.add(elem, ec);
    405                 if (haveComma) {
    406                     inc();
    407                     mightHaveNext = TRUE;
    408                 }
    409             } else if (haveComma) {
    410                 ERROR("Unexpected character");
    411             }
    412         }
    413 
    414         skipWhitespace();
    415         if (!checkInc(CLOSE_ANGLE)) {
    416             if (check(OPEN_ANGLE)) {
    417                 ERROR("Missing comma in outer array");
    418             } else {
    419                 ERROR("Missing close angle bracket in outer array");
    420             }
    421         }
    422 
    423         skipWhitespace();
    424         if (p != e) {
    425             ERROR("Extra text after close of localization data");
    426         }
    427 
    428         array.add(NULL, ec);
    429         if (U_SUCCESS(ec)) {
    430             int32_t numLocs = array.length() - 2; // subtract first, NULL
    431             UChar*** result = (UChar***)array.release();
    432 
    433             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    434         }
    435     }
    436 
    437     ERROR("Unknown error");
    438 }
    439 
    440 UChar**
    441 LocDataParser::nextArray(int32_t& requiredLength) {
    442     if (U_FAILURE(ec)) {
    443         return NULL;
    444     }
    445 
    446     skipWhitespace();
    447     if (!checkInc(OPEN_ANGLE)) {
    448         ERROR("Missing open angle");
    449     }
    450 
    451     VArray array;
    452     UBool mightHaveNext = TRUE;
    453     while (mightHaveNext) {
    454         mightHaveNext = FALSE;
    455         UChar* elem = nextString();
    456         skipWhitespace();
    457         UBool haveComma = check(COMMA);
    458         if (elem) {
    459             array.add(elem, ec);
    460             if (haveComma) {
    461                 inc();
    462                 mightHaveNext = TRUE;
    463             }
    464         } else if (haveComma) {
    465             ERROR("Unexpected comma");
    466         }
    467     }
    468     skipWhitespace();
    469     if (!checkInc(CLOSE_ANGLE)) {
    470         if (check(OPEN_ANGLE)) {
    471             ERROR("Missing close angle bracket in inner array");
    472         } else {
    473             ERROR("Missing comma in inner array");
    474         }
    475     }
    476 
    477     array.add(NULL, ec);
    478     if (U_SUCCESS(ec)) {
    479         if (requiredLength == -1) {
    480             requiredLength = array.length() + 1;
    481         } else if (array.length() != requiredLength) {
    482             ec = U_ILLEGAL_ARGUMENT_ERROR;
    483             ERROR("Array not of required length");
    484         }
    485 
    486         return (UChar**)array.release();
    487     }
    488     ERROR("Unknown Error");
    489 }
    490 
    491 UChar*
    492 LocDataParser::nextString() {
    493     UChar* result = NULL;
    494 
    495     skipWhitespace();
    496     if (p < e) {
    497         const UChar* terminators;
    498         UChar c = *p;
    499         UBool haveQuote = c == QUOTE || c == TICK;
    500         if (haveQuote) {
    501             inc();
    502             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
    503         } else {
    504             terminators = NOQUOTE_STOPLIST;
    505         }
    506         UChar* start = p;
    507         while (p < e && !inList(*p, terminators)) ++p;
    508         if (p == e) {
    509             ERROR("Unexpected end of data");
    510         }
    511 
    512         UChar x = *p;
    513         if (p > start) {
    514             ch = x;
    515             *p = 0x0; // terminate by writing to data
    516             result = start; // just point into data
    517         }
    518         if (haveQuote) {
    519             if (x != c) {
    520                 ERROR("Missing matching quote");
    521             } else if (p == start) {
    522                 ERROR("Empty string");
    523             }
    524             inc();
    525         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
    526             ERROR("Unexpected character in string");
    527         }
    528     }
    529 
    530     // ok for there to be no next string
    531     return result;
    532 }
    533 
    534 void LocDataParser::parseError(const char* EXPLANATION_ARG)
    535 {
    536     if (!data) {
    537         return;
    538     }
    539 
    540     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    541     if (start < data) {
    542         start = data;
    543     }
    544     for (UChar* x = p; --x >= start;) {
    545         if (!*x) {
    546             start = x+1;
    547             break;
    548         }
    549     }
    550     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    551     if (limit > e) {
    552         limit = e;
    553     }
    554     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    555     pe.preContext[p-start] = 0;
    556     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    557     pe.postContext[limit-p] = 0;
    558     pe.offset = (int32_t)(p - data);
    559 
    560 #ifdef RBNF_DEBUG
    561     fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
    562 
    563     UnicodeString msg;
    564     msg.append(start, p - start);
    565     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    566     msg.append(p, limit-p);
    567     msg.append(UNICODE_STRING_SIMPLE("'"));
    568 
    569     char buf[128];
    570     int32_t len = msg.extract(0, msg.length(), buf, 128);
    571     if (len >= 128) {
    572         buf[127] = 0;
    573     } else {
    574         buf[len] = 0;
    575     }
    576     fprintf(stderr, "%s\n", buf);
    577     fflush(stderr);
    578 #endif
    579 
    580     uprv_free(data);
    581     data = NULL;
    582     p = NULL;
    583     e = NULL;
    584 
    585     if (U_SUCCESS(ec)) {
    586         ec = U_PARSE_ERROR;
    587     }
    588 }
    589 
    590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    591 
    592 StringLocalizationInfo*
    593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    594     if (U_FAILURE(status)) {
    595         return NULL;
    596     }
    597 
    598     int32_t len = info.length();
    599     if (len == 0) {
    600         return NULL; // no error;
    601     }
    602 
    603     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    604     if (!p) {
    605         status = U_MEMORY_ALLOCATION_ERROR;
    606         return NULL;
    607     }
    608     info.extract(p, len, status);
    609     if (!U_FAILURE(status)) {
    610         status = U_ZERO_ERROR; // clear warning about non-termination
    611     }
    612 
    613     LocDataParser parser(perror, status);
    614     return parser.parse(p, len);
    615 }
    616 
    617 StringLocalizationInfo::~StringLocalizationInfo() {
    618     for (UChar*** p = (UChar***)data; *p; ++p) {
    619         // remaining data is simply pointer into our unicode string data.
    620         if (*p) uprv_free(*p);
    621     }
    622     if (data) uprv_free(data);
    623     if (info) uprv_free(info);
    624 }
    625 
    626 
    627 const UChar*
    628 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    629     if (index >= 0 && index < getNumberOfRuleSets()) {
    630         return data[0][index];
    631     }
    632     return NULL;
    633 }
    634 
    635 const UChar*
    636 StringLocalizationInfo::getLocaleName(int32_t index) const {
    637     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    638         return data[index+1][0];
    639     }
    640     return NULL;
    641 }
    642 
    643 const UChar*
    644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    645     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    646         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    647         return data[localeIndex+1][ruleIndex+1];
    648     }
    649     return NULL;
    650 }
    651 
    652 // ----------
    653 
    654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    655                                              const UnicodeString& locs,
    656                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    657   : ruleSets(NULL)
    658   , ruleSetDescriptions(NULL)
    659   , numRuleSets(0)
    660   , defaultRuleSet(NULL)
    661   , locale(alocale)
    662   , collator(NULL)
    663   , decimalFormatSymbols(NULL)
    664   , lenient(FALSE)
    665   , lenientParseRules(NULL)
    666   , localizations(NULL)
    667   , capitalizationInfoSet(FALSE)
    668   , capitalizationForUIListMenu(FALSE)
    669   , capitalizationForStandAlone(FALSE)
    670   , capitalizationBrkIter(NULL)
    671 {
    672   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    673   init(description, locinfo, perror, status);
    674 }
    675 
    676 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    677                                              const UnicodeString& locs,
    678                                              UParseError& perror, UErrorCode& status)
    679   : ruleSets(NULL)
    680   , ruleSetDescriptions(NULL)
    681   , numRuleSets(0)
    682   , defaultRuleSet(NULL)
    683   , locale(Locale::getDefault())
    684   , collator(NULL)
    685   , decimalFormatSymbols(NULL)
    686   , lenient(FALSE)
    687   , lenientParseRules(NULL)
    688   , localizations(NULL)
    689   , capitalizationInfoSet(FALSE)
    690   , capitalizationForUIListMenu(FALSE)
    691   , capitalizationForStandAlone(FALSE)
    692   , capitalizationBrkIter(NULL)
    693 {
    694   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    695   init(description, locinfo, perror, status);
    696 }
    697 
    698 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    699                                              LocalizationInfo* info,
    700                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    701   : ruleSets(NULL)
    702   , ruleSetDescriptions(NULL)
    703   , numRuleSets(0)
    704   , defaultRuleSet(NULL)
    705   , locale(alocale)
    706   , collator(NULL)
    707   , decimalFormatSymbols(NULL)
    708   , lenient(FALSE)
    709   , lenientParseRules(NULL)
    710   , localizations(NULL)
    711   , capitalizationInfoSet(FALSE)
    712   , capitalizationForUIListMenu(FALSE)
    713   , capitalizationForStandAlone(FALSE)
    714   , capitalizationBrkIter(NULL)
    715 {
    716   init(description, info, perror, status);
    717 }
    718 
    719 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    720                          UParseError& perror,
    721                          UErrorCode& status)
    722   : ruleSets(NULL)
    723   , ruleSetDescriptions(NULL)
    724   , numRuleSets(0)
    725   , defaultRuleSet(NULL)
    726   , locale(Locale::getDefault())
    727   , collator(NULL)
    728   , decimalFormatSymbols(NULL)
    729   , lenient(FALSE)
    730   , lenientParseRules(NULL)
    731   , localizations(NULL)
    732   , capitalizationInfoSet(FALSE)
    733   , capitalizationForUIListMenu(FALSE)
    734   , capitalizationForStandAlone(FALSE)
    735   , capitalizationBrkIter(NULL)
    736 {
    737     init(description, NULL, perror, status);
    738 }
    739 
    740 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    741                          const Locale& aLocale,
    742                          UParseError& perror,
    743                          UErrorCode& status)
    744   : ruleSets(NULL)
    745   , ruleSetDescriptions(NULL)
    746   , numRuleSets(0)
    747   , defaultRuleSet(NULL)
    748   , locale(aLocale)
    749   , collator(NULL)
    750   , decimalFormatSymbols(NULL)
    751   , lenient(FALSE)
    752   , lenientParseRules(NULL)
    753   , localizations(NULL)
    754   , capitalizationInfoSet(FALSE)
    755   , capitalizationForUIListMenu(FALSE)
    756   , capitalizationForStandAlone(FALSE)
    757   , capitalizationBrkIter(NULL)
    758 {
    759     init(description, NULL, perror, status);
    760 }
    761 
    762 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    763   : ruleSets(NULL)
    764   , ruleSetDescriptions(NULL)
    765   , numRuleSets(0)
    766   , defaultRuleSet(NULL)
    767   , locale(alocale)
    768   , collator(NULL)
    769   , decimalFormatSymbols(NULL)
    770   , lenient(FALSE)
    771   , lenientParseRules(NULL)
    772   , localizations(NULL)
    773   , capitalizationInfoSet(FALSE)
    774   , capitalizationForUIListMenu(FALSE)
    775   , capitalizationForStandAlone(FALSE)
    776   , capitalizationBrkIter(NULL)
    777 {
    778     if (U_FAILURE(status)) {
    779         return;
    780     }
    781 
    782     const char* rules_tag = "RBNFRules";
    783     const char* fmt_tag = "";
    784     switch (tag) {
    785     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    786     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    787     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    788     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    789     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    790     }
    791 
    792     // TODO: read localization info from resource
    793     LocalizationInfo* locinfo = NULL;
    794 
    795     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    796     if (U_SUCCESS(status)) {
    797         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    798                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    799 
    800         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    801         if (U_FAILURE(status)) {
    802             ures_close(nfrb);
    803         }
    804         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    805         if (U_FAILURE(status)) {
    806             ures_close(rbnfRules);
    807             ures_close(nfrb);
    808             return;
    809         }
    810 
    811         UnicodeString desc;
    812         while (ures_hasNext(ruleSets)) {
    813            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
    814         }
    815         UParseError perror;
    816 
    817         init (desc, locinfo, perror, status);
    818 
    819         ures_close(ruleSets);
    820         ures_close(rbnfRules);
    821     }
    822     ures_close(nfrb);
    823 }
    824 
    825 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    826   : NumberFormat(rhs)
    827   , ruleSets(NULL)
    828   , ruleSetDescriptions(NULL)
    829   , numRuleSets(0)
    830   , defaultRuleSet(NULL)
    831   , locale(rhs.locale)
    832   , collator(NULL)
    833   , decimalFormatSymbols(NULL)
    834   , lenient(FALSE)
    835   , lenientParseRules(NULL)
    836   , localizations(NULL)
    837   , capitalizationInfoSet(FALSE)
    838   , capitalizationForUIListMenu(FALSE)
    839   , capitalizationForStandAlone(FALSE)
    840   , capitalizationBrkIter(NULL)
    841 {
    842     this->operator=(rhs);
    843 }
    844 
    845 // --------
    846 
    847 RuleBasedNumberFormat&
    848 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
    849 {
    850     if (this == &rhs) {
    851         return *this;
    852     }
    853     NumberFormat::operator=(rhs);
    854     UErrorCode status = U_ZERO_ERROR;
    855     dispose();
    856     locale = rhs.locale;
    857     lenient = rhs.lenient;
    858 
    859     UParseError perror;
    860     init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    861     setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    862     setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
    863 
    864     capitalizationInfoSet = rhs.capitalizationInfoSet;
    865     capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    866     capitalizationForStandAlone = rhs.capitalizationForStandAlone;
    867 #if !UCONFIG_NO_BREAK_ITERATION
    868     capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
    869 #endif
    870 
    871     return *this;
    872 }
    873 
    874 RuleBasedNumberFormat::~RuleBasedNumberFormat()
    875 {
    876     dispose();
    877 }
    878 
    879 Format*
    880 RuleBasedNumberFormat::clone(void) const
    881 {
    882     return new RuleBasedNumberFormat(*this);
    883 }
    884 
    885 UBool
    886 RuleBasedNumberFormat::operator==(const Format& other) const
    887 {
    888     if (this == &other) {
    889         return TRUE;
    890     }
    891 
    892     if (typeid(*this) == typeid(other)) {
    893         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    894         // test for capitalization info equality is adequately handled
    895         // by the NumberFormat test for fCapitalizationContext equality;
    896         // the info here is just derived from that.
    897         if (locale == rhs.locale &&
    898             lenient == rhs.lenient &&
    899             (localizations == NULL
    900                 ? rhs.localizations == NULL
    901                 : (rhs.localizations == NULL
    902                     ? FALSE
    903                     : *localizations == rhs.localizations))) {
    904 
    905             NFRuleSet** p = ruleSets;
    906             NFRuleSet** q = rhs.ruleSets;
    907             if (p == NULL) {
    908                 return q == NULL;
    909             } else if (q == NULL) {
    910                 return FALSE;
    911             }
    912             while (*p && *q && (**p == **q)) {
    913                 ++p;
    914                 ++q;
    915             }
    916             return *q == NULL && *p == NULL;
    917         }
    918     }
    919 
    920     return FALSE;
    921 }
    922 
    923 UnicodeString
    924 RuleBasedNumberFormat::getRules() const
    925 {
    926     UnicodeString result;
    927     if (ruleSets != NULL) {
    928         for (NFRuleSet** p = ruleSets; *p; ++p) {
    929             (*p)->appendRules(result);
    930         }
    931     }
    932     return result;
    933 }
    934 
    935 UnicodeString
    936 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    937 {
    938     if (localizations) {
    939       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    940       return string;
    941     } else if (ruleSets) {
    942         UnicodeString result;
    943         for (NFRuleSet** p = ruleSets; *p; ++p) {
    944             NFRuleSet* rs = *p;
    945             if (rs->isPublic()) {
    946                 if (--index == -1) {
    947                     rs->getName(result);
    948                     return result;
    949                 }
    950             }
    951         }
    952     }
    953     UnicodeString empty;
    954     return empty;
    955 }
    956 
    957 int32_t
    958 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    959 {
    960     int32_t result = 0;
    961     if (localizations) {
    962       result = localizations->getNumberOfRuleSets();
    963     } else if (ruleSets) {
    964         for (NFRuleSet** p = ruleSets; *p; ++p) {
    965             if ((**p).isPublic()) {
    966                 ++result;
    967             }
    968         }
    969     }
    970     return result;
    971 }
    972 
    973 int32_t
    974 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    975     if (localizations) {
    976         return localizations->getNumberOfDisplayLocales();
    977     }
    978     return 0;
    979 }
    980 
    981 Locale
    982 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    983     if (U_FAILURE(status)) {
    984         return Locale("");
    985     }
    986     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
    987         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
    988         char buffer[64];
    989         int32_t cap = name.length() + 1;
    990         char* bp = buffer;
    991         if (cap > 64) {
    992             bp = (char *)uprv_malloc(cap);
    993             if (bp == NULL) {
    994                 status = U_MEMORY_ALLOCATION_ERROR;
    995                 return Locale("");
    996             }
    997         }
    998         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
    999         Locale retLocale(bp);
   1000         if (bp != buffer) {
   1001             uprv_free(bp);
   1002         }
   1003         return retLocale;
   1004     }
   1005     status = U_ILLEGAL_ARGUMENT_ERROR;
   1006     Locale retLocale;
   1007     return retLocale;
   1008 }
   1009 
   1010 UnicodeString
   1011 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
   1012     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
   1013         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
   1014         int32_t len = localeName.length();
   1015         UChar* localeStr = localeName.getBuffer(len + 1);
   1016         while (len >= 0) {
   1017             localeStr[len] = 0;
   1018             int32_t ix = localizations->indexForLocale(localeStr);
   1019             if (ix >= 0) {
   1020                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1021                 return name;
   1022             }
   1023 
   1024             // trim trailing portion, skipping over ommitted sections
   1025             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1026             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1027         }
   1028         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1029         return name;
   1030     }
   1031     UnicodeString bogus;
   1032     bogus.setToBogus();
   1033     return bogus;
   1034 }
   1035 
   1036 UnicodeString
   1037 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1038     if (localizations) {
   1039         UnicodeString rsn(ruleSetName);
   1040         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1041         return getRuleSetDisplayName(ix, localeParam);
   1042     }
   1043     UnicodeString bogus;
   1044     bogus.setToBogus();
   1045     return bogus;
   1046 }
   1047 
   1048 NFRuleSet*
   1049 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1050 {
   1051     if (U_SUCCESS(status) && ruleSets) {
   1052         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1053             NFRuleSet* rs = *p;
   1054             if (rs->isNamed(name)) {
   1055                 return rs;
   1056             }
   1057         }
   1058         status = U_ILLEGAL_ARGUMENT_ERROR;
   1059     }
   1060     return NULL;
   1061 }
   1062 
   1063 UnicodeString&
   1064 RuleBasedNumberFormat::format(int32_t number,
   1065                               UnicodeString& toAppendTo,
   1066                               FieldPosition& /* pos */) const
   1067 {
   1068     if (defaultRuleSet) {
   1069         UErrorCode status = U_ZERO_ERROR;
   1070         int32_t startPos = toAppendTo.length();
   1071         defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
   1072         adjustForCapitalizationContext(startPos, toAppendTo);
   1073     }
   1074     return toAppendTo;
   1075 }
   1076 
   1077 
   1078 UnicodeString&
   1079 RuleBasedNumberFormat::format(int64_t number,
   1080                               UnicodeString& toAppendTo,
   1081                               FieldPosition& /* pos */) const
   1082 {
   1083     if (defaultRuleSet) {
   1084         UErrorCode status = U_ZERO_ERROR;
   1085         int32_t startPos = toAppendTo.length();
   1086         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
   1087         adjustForCapitalizationContext(startPos, toAppendTo);
   1088     }
   1089     return toAppendTo;
   1090 }
   1091 
   1092 
   1093 UnicodeString&
   1094 RuleBasedNumberFormat::format(double number,
   1095                               UnicodeString& toAppendTo,
   1096                               FieldPosition& /* pos */) const
   1097 {
   1098     int32_t startPos = toAppendTo.length();
   1099     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
   1100     if (uprv_isNaN(number)) {
   1101         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
   1102         if (decFmtSyms) {
   1103             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
   1104         }
   1105     } else if (defaultRuleSet) {
   1106         UErrorCode status = U_ZERO_ERROR;
   1107         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
   1108     }
   1109     return adjustForCapitalizationContext(startPos, toAppendTo);
   1110 }
   1111 
   1112 
   1113 UnicodeString&
   1114 RuleBasedNumberFormat::format(int32_t number,
   1115                               const UnicodeString& ruleSetName,
   1116                               UnicodeString& toAppendTo,
   1117                               FieldPosition& /* pos */,
   1118                               UErrorCode& status) const
   1119 {
   1120     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1121     if (U_SUCCESS(status)) {
   1122         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1123             // throw new IllegalArgumentException("Can't use internal rule set");
   1124             status = U_ILLEGAL_ARGUMENT_ERROR;
   1125         } else {
   1126             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1127             if (rs) {
   1128                 int32_t startPos = toAppendTo.length();
   1129                 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
   1130                 adjustForCapitalizationContext(startPos, toAppendTo);
   1131             }
   1132         }
   1133     }
   1134     return toAppendTo;
   1135 }
   1136 
   1137 
   1138 UnicodeString&
   1139 RuleBasedNumberFormat::format(int64_t number,
   1140                               const UnicodeString& ruleSetName,
   1141                               UnicodeString& toAppendTo,
   1142                               FieldPosition& /* pos */,
   1143                               UErrorCode& status) const
   1144 {
   1145     if (U_SUCCESS(status)) {
   1146         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1147             // throw new IllegalArgumentException("Can't use internal rule set");
   1148             status = U_ILLEGAL_ARGUMENT_ERROR;
   1149         } else {
   1150             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1151             if (rs) {
   1152                 int32_t startPos = toAppendTo.length();
   1153                 rs->format(number, toAppendTo, toAppendTo.length(), status);
   1154                 adjustForCapitalizationContext(startPos, toAppendTo);
   1155             }
   1156         }
   1157     }
   1158     return toAppendTo;
   1159 }
   1160 
   1161 
   1162 UnicodeString&
   1163 RuleBasedNumberFormat::format(double number,
   1164                               const UnicodeString& ruleSetName,
   1165                               UnicodeString& toAppendTo,
   1166                               FieldPosition& /* pos */,
   1167                               UErrorCode& status) const
   1168 {
   1169     if (U_SUCCESS(status)) {
   1170         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1171             // throw new IllegalArgumentException("Can't use internal rule set");
   1172             status = U_ILLEGAL_ARGUMENT_ERROR;
   1173         } else {
   1174             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1175             if (rs) {
   1176                 int32_t startPos = toAppendTo.length();
   1177                 rs->format(number, toAppendTo, toAppendTo.length(), status);
   1178                 adjustForCapitalizationContext(startPos, toAppendTo);
   1179             }
   1180         }
   1181     }
   1182     return toAppendTo;
   1183 }
   1184 
   1185 UnicodeString&
   1186 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
   1187                                                       UnicodeString& currentResult) const
   1188 {
   1189 #if !UCONFIG_NO_BREAK_ITERATION
   1190     if (startPos==0 && currentResult.length() > 0) {
   1191         // capitalize currentResult according to context
   1192         UChar32 ch = currentResult.char32At(0);
   1193         UErrorCode status = U_ZERO_ERROR;
   1194         UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
   1195         if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
   1196               ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1197                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1198                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1199             // titlecase first word of currentResult, here use sentence iterator unlike current implementations
   1200             // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
   1201             currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
   1202         }
   1203     }
   1204 #endif
   1205     return currentResult;
   1206 }
   1207 
   1208 
   1209 void
   1210 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1211                              Formattable& result,
   1212                              ParsePosition& parsePosition) const
   1213 {
   1214     if (!ruleSets) {
   1215         parsePosition.setErrorIndex(0);
   1216         return;
   1217     }
   1218 
   1219     UnicodeString workingText(text, parsePosition.getIndex());
   1220     ParsePosition workingPos(0);
   1221 
   1222     ParsePosition high_pp(0);
   1223     Formattable high_result;
   1224 
   1225     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1226         NFRuleSet *rp = *p;
   1227         if (rp->isPublic() && rp->isParseable()) {
   1228             ParsePosition working_pp(0);
   1229             Formattable working_result;
   1230 
   1231             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1232             if (working_pp.getIndex() > high_pp.getIndex()) {
   1233                 high_pp = working_pp;
   1234                 high_result = working_result;
   1235 
   1236                 if (high_pp.getIndex() == workingText.length()) {
   1237                     break;
   1238                 }
   1239             }
   1240         }
   1241     }
   1242 
   1243     int32_t startIndex = parsePosition.getIndex();
   1244     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1245     if (high_pp.getIndex() > 0) {
   1246         parsePosition.setErrorIndex(-1);
   1247     } else {
   1248         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1249         parsePosition.setErrorIndex(startIndex + errorIndex);
   1250     }
   1251     result = high_result;
   1252     if (result.getType() == Formattable::kDouble) {
   1253         int32_t r = (int32_t)result.getDouble();
   1254         if ((double)r == result.getDouble()) {
   1255             result.setLong(r);
   1256         }
   1257     }
   1258 }
   1259 
   1260 #if !UCONFIG_NO_COLLATION
   1261 
   1262 void
   1263 RuleBasedNumberFormat::setLenient(UBool enabled)
   1264 {
   1265     lenient = enabled;
   1266     if (!enabled && collator) {
   1267         delete collator;
   1268         collator = NULL;
   1269     }
   1270 }
   1271 
   1272 #endif
   1273 
   1274 void
   1275 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1276     if (U_SUCCESS(status)) {
   1277         if (ruleSetName.isEmpty()) {
   1278           if (localizations) {
   1279               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1280               defaultRuleSet = findRuleSet(name, status);
   1281           } else {
   1282             initDefaultRuleSet();
   1283           }
   1284         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1285             status = U_ILLEGAL_ARGUMENT_ERROR;
   1286         } else {
   1287             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1288             if (result != NULL) {
   1289                 defaultRuleSet = result;
   1290             }
   1291         }
   1292     }
   1293 }
   1294 
   1295 UnicodeString
   1296 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1297   UnicodeString result;
   1298   if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1299     defaultRuleSet->getName(result);
   1300   } else {
   1301     result.setToBogus();
   1302   }
   1303   return result;
   1304 }
   1305 
   1306 void
   1307 RuleBasedNumberFormat::initDefaultRuleSet()
   1308 {
   1309     defaultRuleSet = NULL;
   1310     if (!ruleSets) {
   1311       return;
   1312     }
   1313 
   1314     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
   1315     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
   1316     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
   1317 
   1318     NFRuleSet**p = &ruleSets[0];
   1319     while (*p) {
   1320         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1321             defaultRuleSet = *p;
   1322             return;
   1323         } else {
   1324             ++p;
   1325         }
   1326     }
   1327 
   1328     defaultRuleSet = *--p;
   1329     if (!defaultRuleSet->isPublic()) {
   1330         while (p != ruleSets) {
   1331             if ((*--p)->isPublic()) {
   1332                 defaultRuleSet = *p;
   1333                 break;
   1334             }
   1335         }
   1336     }
   1337 }
   1338 
   1339 
    /**
     * Builds this formatter's rule sets from a textual rule description.
     *
     * Steps, in order: clear pErr; take a reference on localizationInfos; strip
     * the whitespace that follows each semicolon; pull out an optional
     * "%%lenient-parse:" section into lenientParseRules; count the rule sets
     * (separated by ";%"); allocate the NULL-terminated ruleSets array and the
     * temporary ruleSetDescriptions array; create one NFRuleSet per description;
     * choose a provisional default rule set; parse every rule set; and finally
     * settle the default rule set from the localization info (or from
     * getDefaultRuleSet() when there is none) and remember the original rules.
     *
     * @param rules             the rule description text
     * @param localizationInfos localization data, may be NULL; ref() is called
     *                          on it here
     * @param pErr              always zero-filled; no parse-error details are
     *                          reported yet
     * @param status            in/out error code.  If it already indicates
     *                          failure only pErr is cleared.  Set to
     *                          U_MEMORY_ALLOCATION_ERROR for an empty description
     *                          or a failed allocation; findRuleSet() sets
     *                          U_ILLEGAL_ARGUMENT_ERROR when a localized rule-set
     *                          name is not present in the rules.
     */
    1340 void
    1341 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
    1342                             UParseError& pErr, UErrorCode& status)
    1343 {
    1344     // TODO: implement UParseError
    1345     uprv_memset(&pErr, 0, sizeof(UParseError));
    1346     // Note: this can leave ruleSets == NULL, so remaining code should check
    1347     if (U_FAILURE(status)) {
    1348         return;
    1349     }
    1350 
    1351     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
    1352 
             // work on a private copy; an empty description is reported as an
             // allocation error
    1353     UnicodeString description(rules);
    1354     if (!description.length()) {
    1355         status = U_MEMORY_ALLOCATION_ERROR;
    1356         return;
    1357     }
    1358 
    1359     // start by stripping the trailing whitespace from all the rules
    1360     // (this is all the whitespace following each semicolon in the
    1361     // description).  This allows us to look for rule-set boundaries
    1362     // by searching for ";%" without having to worry about whitespace
    1363     // between the ; and the %
    1364     stripWhitespace(description);
    1365 
    1366     // check to see if there's a set of lenient-parse rules.  If there
    1367     // is, pull them out into our temporary holding place for them,
    1368     // and delete them from the description before the real description-
    1369     // parsing code sees them
    1370     int32_t lp = description.indexOf(gLenientParse, -1, 0);
    1371     if (lp != -1) {
    1372         // we've got to make sure we're not in the middle of a rule
    1373         // (where "%%lenient-parse" would actually get treated as
    1374         // rule text)
    1375         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
    1376             // locate the beginning and end of the actual collation
    1377             // rules (there may be whitespace between the name and
    1378             // the first token in the description)
    1379             int lpEnd = description.indexOf(gSemiPercent, 2, lp);
    1380 
                     // no following rule set: the section runs to the last character
    1381             if (lpEnd == -1) {
    1382                 lpEnd = description.length() - 1;
    1383             }
    1384             int lpStart = lp + u_strlen(gLenientParse);
    1385             while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
    1386                 ++lpStart;
    1387             }
    1388 
    1389             // copy out the lenient-parse rules and delete them
    1390             // from the description
    1391             lenientParseRules = new UnicodeString();
    1392             /* test for NULL */
    1393             if (lenientParseRules == 0) {
    1394                 status = U_MEMORY_ALLOCATION_ERROR;
    1395                 return;
    1396             }
    1397             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
    1398 
    1399             description.remove(lp, lpEnd + 1 - lp);
    1400         }
    1401     }
    1402 
    1403     // pre-flight parsing the description and count the number of
    1404     // rule sets (";%" marks the end of one rule set and the beginning
    1405     // of the next)
    1406     numRuleSets = 0;
    1407     for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
    1408         ++numRuleSets;
    1409         ++p;
    1410     }
             // one more rule set than there are separators
    1411     ++numRuleSets;
    1412 
    1413     // our rule list is an array of the appropriate size
             // (one extra slot for the NULL terminator)
    1414     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    1415     /* test for NULL */
    1416     if (ruleSets == 0) {
    1417         status = U_MEMORY_ALLOCATION_ERROR;
    1418         return;
    1419     }
    1420 
    1421     for (int i = 0; i <= numRuleSets; ++i) {
    1422         ruleSets[i] = NULL;
    1423     }
    1424 
    1425     // divide up the descriptions into individual rule-set descriptions
    1426     // and store them in a temporary array.  At each step, we also
    1427     // new up a rule set, but all this does is initialize its name
    1428     // and remove it from its description.  We can't actually parse
    1429     // the rest of the descriptions and finish initializing everything
    1430     // because we have to know the names and locations of all the rule
    1431     // sets before we can actually set everything up
             // NOTE(review): numRuleSets was incremented unconditionally above, so
             // it is at least 1 here and this branch looks unreachable -- confirm
    1432     if(!numRuleSets) {
    1433         status = U_ILLEGAL_ARGUMENT_ERROR;
    1434         return;
    1435     }
    1436 
    1437     ruleSetDescriptions = new UnicodeString[numRuleSets];
    1438     if (ruleSetDescriptions == 0) {
    1439         status = U_MEMORY_ALLOCATION_ERROR;
    1440         return;
    1441     }
    1442 
    1443     {
    1444         int curRuleSet = 0;
    1445         int32_t start = 0;
                 // each piece runs from 'start' through the semicolon of the next ";%"
    1446         for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
    1447             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
    1448             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
    1449             if (ruleSets[curRuleSet] == 0) {
    1450                 status = U_MEMORY_ALLOCATION_ERROR;
    1451                 return;
    1452             }
    1453             ++curRuleSet;
    1454             start = p + 1;
    1455         }
                 // the last rule set is whatever follows the final separator
    1456         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
    1457         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
    1458         if (ruleSets[curRuleSet] == 0) {
    1459             status = U_MEMORY_ALLOCATION_ERROR;
    1460             return;
    1461         }
    1462     }
    1463 
    1464     // now we can take note of the formatter's default rule set, which
    1465     // is the last public rule set in the description (it's the last
    1466     // rather than the first so that a user can create a new formatter
    1467     // from an existing formatter and change its default behavior just
    1468     // by appending more rule sets to the end)
    1469 
    1470     // {dlf} Initialization of a fraction rule set requires the default rule
    1471     // set to be known.  For purposes of initialization, this is always the
    1472     // last public rule set, no matter what the localization data says.
    1473     initDefaultRuleSet();
    1474 
    1475     // finally, we can go back through the temporary descriptions
    1476     // list and finish setting up the substructure (and we throw
    1477     // away the temporary descriptions as we go)
    1478     {
    1479         for (int i = 0; i < numRuleSets; i++) {
    1480             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
    1481         }
    1482     }
    1483 
    1484     // Now that the rules are initialized, the 'real' default rule
    1485     // set can be adjusted by the localization data.
    1486 
    1487     // The C code keeps the localization array as is, rather than building
    1488     // a separate array of the public rule set names, so we have less work
    1489     // to do here-- but we still need to check the names.
    1490 
    1491     if (localizationInfos) {
    1492         // confirm the names, if any aren't in the rules, that's an error
    1493         // it is ok if the rules contain public rule sets that are not in this list
    1494         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
    1495             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
    1496             NFRuleSet* rs = findRuleSet(name, status);
    1497             if (rs == NULL) {
    1498                 break; // error
    1499             }
                     // the first localized name designates the default rule set
    1500             if (i == 0) {
    1501                 defaultRuleSet = rs;
    1502             }
    1503         }
    1504     } else {
    1505         defaultRuleSet = getDefaultRuleSet();
    1506     }
             // remember the unmodified rule text
    1507     originalDescription = rules;
    1508 }
   1509 
   1510 // override the NumberFormat implementation in order to
   1511 // lazily initialize relevant items
   1512 void
   1513 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
   1514 {
   1515     NumberFormat::setContext(value, status);
   1516     if (U_SUCCESS(status)) {
   1517     	if (!capitalizationInfoSet &&
   1518     	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
   1519     	    initCapitalizationContextInfo(locale);
   1520     	    capitalizationInfoSet = TRUE;
   1521         }
   1522 #if !UCONFIG_NO_BREAK_ITERATION
   1523         if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1524                 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1525                 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1526             UErrorCode status = U_ZERO_ERROR;
   1527             capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
   1528             if (U_FAILURE(status)) {
   1529                 delete capitalizationBrkIter;
   1530                 capitalizationBrkIter = NULL;
   1531             }
   1532         }
   1533 #endif
   1534     }
   1535 }
   1536 
   1537 void
   1538 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
   1539 {
   1540 #if !UCONFIG_NO_BREAK_ITERATION
   1541     const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
   1542     UErrorCode status = U_ZERO_ERROR;
   1543     UResourceBundle *rb = ures_open(NULL, localeID, &status);
   1544     rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
   1545     rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
   1546     if (U_SUCCESS(status) && rb != NULL) {
   1547         int32_t len = 0;
   1548         const int32_t * intVector = ures_getIntVector(rb, &len, &status);
   1549         if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
   1550             capitalizationForUIListMenu = intVector[0];
   1551             capitalizationForStandAlone = intVector[1];
   1552         }
   1553     }
   1554     ures_close(rb);
   1555 #endif
   1556 }
   1557 
   1558 void
   1559 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1560 {
   1561     // iterate through the characters...
   1562     UnicodeString result;
   1563 
   1564     int start = 0;
   1565     while (start != -1 && start < description.length()) {
   1566         // seek to the first non-whitespace character...
   1567         while (start < description.length()
   1568             && PatternProps::isWhiteSpace(description.charAt(start))) {
   1569             ++start;
   1570         }
   1571 
   1572         // locate the next semicolon in the text and copy the text from
   1573         // our current position up to that semicolon into the result
   1574         int32_t p = description.indexOf(gSemiColon, start);
   1575         if (p == -1) {
   1576             // or if we don't find a semicolon, just copy the rest of
   1577             // the string into the result
   1578             result.append(description, start, description.length() - start);
   1579             start = -1;
   1580         }
   1581         else if (p < description.length()) {
   1582             result.append(description, start, p + 1 - start);
   1583             start = p + 1;
   1584         }
   1585 
   1586         // when we get here, we've seeked off the end of the sring, and
   1587         // we terminate the loop (we continue until *start* is -1 rather
   1588         // than until *p* is -1, because otherwise we'd miss the last
   1589         // rule in the description)
   1590         else {
   1591             start = -1;
   1592         }
   1593     }
   1594 
   1595     description.setTo(result);
   1596 }
   1597 
   1598 
   1599 void
   1600 RuleBasedNumberFormat::dispose()
   1601 {
   1602     if (ruleSets) {
   1603         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1604             delete *p;
   1605         }
   1606         uprv_free(ruleSets);
   1607         ruleSets = NULL;
   1608     }
   1609 
   1610     if (ruleSetDescriptions) {
   1611         delete [] ruleSetDescriptions;
   1612     }
   1613 
   1614 #if !UCONFIG_NO_COLLATION
   1615     delete collator;
   1616 #endif
   1617     collator = NULL;
   1618 
   1619     delete decimalFormatSymbols;
   1620     decimalFormatSymbols = NULL;
   1621 
   1622     delete lenientParseRules;
   1623     lenientParseRules = NULL;
   1624 
   1625 #if !UCONFIG_NO_BREAK_ITERATION
   1626    delete capitalizationBrkIter;
   1627    capitalizationBrkIter = NULL;
   1628 #endif
   1629 
   1630     if (localizations) localizations = localizations->unref();
   1631 }
   1632 
   1633 
   1634 //-----------------------------------------------------------------------
   1635 // package-internal API
   1636 //-----------------------------------------------------------------------
   1637 
   1638 /**
   1639  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1640  * this function creates it the first time it's called.
   1641  * @return The collator to use for lenient parsing, or null if lenient parsing
   1642  * is turned off.
   1643 */
   1644 const RuleBasedCollator*
   1645 RuleBasedNumberFormat::getCollator() const
   1646 {
   1647 #if !UCONFIG_NO_COLLATION
   1648     if (!ruleSets) {
   1649         return NULL;
   1650     }
   1651 
   1652     // lazy-evaluate the collator
   1653     if (collator == NULL && lenient) {
   1654         // create a default collator based on the formatter's locale,
   1655         // then pull out that collator's rules, append any additional
   1656         // rules specified in the description, and create a _new_
   1657         // collator based on the combinaiton of those rules
   1658 
   1659         UErrorCode status = U_ZERO_ERROR;
   1660 
   1661         Collator* temp = Collator::createInstance(locale, status);
   1662         RuleBasedCollator* newCollator;
   1663         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
   1664             if (lenientParseRules) {
   1665                 UnicodeString rules(newCollator->getRules());
   1666                 rules.append(*lenientParseRules);
   1667 
   1668                 newCollator = new RuleBasedCollator(rules, status);
   1669                 // Exit if newCollator could not be created.
   1670                 if (newCollator == NULL) {
   1671                     return NULL;
   1672                 }
   1673             } else {
   1674                 temp = NULL;
   1675             }
   1676             if (U_SUCCESS(status)) {
   1677                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1678                 // cast away const
   1679                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1680             } else {
   1681                 delete newCollator;
   1682             }
   1683         }
   1684         delete temp;
   1685     }
   1686 #endif
   1687 
   1688     // if lenient-parse mode is off, this will be null
   1689     // (see setLenientParseMode())
   1690     return collator;
   1691 }
   1692 
   1693 
   1694 /**
   1695  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1696  * instances owned by this formatter.  This object is lazily created: this function
   1697  * creates it the first time it's called.
   1698  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
   1699  * instances owned by this formatter.
   1700 */
   1701 DecimalFormatSymbols*
   1702 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1703 {
   1704     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1705     // is shared by all DecimalFormat instances belonging to this
   1706     // formatter
   1707     if (decimalFormatSymbols == NULL) {
   1708         UErrorCode status = U_ZERO_ERROR;
   1709         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1710         if (U_SUCCESS(status)) {
   1711             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
   1712         } else {
   1713             delete temp;
   1714         }
   1715     }
   1716     return decimalFormatSymbols;
   1717 }
   1718 
   1719 // De-owning the current localized symbols and adopt the new symbols.
   1720 void
   1721 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
   1722 {
   1723     if (symbolsToAdopt == NULL) {
   1724         return; // do not allow caller to set decimalFormatSymbols to NULL
   1725     }
   1726 
   1727     if (decimalFormatSymbols != NULL) {
   1728         delete decimalFormatSymbols;
   1729     }
   1730 
   1731     decimalFormatSymbols = symbolsToAdopt;
   1732 
   1733     {
   1734         // Apply the new decimalFormatSymbols by reparsing the rulesets
   1735         UErrorCode status = U_ZERO_ERROR;
   1736 
   1737         for (int32_t i = 0; i < numRuleSets; i++) {
   1738             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
   1739         }
   1740     }
   1741 }
   1742 
   1743 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
   1744 void
   1745 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
   1746 {
   1747     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
   1748 }
   1749 
   1750 PluralFormat *
   1751 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
   1752                                           const UnicodeString &pattern,
   1753                                           UErrorCode& status) const
   1754 {
   1755     return new PluralFormat(locale, pluralType, pattern, status);
   1756 }
   1757 
   1758 U_NAMESPACE_END
   1759 
   1760 /* U_HAVE_RBNF */
   1761 #endif
   1762