Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2015, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 #include "utypeinfo.h"  // for 'typeid' to work
     10 
     11 #include "unicode/rbnf.h"
     12 
     13 #if U_HAVE_RBNF
     14 
     15 #include "unicode/normlzr.h"
     16 #include "unicode/plurfmt.h"
     17 #include "unicode/tblcoll.h"
     18 #include "unicode/uchar.h"
     19 #include "unicode/ucol.h"
     20 #include "unicode/uloc.h"
     21 #include "unicode/unum.h"
     22 #include "unicode/ures.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/utf16.h"
     25 #include "unicode/udata.h"
     26 #include "unicode/udisplaycontext.h"
     27 #include "unicode/brkiter.h"
     28 #include "nfrs.h"
     29 
     30 #include "cmemory.h"
     31 #include "cstring.h"
     32 #include "patternprops.h"
     33 #include "uresimp.h"
     34 
     35 // debugging
     36 // #define RBNF_DEBUG
     37 
     38 #ifdef RBNF_DEBUG
     39 #include <stdio.h>
     40 #endif
     41 
// Resource tree name handed to ures_open() when loading RBNF rules:
// the ICU data name joined to "rbnf" by the tree separator.
#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

// NUL-terminated UTF-16 spelling of "%%".
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// NUL-terminated UTF-16 spelling of "%%lenient-parse:".
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// The single character ';'.
static const UChar gSemiColon = 0x003B;
// NUL-terminated UTF-16 spelling of ";%".
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble evaluates to 2^22 as a double; kMaxDouble is its square (2^44).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
     64 
     65 U_NAMESPACE_BEGIN
     66 
     67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     68 
     69 /*
     70 This is a utility class. It does not use ICU's RTTI.
     71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     72 Please make sure that intltest passes on Windows in Release mode,
     73 since the string pooling per compilation unit will mess up how RTTI works.
     74 The RTTI code was also removed due to lack of code coverage.
     75 */
     76 class LocalizationInfo : public UMemory {
     77 protected:
     78     virtual ~LocalizationInfo();
     79     uint32_t refcount;
     80 
     81 public:
     82     LocalizationInfo() : refcount(0) {}
     83 
     84     LocalizationInfo* ref(void) {
     85         ++refcount;
     86         return this;
     87     }
     88 
     89     LocalizationInfo* unref(void) {
     90         if (refcount && --refcount == 0) {
     91             delete this;
     92         }
     93         return NULL;
     94     }
     95 
     96     virtual UBool operator==(const LocalizationInfo* rhs) const;
     97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
     98 
     99     virtual int32_t getNumberOfRuleSets(void) const = 0;
    100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
    101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    102     virtual const UChar* getLocaleName(int32_t index) const = 0;
    103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    104 
    105     virtual int32_t indexForLocale(const UChar* locale) const;
    106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    107 
    108 //    virtual UClassID getDynamicClassID() const = 0;
    109 //    static UClassID getStaticClassID(void);
    110 };
    111 
    112 LocalizationInfo::~LocalizationInfo() {}
    113 
    114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    115 
    116 // if both strings are NULL, this returns TRUE
    117 static UBool
    118 streq(const UChar* lhs, const UChar* rhs) {
    119     if (rhs == lhs) {
    120         return TRUE;
    121     }
    122     if (lhs && rhs) {
    123         return u_strcmp(lhs, rhs) == 0;
    124     }
    125     return FALSE;
    126 }
    127 
    128 UBool
    129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    130     if (rhs) {
    131         if (this == rhs) {
    132             return TRUE;
    133         }
    134 
    135         int32_t rsc = getNumberOfRuleSets();
    136         if (rsc == rhs->getNumberOfRuleSets()) {
    137             for (int i = 0; i < rsc; ++i) {
    138                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    139                     return FALSE;
    140                 }
    141             }
    142             int32_t dlc = getNumberOfDisplayLocales();
    143             if (dlc == rhs->getNumberOfDisplayLocales()) {
    144                 for (int i = 0; i < dlc; ++i) {
    145                     const UChar* locale = getLocaleName(i);
    146                     int32_t ix = rhs->indexForLocale(locale);
    147                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    148                     if (!streq(locale, rhs->getLocaleName(ix))) {
    149                         return FALSE;
    150                     }
    151                     for (int j = 0; j < rsc; ++j) {
    152                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    153                             return FALSE;
    154                         }
    155                     }
    156                 }
    157                 return TRUE;
    158             }
    159         }
    160     }
    161     return FALSE;
    162 }
    163 
    164 int32_t
    165 LocalizationInfo::indexForLocale(const UChar* locale) const {
    166     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    167         if (streq(locale, getLocaleName(i))) {
    168             return i;
    169         }
    170     }
    171     return -1;
    172 }
    173 
    174 int32_t
    175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    176     if (ruleset) {
    177         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    178             if (streq(ruleset, getRuleSetName(i))) {
    179                 return i;
    180             }
    181         }
    182     }
    183     return -1;
    184 }
    185 
    186 
    187 typedef void (*Fn_Deleter)(void*);
    188 
    189 class VArray {
    190     void** buf;
    191     int32_t cap;
    192     int32_t size;
    193     Fn_Deleter deleter;
    194 public:
    195     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    196 
    197     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    198 
    199     ~VArray() {
    200         if (deleter) {
    201             for (int i = 0; i < size; ++i) {
    202                 (*deleter)(buf[i]);
    203             }
    204         }
    205         uprv_free(buf);
    206     }
    207 
    208     int32_t length() {
    209         return size;
    210     }
    211 
    212     void add(void* elem, UErrorCode& status) {
    213         if (U_SUCCESS(status)) {
    214             if (size == cap) {
    215                 if (cap == 0) {
    216                     cap = 1;
    217                 } else if (cap < 256) {
    218                     cap *= 2;
    219                 } else {
    220                     cap += 256;
    221                 }
    222                 if (buf == NULL) {
    223                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    224                 } else {
    225                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    226                 }
    227                 if (buf == NULL) {
    228                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    229                     status = U_MEMORY_ALLOCATION_ERROR;
    230                     return;
    231                 }
    232                 void* start = &buf[size];
    233                 size_t count = (cap - size) * sizeof(void*);
    234                 uprv_memset(start, 0, count); // fill with nulls, just because
    235             }
    236             buf[size++] = elem;
    237         }
    238     }
    239 
    240     void** release(void) {
    241         void** result = buf;
    242         buf = NULL;
    243         cap = 0;
    244         size = 0;
    245         return result;
    246     }
    247 };
    248 
    249 class LocDataParser;
    250 
/**
 * LocalizationInfo backed by a parsed localization string.
 *
 * Layout of `data` as read by the accessors defined later in this file:
 * data[0] holds the rule set names, and data[i+1] holds display locale i,
 * with the locale name at [0] and the display name for rule set j at [j+1].
 * The outer array is NULL-terminated (the destructor walks it until NULL).
 */
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;         // character buffer; released with uprv_free in the destructor
    UChar*** data;       // array of string arrays; rows and array freed in the destructor
    int32_t numRuleSets; // value returned by getNumberOfRuleSets()
    int32_t numLocales;  // value returned by getNumberOfDisplayLocales()

// LocDataParser builds instances through the private constructor below.
friend class LocDataParser;

    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info`; returns NULL (without error) for an empty string and
    // NULL with `status` set on failure.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here, but no definition appears in this part of
    // the file -- confirm whether it is still needed.
    void init(UErrorCode& status) const;
};
    280 
    281 
// Code points of the punctuation recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' (single quote) */
    QUOTE = 0x0022, /* '"' (double quote) */
    SPACE = 0x0020 /* ' ' */
};
    290 
    291 /**
    292  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    293  */
    294 class LocDataParser {
    295     UChar* data;
    296     const UChar* e;
    297     UChar* p;
    298     UChar ch;
    299     UParseError& pe;
    300     UErrorCode& ec;
    301 
    302 public:
    303     LocDataParser(UParseError& parseError, UErrorCode& status)
    304         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    305     ~LocDataParser() {}
    306 
    307     /*
    308     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    309     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    310     */
    311     StringLocalizationInfo* parse(UChar* data, int32_t len);
    312 
    313 private:
    314 
    315     void inc(void) { ++p; ch = 0xffff; }
    316     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    317     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    318     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    319     UBool inList(UChar c, const UChar* list) const {
    320         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
    321         while (*list && *list != c) ++list; return *list == c;
    322     }
    323     void parseError(const char* msg);
    324 
    325     StringLocalizationInfo* doParse(void);
    326 
    327     UChar** nextArray(int32_t& requiredLength);
    328     UChar*  nextString(void);
    329 };
    330 
// ERROR(msg) reports a parse failure through parseError() and returns NULL
// from the enclosing function. It expands to TWO statements, so it must only
// be used inside a braced block. The message text is passed on only in
// RBNF_DEBUG builds; otherwise parseError() receives NULL and its parameter
// is left unnamed (EXPLANATION_ARG expands to nothing).
#ifdef RBNF_DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
    338 
    339 
// Terminator set for a string opened with a double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator set for a string opened with a single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminator set for an unquoted string. The leading SPACE makes
// LocDataParser::inList() treat any pattern white space as a terminator.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
    351 
    352 static void
    353 DeleteFn(void* p) {
    354   uprv_free(p);
    355 }
    356 
    357 StringLocalizationInfo*
    358 LocDataParser::parse(UChar* _data, int32_t len) {
    359     if (U_FAILURE(ec)) {
    360         if (_data) uprv_free(_data);
    361         return NULL;
    362     }
    363 
    364     pe.line = 0;
    365     pe.offset = -1;
    366     pe.postContext[0] = 0;
    367     pe.preContext[0] = 0;
    368 
    369     if (_data == NULL) {
    370         ec = U_ILLEGAL_ARGUMENT_ERROR;
    371         return NULL;
    372     }
    373 
    374     if (len <= 0) {
    375         ec = U_ILLEGAL_ARGUMENT_ERROR;
    376         uprv_free(_data);
    377         return NULL;
    378     }
    379 
    380     data = _data;
    381     e = data + len;
    382     p = _data;
    383     ch = 0xffff;
    384 
    385     return doParse();
    386 }
    387 
    388 
    389 StringLocalizationInfo*
    390 LocDataParser::doParse(void) {
    391     skipWhitespace();
    392     if (!checkInc(OPEN_ANGLE)) {
    393         ERROR("Missing open angle");
    394     } else {
    395         VArray array(DeleteFn);
    396         UBool mightHaveNext = TRUE;
    397         int32_t requiredLength = -1;
    398         while (mightHaveNext) {
    399             mightHaveNext = FALSE;
    400             UChar** elem = nextArray(requiredLength);
    401             skipWhitespace();
    402             UBool haveComma = check(COMMA);
    403             if (elem) {
    404                 array.add(elem, ec);
    405                 if (haveComma) {
    406                     inc();
    407                     mightHaveNext = TRUE;
    408                 }
    409             } else if (haveComma) {
    410                 ERROR("Unexpected character");
    411             }
    412         }
    413 
    414         skipWhitespace();
    415         if (!checkInc(CLOSE_ANGLE)) {
    416             if (check(OPEN_ANGLE)) {
    417                 ERROR("Missing comma in outer array");
    418             } else {
    419                 ERROR("Missing close angle bracket in outer array");
    420             }
    421         }
    422 
    423         skipWhitespace();
    424         if (p != e) {
    425             ERROR("Extra text after close of localization data");
    426         }
    427 
    428         array.add(NULL, ec);
    429         if (U_SUCCESS(ec)) {
    430             int32_t numLocs = array.length() - 2; // subtract first, NULL
    431             UChar*** result = (UChar***)array.release();
    432 
    433             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    434         }
    435     }
    436 
    437     ERROR("Unknown error");
    438 }
    439 
    440 UChar**
    441 LocDataParser::nextArray(int32_t& requiredLength) {
    442     if (U_FAILURE(ec)) {
    443         return NULL;
    444     }
    445 
    446     skipWhitespace();
    447     if (!checkInc(OPEN_ANGLE)) {
    448         ERROR("Missing open angle");
    449     }
    450 
    451     VArray array;
    452     UBool mightHaveNext = TRUE;
    453     while (mightHaveNext) {
    454         mightHaveNext = FALSE;
    455         UChar* elem = nextString();
    456         skipWhitespace();
    457         UBool haveComma = check(COMMA);
    458         if (elem) {
    459             array.add(elem, ec);
    460             if (haveComma) {
    461                 inc();
    462                 mightHaveNext = TRUE;
    463             }
    464         } else if (haveComma) {
    465             ERROR("Unexpected comma");
    466         }
    467     }
    468     skipWhitespace();
    469     if (!checkInc(CLOSE_ANGLE)) {
    470         if (check(OPEN_ANGLE)) {
    471             ERROR("Missing close angle bracket in inner array");
    472         } else {
    473             ERROR("Missing comma in inner array");
    474         }
    475     }
    476 
    477     array.add(NULL, ec);
    478     if (U_SUCCESS(ec)) {
    479         if (requiredLength == -1) {
    480             requiredLength = array.length() + 1;
    481         } else if (array.length() != requiredLength) {
    482             ec = U_ILLEGAL_ARGUMENT_ERROR;
    483             ERROR("Array not of required length");
    484         }
    485 
    486         return (UChar**)array.release();
    487     }
    488     ERROR("Unknown Error");
    489 }
    490 
// Extracts the next string, quoted ('...' or "...") or unquoted, and returns
// a pointer to it inside the parse buffer. The string is terminated in place
// by overwriting the character that ended it with NUL; that character is
// remembered in `ch` so check()/skipWhitespace() can still see it.
// Returns NULL when there is no string at the current position (which is
// not an error) or after reporting a parse error.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            inc(); // step over the opening quote
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // scan to the first terminator (or the end of the buffer)
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminating character
        if (p > start) {
            ch = x; // remember what we are about to overwrite
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            inc(); // step over the closing quote; this also clears ch
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
    533 
    534 void LocDataParser::parseError(const char* EXPLANATION_ARG)
    535 {
    536     if (!data) {
    537         return;
    538     }
    539 
    540     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    541     if (start < data) {
    542         start = data;
    543     }
    544     for (UChar* x = p; --x >= start;) {
    545         if (!*x) {
    546             start = x+1;
    547             break;
    548         }
    549     }
    550     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    551     if (limit > e) {
    552         limit = e;
    553     }
    554     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    555     pe.preContext[p-start] = 0;
    556     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    557     pe.postContext[limit-p] = 0;
    558     pe.offset = (int32_t)(p - data);
    559 
    560 #ifdef RBNF_DEBUG
    561     fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
    562 
    563     UnicodeString msg;
    564     msg.append(start, p - start);
    565     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    566     msg.append(p, limit-p);
    567     msg.append(UNICODE_STRING_SIMPLE("'"));
    568 
    569     char buf[128];
    570     int32_t len = msg.extract(0, msg.length(), buf, 128);
    571     if (len >= 128) {
    572         buf[127] = 0;
    573     } else {
    574         buf[len] = 0;
    575     }
    576     fprintf(stderr, "%s\n", buf);
    577     fflush(stderr);
    578 #endif
    579 
    580     uprv_free(data);
    581     data = NULL;
    582     p = NULL;
    583     e = NULL;
    584 
    585     if (U_SUCCESS(ec)) {
    586         ec = U_PARSE_ERROR;
    587     }
    588 }
    589 
    590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    591 
    592 StringLocalizationInfo*
    593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    594     if (U_FAILURE(status)) {
    595         return NULL;
    596     }
    597 
    598     int32_t len = info.length();
    599     if (len == 0) {
    600         return NULL; // no error;
    601     }
    602 
    603     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    604     if (!p) {
    605         status = U_MEMORY_ALLOCATION_ERROR;
    606         return NULL;
    607     }
    608     info.extract(p, len, status);
    609     if (!U_FAILURE(status)) {
    610         status = U_ZERO_ERROR; // clear warning about non-termination
    611     }
    612 
    613     LocDataParser parser(perror, status);
    614     return parser.parse(p, len);
    615 }
    616 
    617 StringLocalizationInfo::~StringLocalizationInfo() {
    618     for (UChar*** p = (UChar***)data; *p; ++p) {
    619         // remaining data is simply pointer into our unicode string data.
    620         if (*p) uprv_free(*p);
    621     }
    622     if (data) uprv_free(data);
    623     if (info) uprv_free(info);
    624 }
    625 
    626 
    627 const UChar*
    628 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    629     if (index >= 0 && index < getNumberOfRuleSets()) {
    630         return data[0][index];
    631     }
    632     return NULL;
    633 }
    634 
    635 const UChar*
    636 StringLocalizationInfo::getLocaleName(int32_t index) const {
    637     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    638         return data[index+1][0];
    639     }
    640     return NULL;
    641 }
    642 
    643 const UChar*
    644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    645     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    646         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    647         return data[localeIndex+1][ruleIndex+1];
    648     }
    649     return NULL;
    650 }
    651 
    652 // ----------
    653 
    654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    655                                              const UnicodeString& locs,
    656                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    657   : ruleSets(NULL)
    658   , ruleSetDescriptions(NULL)
    659   , numRuleSets(0)
    660   , defaultRuleSet(NULL)
    661   , locale(alocale)
    662   , collator(NULL)
    663   , decimalFormatSymbols(NULL)
    664   , defaultInfinityRule(NULL)
    665   , defaultNaNRule(NULL)
    666   , lenient(FALSE)
    667   , lenientParseRules(NULL)
    668   , localizations(NULL)
    669   , capitalizationInfoSet(FALSE)
    670   , capitalizationForUIListMenu(FALSE)
    671   , capitalizationForStandAlone(FALSE)
    672   , capitalizationBrkIter(NULL)
    673 {
    674   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    675   init(description, locinfo, perror, status);
    676 }
    677 
    678 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    679                                              const UnicodeString& locs,
    680                                              UParseError& perror, UErrorCode& status)
    681   : ruleSets(NULL)
    682   , ruleSetDescriptions(NULL)
    683   , numRuleSets(0)
    684   , defaultRuleSet(NULL)
    685   , locale(Locale::getDefault())
    686   , collator(NULL)
    687   , decimalFormatSymbols(NULL)
    688   , defaultInfinityRule(NULL)
    689   , defaultNaNRule(NULL)
    690   , lenient(FALSE)
    691   , lenientParseRules(NULL)
    692   , localizations(NULL)
    693   , capitalizationInfoSet(FALSE)
    694   , capitalizationForUIListMenu(FALSE)
    695   , capitalizationForStandAlone(FALSE)
    696   , capitalizationBrkIter(NULL)
    697 {
    698   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    699   init(description, locinfo, perror, status);
    700 }
    701 
    702 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    703                                              LocalizationInfo* info,
    704                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    705   : ruleSets(NULL)
    706   , ruleSetDescriptions(NULL)
    707   , numRuleSets(0)
    708   , defaultRuleSet(NULL)
    709   , locale(alocale)
    710   , collator(NULL)
    711   , decimalFormatSymbols(NULL)
    712   , defaultInfinityRule(NULL)
    713   , defaultNaNRule(NULL)
    714   , lenient(FALSE)
    715   , lenientParseRules(NULL)
    716   , localizations(NULL)
    717   , capitalizationInfoSet(FALSE)
    718   , capitalizationForUIListMenu(FALSE)
    719   , capitalizationForStandAlone(FALSE)
    720   , capitalizationBrkIter(NULL)
    721 {
    722   init(description, info, perror, status);
    723 }
    724 
    725 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    726                          UParseError& perror,
    727                          UErrorCode& status)
    728   : ruleSets(NULL)
    729   , ruleSetDescriptions(NULL)
    730   , numRuleSets(0)
    731   , defaultRuleSet(NULL)
    732   , locale(Locale::getDefault())
    733   , collator(NULL)
    734   , decimalFormatSymbols(NULL)
    735   , defaultInfinityRule(NULL)
    736   , defaultNaNRule(NULL)
    737   , lenient(FALSE)
    738   , lenientParseRules(NULL)
    739   , localizations(NULL)
    740   , capitalizationInfoSet(FALSE)
    741   , capitalizationForUIListMenu(FALSE)
    742   , capitalizationForStandAlone(FALSE)
    743   , capitalizationBrkIter(NULL)
    744 {
    745     init(description, NULL, perror, status);
    746 }
    747 
    748 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    749                          const Locale& aLocale,
    750                          UParseError& perror,
    751                          UErrorCode& status)
    752   : ruleSets(NULL)
    753   , ruleSetDescriptions(NULL)
    754   , numRuleSets(0)
    755   , defaultRuleSet(NULL)
    756   , locale(aLocale)
    757   , collator(NULL)
    758   , decimalFormatSymbols(NULL)
    759   , defaultInfinityRule(NULL)
    760   , defaultNaNRule(NULL)
    761   , lenient(FALSE)
    762   , lenientParseRules(NULL)
    763   , localizations(NULL)
    764   , capitalizationInfoSet(FALSE)
    765   , capitalizationForUIListMenu(FALSE)
    766   , capitalizationForStandAlone(FALSE)
    767   , capitalizationBrkIter(NULL)
    768 {
    769     init(description, NULL, perror, status);
    770 }
    771 
    772 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    773   : ruleSets(NULL)
    774   , ruleSetDescriptions(NULL)
    775   , numRuleSets(0)
    776   , defaultRuleSet(NULL)
    777   , locale(alocale)
    778   , collator(NULL)
    779   , decimalFormatSymbols(NULL)
    780   , defaultInfinityRule(NULL)
    781   , defaultNaNRule(NULL)
    782   , lenient(FALSE)
    783   , lenientParseRules(NULL)
    784   , localizations(NULL)
    785   , capitalizationInfoSet(FALSE)
    786   , capitalizationForUIListMenu(FALSE)
    787   , capitalizationForStandAlone(FALSE)
    788   , capitalizationBrkIter(NULL)
    789 {
    790     if (U_FAILURE(status)) {
    791         return;
    792     }
    793 
    794     const char* rules_tag = "RBNFRules";
    795     const char* fmt_tag = "";
    796     switch (tag) {
    797     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    798     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    799     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    800     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    801     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    802     }
    803 
    804     // TODO: read localization info from resource
    805     LocalizationInfo* locinfo = NULL;
    806 
    807     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    808     if (U_SUCCESS(status)) {
    809         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    810                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    811 
    812         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    813         if (U_FAILURE(status)) {
    814             ures_close(nfrb);
    815         }
    816         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    817         if (U_FAILURE(status)) {
    818             ures_close(rbnfRules);
    819             ures_close(nfrb);
    820             return;
    821         }
    822 
    823         UnicodeString desc;
    824         while (ures_hasNext(ruleSets)) {
    825            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
    826         }
    827         UParseError perror;
    828 
    829         init(desc, locinfo, perror, status);
    830 
    831         ures_close(ruleSets);
    832         ures_close(rbnfRules);
    833     }
    834     ures_close(nfrb);
    835 }
    836 
    837 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    838   : NumberFormat(rhs)
    839   , ruleSets(NULL)
    840   , ruleSetDescriptions(NULL)
    841   , numRuleSets(0)
    842   , defaultRuleSet(NULL)
    843   , locale(rhs.locale)
    844   , collator(NULL)
    845   , decimalFormatSymbols(NULL)
    846   , defaultInfinityRule(NULL)
    847   , defaultNaNRule(NULL)
    848   , lenient(FALSE)
    849   , lenientParseRules(NULL)
    850   , localizations(NULL)
    851   , capitalizationInfoSet(FALSE)
    852   , capitalizationForUIListMenu(FALSE)
    853   , capitalizationForStandAlone(FALSE)
    854   , capitalizationBrkIter(NULL)
    855 {
    856     this->operator=(rhs);
    857 }
    858 
    859 // --------
    860 
// Assignment: discards the current state and rebuilds this formatter from
// rhs's original rule description, then copies the default rule set choice
// and the capitalization settings.
// Errors raised while rebuilding go into a local status and are not
// reported to the caller.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): dispose() is defined outside this section; presumably it
    // releases everything this object currently owns -- confirm.
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // A reference is taken on rhs's localization info before it is handed to
    // init(). NOTE(review): init() is not visible here -- verify that it
    // does not take a second reference of its own.
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // Each formatter gets its own clone of the break iterator.
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
    887 
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
    892 
    893 Format*
    894 RuleBasedNumberFormat::clone(void) const
    895 {
    896     return new RuleBasedNumberFormat(*this);
    897 }
    898 
    899 UBool
    900 RuleBasedNumberFormat::operator==(const Format& other) const
    901 {
    902     if (this == &other) {
    903         return TRUE;
    904     }
    905 
    906     if (typeid(*this) == typeid(other)) {
    907         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    908         // test for capitalization info equality is adequately handled
    909         // by the NumberFormat test for fCapitalizationContext equality;
    910         // the info here is just derived from that.
    911         if (locale == rhs.locale &&
    912             lenient == rhs.lenient &&
    913             (localizations == NULL
    914                 ? rhs.localizations == NULL
    915                 : (rhs.localizations == NULL
    916                     ? FALSE
    917                     : *localizations == rhs.localizations))) {
    918 
    919             NFRuleSet** p = ruleSets;
    920             NFRuleSet** q = rhs.ruleSets;
    921             if (p == NULL) {
    922                 return q == NULL;
    923             } else if (q == NULL) {
    924                 return FALSE;
    925             }
    926             while (*p && *q && (**p == **q)) {
    927                 ++p;
    928                 ++q;
    929             }
    930             return *q == NULL && *p == NULL;
    931         }
    932     }
    933 
    934     return FALSE;
    935 }
    936 
    937 UnicodeString
    938 RuleBasedNumberFormat::getRules() const
    939 {
    940     UnicodeString result;
    941     if (ruleSets != NULL) {
    942         for (NFRuleSet** p = ruleSets; *p; ++p) {
    943             (*p)->appendRules(result);
    944         }
    945     }
    946     return result;
    947 }
    948 
    949 UnicodeString
    950 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    951 {
    952     if (localizations) {
    953         UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    954         return string;
    955     }
    956     else if (ruleSets) {
    957         UnicodeString result;
    958         for (NFRuleSet** p = ruleSets; *p; ++p) {
    959             NFRuleSet* rs = *p;
    960             if (rs->isPublic()) {
    961                 if (--index == -1) {
    962                     rs->getName(result);
    963                     return result;
    964                 }
    965             }
    966         }
    967     }
    968     UnicodeString empty;
    969     return empty;
    970 }
    971 
    972 int32_t
    973 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    974 {
    975     int32_t result = 0;
    976     if (localizations) {
    977         result = localizations->getNumberOfRuleSets();
    978     }
    979     else if (ruleSets) {
    980         for (NFRuleSet** p = ruleSets; *p; ++p) {
    981             if ((**p).isPublic()) {
    982                 ++result;
    983             }
    984         }
    985     }
    986     return result;
    987 }
    988 
    989 int32_t
    990 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    991     if (localizations) {
    992         return localizations->getNumberOfDisplayLocales();
    993     }
    994     return 0;
    995 }
    996 
    997 Locale
    998 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    999     if (U_FAILURE(status)) {
   1000         return Locale("");
   1001     }
   1002     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
   1003         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
   1004         char buffer[64];
   1005         int32_t cap = name.length() + 1;
   1006         char* bp = buffer;
   1007         if (cap > 64) {
   1008             bp = (char *)uprv_malloc(cap);
   1009             if (bp == NULL) {
   1010                 status = U_MEMORY_ALLOCATION_ERROR;
   1011                 return Locale("");
   1012             }
   1013         }
   1014         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
   1015         Locale retLocale(bp);
   1016         if (bp != buffer) {
   1017             uprv_free(bp);
   1018         }
   1019         return retLocale;
   1020     }
   1021     status = U_ILLEGAL_ARGUMENT_ERROR;
   1022     Locale retLocale;
   1023     return retLocale;
   1024 }
   1025 
   1026 UnicodeString
   1027 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
   1028     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
   1029         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
   1030         int32_t len = localeName.length();
   1031         UChar* localeStr = localeName.getBuffer(len + 1);
   1032         while (len >= 0) {
   1033             localeStr[len] = 0;
   1034             int32_t ix = localizations->indexForLocale(localeStr);
   1035             if (ix >= 0) {
   1036                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1037                 return name;
   1038             }
   1039 
   1040             // trim trailing portion, skipping over ommitted sections
   1041             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1042             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1043         }
   1044         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1045         return name;
   1046     }
   1047     UnicodeString bogus;
   1048     bogus.setToBogus();
   1049     return bogus;
   1050 }
   1051 
   1052 UnicodeString
   1053 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1054     if (localizations) {
   1055         UnicodeString rsn(ruleSetName);
   1056         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1057         return getRuleSetDisplayName(ix, localeParam);
   1058     }
   1059     UnicodeString bogus;
   1060     bogus.setToBogus();
   1061     return bogus;
   1062 }
   1063 
   1064 NFRuleSet*
   1065 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1066 {
   1067     if (U_SUCCESS(status) && ruleSets) {
   1068         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1069             NFRuleSet* rs = *p;
   1070             if (rs->isNamed(name)) {
   1071                 return rs;
   1072             }
   1073         }
   1074         status = U_ILLEGAL_ARGUMENT_ERROR;
   1075     }
   1076     return NULL;
   1077 }
   1078 
   1079 UnicodeString&
   1080 RuleBasedNumberFormat::format(int32_t number,
   1081                               UnicodeString& toAppendTo,
   1082                               FieldPosition& /* pos */) const
   1083 {
   1084     if (defaultRuleSet) {
   1085         UErrorCode status = U_ZERO_ERROR;
   1086         int32_t startPos = toAppendTo.length();
   1087         defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
   1088         adjustForCapitalizationContext(startPos, toAppendTo);
   1089     }
   1090     return toAppendTo;
   1091 }
   1092 
   1093 
   1094 UnicodeString&
   1095 RuleBasedNumberFormat::format(int64_t number,
   1096                               UnicodeString& toAppendTo,
   1097                               FieldPosition& /* pos */) const
   1098 {
   1099     if (defaultRuleSet) {
   1100         UErrorCode status = U_ZERO_ERROR;
   1101         int32_t startPos = toAppendTo.length();
   1102         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1103         adjustForCapitalizationContext(startPos, toAppendTo);
   1104     }
   1105     return toAppendTo;
   1106 }
   1107 
   1108 
   1109 UnicodeString&
   1110 RuleBasedNumberFormat::format(double number,
   1111                               UnicodeString& toAppendTo,
   1112                               FieldPosition& /* pos */) const
   1113 {
   1114     int32_t startPos = toAppendTo.length();
   1115     if (defaultRuleSet) {
   1116         UErrorCode status = U_ZERO_ERROR;
   1117         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1118     }
   1119     return adjustForCapitalizationContext(startPos, toAppendTo);
   1120 }
   1121 
   1122 
   1123 UnicodeString&
   1124 RuleBasedNumberFormat::format(int32_t number,
   1125                               const UnicodeString& ruleSetName,
   1126                               UnicodeString& toAppendTo,
   1127                               FieldPosition& /* pos */,
   1128                               UErrorCode& status) const
   1129 {
   1130     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1131     if (U_SUCCESS(status)) {
   1132         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1133             // throw new IllegalArgumentException("Can't use internal rule set");
   1134             status = U_ILLEGAL_ARGUMENT_ERROR;
   1135         } else {
   1136             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1137             if (rs) {
   1138                 int32_t startPos = toAppendTo.length();
   1139                 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
   1140                 adjustForCapitalizationContext(startPos, toAppendTo);
   1141             }
   1142         }
   1143     }
   1144     return toAppendTo;
   1145 }
   1146 
   1147 
   1148 UnicodeString&
   1149 RuleBasedNumberFormat::format(int64_t number,
   1150                               const UnicodeString& ruleSetName,
   1151                               UnicodeString& toAppendTo,
   1152                               FieldPosition& /* pos */,
   1153                               UErrorCode& status) const
   1154 {
   1155     if (U_SUCCESS(status)) {
   1156         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1157             // throw new IllegalArgumentException("Can't use internal rule set");
   1158             status = U_ILLEGAL_ARGUMENT_ERROR;
   1159         } else {
   1160             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1161             if (rs) {
   1162                 int32_t startPos = toAppendTo.length();
   1163                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1164                 adjustForCapitalizationContext(startPos, toAppendTo);
   1165             }
   1166         }
   1167     }
   1168     return toAppendTo;
   1169 }
   1170 
   1171 
   1172 UnicodeString&
   1173 RuleBasedNumberFormat::format(double number,
   1174                               const UnicodeString& ruleSetName,
   1175                               UnicodeString& toAppendTo,
   1176                               FieldPosition& /* pos */,
   1177                               UErrorCode& status) const
   1178 {
   1179     if (U_SUCCESS(status)) {
   1180         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1181             // throw new IllegalArgumentException("Can't use internal rule set");
   1182             status = U_ILLEGAL_ARGUMENT_ERROR;
   1183         } else {
   1184             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1185             if (rs) {
   1186                 int32_t startPos = toAppendTo.length();
   1187                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1188                 adjustForCapitalizationContext(startPos, toAppendTo);
   1189             }
   1190         }
   1191     }
   1192     return toAppendTo;
   1193 }
   1194 
   1195 UnicodeString&
   1196 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
   1197                                                       UnicodeString& currentResult) const
   1198 {
   1199 #if !UCONFIG_NO_BREAK_ITERATION
   1200     if (startPos==0 && currentResult.length() > 0) {
   1201         // capitalize currentResult according to context
   1202         UChar32 ch = currentResult.char32At(0);
   1203         UErrorCode status = U_ZERO_ERROR;
   1204         UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
   1205         if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
   1206               ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1207                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1208                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1209             // titlecase first word of currentResult, here use sentence iterator unlike current implementations
   1210             // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
   1211             currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
   1212         }
   1213     }
   1214 #endif
   1215     return currentResult;
   1216 }
   1217 
   1218 
   1219 void
   1220 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1221                              Formattable& result,
   1222                              ParsePosition& parsePosition) const
   1223 {
   1224     if (!ruleSets) {
   1225         parsePosition.setErrorIndex(0);
   1226         return;
   1227     }
   1228 
   1229     UnicodeString workingText(text, parsePosition.getIndex());
   1230     ParsePosition workingPos(0);
   1231 
   1232     ParsePosition high_pp(0);
   1233     Formattable high_result;
   1234 
   1235     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1236         NFRuleSet *rp = *p;
   1237         if (rp->isPublic() && rp->isParseable()) {
   1238             ParsePosition working_pp(0);
   1239             Formattable working_result;
   1240 
   1241             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1242             if (working_pp.getIndex() > high_pp.getIndex()) {
   1243                 high_pp = working_pp;
   1244                 high_result = working_result;
   1245 
   1246                 if (high_pp.getIndex() == workingText.length()) {
   1247                     break;
   1248                 }
   1249             }
   1250         }
   1251     }
   1252 
   1253     int32_t startIndex = parsePosition.getIndex();
   1254     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1255     if (high_pp.getIndex() > 0) {
   1256         parsePosition.setErrorIndex(-1);
   1257     } else {
   1258         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1259         parsePosition.setErrorIndex(startIndex + errorIndex);
   1260     }
   1261     result = high_result;
   1262     if (result.getType() == Formattable::kDouble) {
   1263         int32_t r = (int32_t)result.getDouble();
   1264         if ((double)r == result.getDouble()) {
   1265             result.setLong(r);
   1266         }
   1267     }
   1268 }
   1269 
   1270 #if !UCONFIG_NO_COLLATION
   1271 
   1272 void
   1273 RuleBasedNumberFormat::setLenient(UBool enabled)
   1274 {
   1275     lenient = enabled;
   1276     if (!enabled && collator) {
   1277         delete collator;
   1278         collator = NULL;
   1279     }
   1280 }
   1281 
   1282 #endif
   1283 
   1284 void
   1285 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1286     if (U_SUCCESS(status)) {
   1287         if (ruleSetName.isEmpty()) {
   1288           if (localizations) {
   1289               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1290               defaultRuleSet = findRuleSet(name, status);
   1291           } else {
   1292             initDefaultRuleSet();
   1293           }
   1294         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1295             status = U_ILLEGAL_ARGUMENT_ERROR;
   1296         } else {
   1297             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1298             if (result != NULL) {
   1299                 defaultRuleSet = result;
   1300             }
   1301         }
   1302     }
   1303 }
   1304 
   1305 UnicodeString
   1306 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1307     UnicodeString result;
   1308     if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1309         defaultRuleSet->getName(result);
   1310     } else {
   1311         result.setToBogus();
   1312     }
   1313     return result;
   1314 }
   1315 
   1316 void
   1317 RuleBasedNumberFormat::initDefaultRuleSet()
   1318 {
   1319     defaultRuleSet = NULL;
   1320     if (!ruleSets) {
   1321         return;
   1322     }
   1323 
   1324     const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
   1325     const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
   1326     const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
   1327 
   1328     NFRuleSet**p = &ruleSets[0];
   1329     while (*p) {
   1330         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1331             defaultRuleSet = *p;
   1332             return;
   1333         } else {
   1334             ++p;
   1335         }
   1336     }
   1337 
   1338     defaultRuleSet = *--p;
   1339     if (!defaultRuleSet->isPublic()) {
   1340         while (p != ruleSets) {
   1341             if ((*--p)->isPublic()) {
   1342                 defaultRuleSet = *p;
   1343                 break;
   1344             }
   1345         }
   1346     }
   1347 }
   1348 
   1349 
   1350 void
   1351 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
   1352                             UParseError& pErr, UErrorCode& status)
   1353 {
   1354     // TODO: implement UParseError
   1355     uprv_memset(&pErr, 0, sizeof(UParseError));
   1356     // Note: this can leave ruleSets == NULL, so remaining code should check
   1357     if (U_FAILURE(status)) {
   1358         return;
   1359     }
   1360 
   1361     initializeDecimalFormatSymbols(status);
   1362     initializeDefaultInfinityRule(status);
   1363     initializeDefaultNaNRule(status);
   1364     if (U_FAILURE(status)) {
   1365         return;
   1366     }
   1367 
   1368     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
   1369 
   1370     UnicodeString description(rules);
   1371     if (!description.length()) {
   1372         status = U_MEMORY_ALLOCATION_ERROR;
   1373         return;
   1374     }
   1375 
   1376     // start by stripping the trailing whitespace from all the rules
   1377     // (this is all the whitespace follwing each semicolon in the
   1378     // description).  This allows us to look for rule-set boundaries
   1379     // by searching for ";%" without having to worry about whitespace
   1380     // between the ; and the %
   1381     stripWhitespace(description);
   1382 
   1383     // check to see if there's a set of lenient-parse rules.  If there
   1384     // is, pull them out into our temporary holding place for them,
   1385     // and delete them from the description before the real desciption-
   1386     // parsing code sees them
   1387     int32_t lp = description.indexOf(gLenientParse, -1, 0);
   1388     if (lp != -1) {
   1389         // we've got to make sure we're not in the middle of a rule
   1390         // (where "%%lenient-parse" would actually get treated as
   1391         // rule text)
   1392         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
   1393             // locate the beginning and end of the actual collation
   1394             // rules (there may be whitespace between the name and
   1395             // the first token in the description)
   1396             int lpEnd = description.indexOf(gSemiPercent, 2, lp);
   1397 
   1398             if (lpEnd == -1) {
   1399                 lpEnd = description.length() - 1;
   1400             }
   1401             int lpStart = lp + u_strlen(gLenientParse);
   1402             while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
   1403                 ++lpStart;
   1404             }
   1405 
   1406             // copy out the lenient-parse rules and delete them
   1407             // from the description
   1408             lenientParseRules = new UnicodeString();
   1409             /* test for NULL */
   1410             if (lenientParseRules == 0) {
   1411                 status = U_MEMORY_ALLOCATION_ERROR;
   1412                 return;
   1413             }
   1414             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
   1415 
   1416             description.remove(lp, lpEnd + 1 - lp);
   1417         }
   1418     }
   1419 
   1420     // pre-flight parsing the description and count the number of
   1421     // rule sets (";%" marks the end of one rule set and the beginning
   1422     // of the next)
   1423     numRuleSets = 0;
   1424     for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
   1425         ++numRuleSets;
   1426         ++p;
   1427     }
   1428     ++numRuleSets;
   1429 
   1430     // our rule list is an array of the appropriate size
   1431     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
   1432     /* test for NULL */
   1433     if (ruleSets == 0) {
   1434         status = U_MEMORY_ALLOCATION_ERROR;
   1435         return;
   1436     }
   1437 
   1438     for (int i = 0; i <= numRuleSets; ++i) {
   1439         ruleSets[i] = NULL;
   1440     }
   1441 
   1442     // divide up the descriptions into individual rule-set descriptions
   1443     // and store them in a temporary array.  At each step, we also
   1444     // new up a rule set, but all this does is initialize its name
   1445     // and remove it from its description.  We can't actually parse
   1446     // the rest of the descriptions and finish initializing everything
   1447     // because we have to know the names and locations of all the rule
   1448     // sets before we can actually set everything up
   1449     if(!numRuleSets) {
   1450         status = U_ILLEGAL_ARGUMENT_ERROR;
   1451         return;
   1452     }
   1453 
   1454     ruleSetDescriptions = new UnicodeString[numRuleSets];
   1455     if (ruleSetDescriptions == 0) {
   1456         status = U_MEMORY_ALLOCATION_ERROR;
   1457         return;
   1458     }
   1459 
   1460     {
   1461         int curRuleSet = 0;
   1462         int32_t start = 0;
   1463         for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
   1464             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
   1465             ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
   1466             if (ruleSets[curRuleSet] == 0) {
   1467                 status = U_MEMORY_ALLOCATION_ERROR;
   1468                 return;
   1469             }
   1470             ++curRuleSet;
   1471             start = p + 1;
   1472         }
   1473         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
   1474         ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
   1475         if (ruleSets[curRuleSet] == 0) {
   1476             status = U_MEMORY_ALLOCATION_ERROR;
   1477             return;
   1478         }
   1479     }
   1480 
   1481     // now we can take note of the formatter's default rule set, which
   1482     // is the last public rule set in the description (it's the last
   1483     // rather than the first so that a user can create a new formatter
   1484     // from an existing formatter and change its default behavior just
   1485     // by appending more rule sets to the end)
   1486 
   1487     // {dlf} Initialization of a fraction rule set requires the default rule
   1488     // set to be known.  For purposes of initialization, this is always the
   1489     // last public rule set, no matter what the localization data says.
   1490     initDefaultRuleSet();
   1491 
   1492     // finally, we can go back through the temporary descriptions
   1493     // list and finish seting up the substructure (and we throw
   1494     // away the temporary descriptions as we go)
   1495     {
   1496         for (int i = 0; i < numRuleSets; i++) {
   1497             ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
   1498         }
   1499     }
   1500 
   1501     // Now that the rules are initialized, the 'real' default rule
   1502     // set can be adjusted by the localization data.
   1503 
   1504     // The C code keeps the localization array as is, rather than building
   1505     // a separate array of the public rule set names, so we have less work
   1506     // to do here-- but we still need to check the names.
   1507 
   1508     if (localizationInfos) {
   1509         // confirm the names, if any aren't in the rules, that's an error
   1510         // it is ok if the rules contain public rule sets that are not in this list
   1511         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
   1512             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
   1513             NFRuleSet* rs = findRuleSet(name, status);
   1514             if (rs == NULL) {
   1515                 break; // error
   1516             }
   1517             if (i == 0) {
   1518                 defaultRuleSet = rs;
   1519             }
   1520         }
   1521     } else {
   1522         defaultRuleSet = getDefaultRuleSet();
   1523     }
   1524     originalDescription = rules;
   1525 }
   1526 
   1527 // override the NumberFormat implementation in order to
   1528 // lazily initialize relevant items
   1529 void
   1530 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
   1531 {
   1532     NumberFormat::setContext(value, status);
   1533     if (U_SUCCESS(status)) {
   1534     	if (!capitalizationInfoSet &&
   1535     	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
   1536     	    initCapitalizationContextInfo(locale);
   1537     	    capitalizationInfoSet = TRUE;
   1538         }
   1539 #if !UCONFIG_NO_BREAK_ITERATION
   1540         if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1541                 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1542                 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1543             UErrorCode status = U_ZERO_ERROR;
   1544             capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
   1545             if (U_FAILURE(status)) {
   1546                 delete capitalizationBrkIter;
   1547                 capitalizationBrkIter = NULL;
   1548             }
   1549         }
   1550 #endif
   1551     }
   1552 }
   1553 
   1554 void
   1555 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
   1556 {
   1557 #if !UCONFIG_NO_BREAK_ITERATION
   1558     const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
   1559     UErrorCode status = U_ZERO_ERROR;
   1560     UResourceBundle *rb = ures_open(NULL, localeID, &status);
   1561     rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
   1562     rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
   1563     if (U_SUCCESS(status) && rb != NULL) {
   1564         int32_t len = 0;
   1565         const int32_t * intVector = ures_getIntVector(rb, &len, &status);
   1566         if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
   1567             capitalizationForUIListMenu = intVector[0];
   1568             capitalizationForStandAlone = intVector[1];
   1569         }
   1570     }
   1571     ures_close(rb);
   1572 #endif
   1573 }
   1574 
   1575 void
   1576 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1577 {
   1578     // iterate through the characters...
   1579     UnicodeString result;
   1580 
   1581     int start = 0;
   1582     while (start != -1 && start < description.length()) {
   1583         // seek to the first non-whitespace character...
   1584         while (start < description.length()
   1585             && PatternProps::isWhiteSpace(description.charAt(start))) {
   1586             ++start;
   1587         }
   1588 
   1589         // locate the next semicolon in the text and copy the text from
   1590         // our current position up to that semicolon into the result
   1591         int32_t p = description.indexOf(gSemiColon, start);
   1592         if (p == -1) {
   1593             // or if we don't find a semicolon, just copy the rest of
   1594             // the string into the result
   1595             result.append(description, start, description.length() - start);
   1596             start = -1;
   1597         }
   1598         else if (p < description.length()) {
   1599             result.append(description, start, p + 1 - start);
   1600             start = p + 1;
   1601         }
   1602 
   1603         // when we get here, we've seeked off the end of the sring, and
   1604         // we terminate the loop (we continue until *start* is -1 rather
   1605         // than until *p* is -1, because otherwise we'd miss the last
   1606         // rule in the description)
   1607         else {
   1608             start = -1;
   1609         }
   1610     }
   1611 
   1612     description.setTo(result);
   1613 }
   1614 
   1615 
   1616 void
   1617 RuleBasedNumberFormat::dispose()
   1618 {
   1619     if (ruleSets) {
   1620         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1621             delete *p;
   1622         }
   1623         uprv_free(ruleSets);
   1624         ruleSets = NULL;
   1625     }
   1626 
   1627     if (ruleSetDescriptions) {
   1628         delete [] ruleSetDescriptions;
   1629         ruleSetDescriptions = NULL;
   1630     }
   1631 
   1632 #if !UCONFIG_NO_COLLATION
   1633     delete collator;
   1634 #endif
   1635     collator = NULL;
   1636 
   1637     delete decimalFormatSymbols;
   1638     decimalFormatSymbols = NULL;
   1639 
   1640     delete defaultInfinityRule;
   1641     defaultInfinityRule = NULL;
   1642 
   1643     delete defaultNaNRule;
   1644     defaultNaNRule = NULL;
   1645 
   1646     delete lenientParseRules;
   1647     lenientParseRules = NULL;
   1648 
   1649 #if !UCONFIG_NO_BREAK_ITERATION
   1650     delete capitalizationBrkIter;
   1651     capitalizationBrkIter = NULL;
   1652 #endif
   1653 
   1654     if (localizations) {
   1655         localizations = localizations->unref();
   1656     }
   1657 }
   1658 
   1659 
   1660 //-----------------------------------------------------------------------
   1661 // package-internal API
   1662 //-----------------------------------------------------------------------
   1663 
   1664 /**
   1665  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1666  * this function creates it the first time it's called.
   1667  * @return The collator to use for lenient parsing, or null if lenient parsing
   1668  * is turned off.
   1669 */
   1670 const RuleBasedCollator*
   1671 RuleBasedNumberFormat::getCollator() const
   1672 {
   1673 #if !UCONFIG_NO_COLLATION
   1674     if (!ruleSets) {
   1675         return NULL;
   1676     }
   1677 
   1678     // lazy-evaluate the collator
   1679     if (collator == NULL && lenient) {
   1680         // create a default collator based on the formatter's locale,
   1681         // then pull out that collator's rules, append any additional
   1682         // rules specified in the description, and create a _new_
   1683         // collator based on the combinaiton of those rules
   1684 
   1685         UErrorCode status = U_ZERO_ERROR;
   1686 
   1687         Collator* temp = Collator::createInstance(locale, status);
   1688         RuleBasedCollator* newCollator;
   1689         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
   1690             if (lenientParseRules) {
   1691                 UnicodeString rules(newCollator->getRules());
   1692                 rules.append(*lenientParseRules);
   1693 
   1694                 newCollator = new RuleBasedCollator(rules, status);
   1695                 // Exit if newCollator could not be created.
   1696                 if (newCollator == NULL) {
   1697                     return NULL;
   1698                 }
   1699             } else {
   1700                 temp = NULL;
   1701             }
   1702             if (U_SUCCESS(status)) {
   1703                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1704                 // cast away const
   1705                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1706             } else {
   1707                 delete newCollator;
   1708             }
   1709         }
   1710         delete temp;
   1711     }
   1712 #endif
   1713 
   1714     // if lenient-parse mode is off, this will be null
   1715     // (see setLenientParseMode())
   1716     return collator;
   1717 }
   1718 
   1719 
   1720 DecimalFormatSymbols*
   1721 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
   1722 {
   1723     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1724     // is shared by all DecimalFormat instances belonging to this
   1725     // formatter
   1726     if (decimalFormatSymbols == NULL) {
   1727         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1728         if (U_SUCCESS(status)) {
   1729             decimalFormatSymbols = temp;
   1730         }
   1731         else {
   1732             delete temp;
   1733         }
   1734     }
   1735     return decimalFormatSymbols;
   1736 }
   1737 
   1738 /**
   1739  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1740  * instances owned by this formatter.
   1741 */
   1742 const DecimalFormatSymbols*
   1743 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1744 {
   1745     return decimalFormatSymbols;
   1746 }
   1747 
   1748 NFRule*
   1749 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
   1750 {
   1751     if (U_FAILURE(status)) {
   1752         return NULL;
   1753     }
   1754     if (defaultInfinityRule == NULL) {
   1755         UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
   1756         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
   1757         NFRule* temp = new NFRule(this, rule, status);
   1758         if (U_SUCCESS(status)) {
   1759             defaultInfinityRule = temp;
   1760         }
   1761         else {
   1762             delete temp;
   1763         }
   1764     }
   1765     return defaultInfinityRule;
   1766 }
   1767 
   1768 const NFRule*
   1769 RuleBasedNumberFormat::getDefaultInfinityRule() const
   1770 {
   1771     return defaultInfinityRule;
   1772 }
   1773 
   1774 NFRule*
   1775 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
   1776 {
   1777     if (U_FAILURE(status)) {
   1778         return NULL;
   1779     }
   1780     if (defaultNaNRule == NULL) {
   1781         UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
   1782         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
   1783         NFRule* temp = new NFRule(this, rule, status);
   1784         if (U_SUCCESS(status)) {
   1785             defaultNaNRule = temp;
   1786         }
   1787         else {
   1788             delete temp;
   1789         }
   1790     }
   1791     return defaultNaNRule;
   1792 }
   1793 
   1794 const NFRule*
   1795 RuleBasedNumberFormat::getDefaultNaNRule() const
   1796 {
   1797     return defaultNaNRule;
   1798 }
   1799 
   1800 // De-owning the current localized symbols and adopt the new symbols.
   1801 void
   1802 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
   1803 {
   1804     if (symbolsToAdopt == NULL) {
   1805         return; // do not allow caller to set decimalFormatSymbols to NULL
   1806     }
   1807 
   1808     if (decimalFormatSymbols != NULL) {
   1809         delete decimalFormatSymbols;
   1810     }
   1811 
   1812     decimalFormatSymbols = symbolsToAdopt;
   1813 
   1814     {
   1815         // Apply the new decimalFormatSymbols by reparsing the rulesets
   1816         UErrorCode status = U_ZERO_ERROR;
   1817 
   1818         delete defaultInfinityRule;
   1819         defaultInfinityRule = NULL;
   1820         initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
   1821 
   1822         delete defaultNaNRule;
   1823         defaultNaNRule = NULL;
   1824         initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
   1825 
   1826         if (ruleSets) {
   1827             for (int32_t i = 0; i < numRuleSets; i++) {
   1828                 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
   1829             }
   1830         }
   1831     }
   1832 }
   1833 
   1834 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
   1835 void
   1836 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
   1837 {
   1838     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
   1839 }
   1840 
   1841 PluralFormat *
   1842 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
   1843                                           const UnicodeString &pattern,
   1844                                           UErrorCode& status) const
   1845 {
   1846     return new PluralFormat(locale, pluralType, pattern, status);
   1847 }
   1848 
   1849 U_NAMESPACE_END
   1850 
   1851 /* U_HAVE_RBNF */
   1852 #endif
   1853