Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1997-2015, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 #include "utypeinfo.h"  // for 'typeid' to work
     12 
     13 #include "unicode/rbnf.h"
     14 
     15 #if U_HAVE_RBNF
     16 
     17 #include "unicode/normlzr.h"
     18 #include "unicode/plurfmt.h"
     19 #include "unicode/tblcoll.h"
     20 #include "unicode/uchar.h"
     21 #include "unicode/ucol.h"
     22 #include "unicode/uloc.h"
     23 #include "unicode/unum.h"
     24 #include "unicode/ures.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/utf16.h"
     27 #include "unicode/udata.h"
     28 #include "unicode/udisplaycontext.h"
     29 #include "unicode/brkiter.h"
     30 #include "nfrs.h"
     31 
     32 #include "cmemory.h"
     33 #include "cstring.h"
     34 #include "patternprops.h"
     35 #include "uresimp.h"
     36 
     37 // debugging
     38 // #define RBNF_DEBUG
     39 
     40 #ifdef RBNF_DEBUG
     41 #include <stdio.h>
     42 #endif
     43 
// Name of the resource tree that is opened to load RBNF rule data:
// the ICU data package name, the tree separator, then "rbnf".
#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

// NUL-terminated UTF-16 spelling of "%%".
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// NUL-terminated UTF-16 spelling of "%%lenient-parse:".
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// The single code unit ';'.
static const UChar gSemiColon = 0x003B;
// NUL-terminated UTF-16 spelling of ";%".
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square, 2^44.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
     66 
     67 U_NAMESPACE_BEGIN
     68 
     69 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     70 
     71 /*
     72 This is a utility class. It does not use ICU's RTTI.
     73 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     74 Please make sure that intltest passes on Windows in Release mode,
     75 since the string pooling per compilation unit will mess up how RTTI works.
     76 The RTTI code was also removed due to lack of code coverage.
     77 */
     78 class LocalizationInfo : public UMemory {
     79 protected:
     80     virtual ~LocalizationInfo();
     81     uint32_t refcount;
     82 
     83 public:
     84     LocalizationInfo() : refcount(0) {}
     85 
     86     LocalizationInfo* ref(void) {
     87         ++refcount;
     88         return this;
     89     }
     90 
     91     LocalizationInfo* unref(void) {
     92         if (refcount && --refcount == 0) {
     93             delete this;
     94         }
     95         return NULL;
     96     }
     97 
     98     virtual UBool operator==(const LocalizationInfo* rhs) const;
     99     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
    100 
    101     virtual int32_t getNumberOfRuleSets(void) const = 0;
    102     virtual const UChar* getRuleSetName(int32_t index) const = 0;
    103     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    104     virtual const UChar* getLocaleName(int32_t index) const = 0;
    105     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    106 
    107     virtual int32_t indexForLocale(const UChar* locale) const;
    108     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    109 
    110 //    virtual UClassID getDynamicClassID() const = 0;
    111 //    static UClassID getStaticClassID(void);
    112 };
    113 
// Out-of-line definition of the virtual destructor; the body is empty, so nothing is released at this level.
LocalizationInfo::~LocalizationInfo() {}
    115 
    116 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    117 
    118 // if both strings are NULL, this returns TRUE
    119 static UBool
    120 streq(const UChar* lhs, const UChar* rhs) {
    121     if (rhs == lhs) {
    122         return TRUE;
    123     }
    124     if (lhs && rhs) {
    125         return u_strcmp(lhs, rhs) == 0;
    126     }
    127     return FALSE;
    128 }
    129 
    130 UBool
    131 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    132     if (rhs) {
    133         if (this == rhs) {
    134             return TRUE;
    135         }
    136 
    137         int32_t rsc = getNumberOfRuleSets();
    138         if (rsc == rhs->getNumberOfRuleSets()) {
    139             for (int i = 0; i < rsc; ++i) {
    140                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    141                     return FALSE;
    142                 }
    143             }
    144             int32_t dlc = getNumberOfDisplayLocales();
    145             if (dlc == rhs->getNumberOfDisplayLocales()) {
    146                 for (int i = 0; i < dlc; ++i) {
    147                     const UChar* locale = getLocaleName(i);
    148                     int32_t ix = rhs->indexForLocale(locale);
    149                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    150                     if (!streq(locale, rhs->getLocaleName(ix))) {
    151                         return FALSE;
    152                     }
    153                     for (int j = 0; j < rsc; ++j) {
    154                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    155                             return FALSE;
    156                         }
    157                     }
    158                 }
    159                 return TRUE;
    160             }
    161         }
    162     }
    163     return FALSE;
    164 }
    165 
    166 int32_t
    167 LocalizationInfo::indexForLocale(const UChar* locale) const {
    168     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    169         if (streq(locale, getLocaleName(i))) {
    170             return i;
    171         }
    172     }
    173     return -1;
    174 }
    175 
    176 int32_t
    177 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    178     if (ruleset) {
    179         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    180             if (streq(ruleset, getRuleSetName(i))) {
    181                 return i;
    182             }
    183         }
    184     }
    185     return -1;
    186 }
    187 
    188 
    189 typedef void (*Fn_Deleter)(void*);
    190 
    191 class VArray {
    192     void** buf;
    193     int32_t cap;
    194     int32_t size;
    195     Fn_Deleter deleter;
    196 public:
    197     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    198 
    199     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    200 
    201     ~VArray() {
    202         if (deleter) {
    203             for (int i = 0; i < size; ++i) {
    204                 (*deleter)(buf[i]);
    205             }
    206         }
    207         uprv_free(buf);
    208     }
    209 
    210     int32_t length() {
    211         return size;
    212     }
    213 
    214     void add(void* elem, UErrorCode& status) {
    215         if (U_SUCCESS(status)) {
    216             if (size == cap) {
    217                 if (cap == 0) {
    218                     cap = 1;
    219                 } else if (cap < 256) {
    220                     cap *= 2;
    221                 } else {
    222                     cap += 256;
    223                 }
    224                 if (buf == NULL) {
    225                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    226                 } else {
    227                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    228                 }
    229                 if (buf == NULL) {
    230                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    231                     status = U_MEMORY_ALLOCATION_ERROR;
    232                     return;
    233                 }
    234                 void* start = &buf[size];
    235                 size_t count = (cap - size) * sizeof(void*);
    236                 uprv_memset(start, 0, count); // fill with nulls, just because
    237             }
    238             buf[size++] = elem;
    239         }
    240     }
    241 
    242     void** release(void) {
    243         void** result = buf;
    244         buf = NULL;
    245         cap = 0;
    246         size = 0;
    247         return result;
    248     }
    249 };
    250 
    251 class LocDataParser;
    252 
    253 class StringLocalizationInfo : public LocalizationInfo {
    254     UChar* info;
    255     UChar*** data;
    256     int32_t numRuleSets;
    257     int32_t numLocales;
    258 
    259 friend class LocDataParser;
    260 
    261     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
    262         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    263     {
    264     }
    265 
    266 public:
    267     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
    268 
    269     virtual ~StringLocalizationInfo();
    270     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    271     virtual const UChar* getRuleSetName(int32_t index) const;
    272     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    273     virtual const UChar* getLocaleName(int32_t index) const;
    274     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
    275 
    276 //    virtual UClassID getDynamicClassID() const;
    277 //    static UClassID getStaticClassID(void);
    278 
    279 private:
    280     void init(UErrorCode& status) const;
    281 };
    282 
    283 
// Code units of the punctuation recognized while parsing a localization string.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* apostrophe (single quote) */
    QUOTE = 0x0022, /* double quote */
    SPACE = 0x0020 /* ' ' */
};
    292 
    293 /**
    294  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    295  */
    296 class LocDataParser {
    297     UChar* data;
    298     const UChar* e;
    299     UChar* p;
    300     UChar ch;
    301     UParseError& pe;
    302     UErrorCode& ec;
    303 
    304 public:
    305     LocDataParser(UParseError& parseError, UErrorCode& status)
    306         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    307     ~LocDataParser() {}
    308 
    309     /*
    310     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    311     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    312     */
    313     StringLocalizationInfo* parse(UChar* data, int32_t len);
    314 
    315 private:
    316 
    317     void inc(void) { ++p; ch = 0xffff; }
    318     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    319     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    320     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    321     UBool inList(UChar c, const UChar* list) const {
    322         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
    323         while (*list && *list != c) ++list; return *list == c;
    324     }
    325     void parseError(const char* msg);
    326 
    327     StringLocalizationInfo* doParse(void);
    328 
    329     UChar** nextArray(int32_t& requiredLength);
    330     UChar*  nextString(void);
    331 };
    332 
// ERROR(msg) reports a parse failure through parseError() and then returns NULL
// from the enclosing function. It expands to TWO statements, so it must always be
// used inside braces: an unbraced `if (cond) ERROR(...)` would return unconditionally.
// The message is only forwarded when RBNF_DEBUG is defined; otherwise parseError()
// receives NULL and EXPLANATION_ARG expands to nothing, which leaves the parameter
// of parseError() unnamed.
#ifdef RBNF_DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
    340 
    341 
// Terminator list for a string opened with a double quote: only the matching quote ends it.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator list for a string opened with an apostrophe: only the matching apostrophe ends it.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminator list for an unquoted string. The leading SPACE makes
// LocDataParser::inList treat any pattern white space as a terminator too.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
    353 
    354 static void
    355 DeleteFn(void* p) {
    356   uprv_free(p);
    357 }
    358 
    359 StringLocalizationInfo*
    360 LocDataParser::parse(UChar* _data, int32_t len) {
    361     if (U_FAILURE(ec)) {
    362         if (_data) uprv_free(_data);
    363         return NULL;
    364     }
    365 
    366     pe.line = 0;
    367     pe.offset = -1;
    368     pe.postContext[0] = 0;
    369     pe.preContext[0] = 0;
    370 
    371     if (_data == NULL) {
    372         ec = U_ILLEGAL_ARGUMENT_ERROR;
    373         return NULL;
    374     }
    375 
    376     if (len <= 0) {
    377         ec = U_ILLEGAL_ARGUMENT_ERROR;
    378         uprv_free(_data);
    379         return NULL;
    380     }
    381 
    382     data = _data;
    383     e = data + len;
    384     p = _data;
    385     ch = 0xffff;
    386 
    387     return doParse();
    388 }
    389 
    390 
    391 StringLocalizationInfo*
    392 LocDataParser::doParse(void) {
    393     skipWhitespace();
    394     if (!checkInc(OPEN_ANGLE)) {
    395         ERROR("Missing open angle");
    396     } else {
    397         VArray array(DeleteFn);
    398         UBool mightHaveNext = TRUE;
    399         int32_t requiredLength = -1;
    400         while (mightHaveNext) {
    401             mightHaveNext = FALSE;
    402             UChar** elem = nextArray(requiredLength);
    403             skipWhitespace();
    404             UBool haveComma = check(COMMA);
    405             if (elem) {
    406                 array.add(elem, ec);
    407                 if (haveComma) {
    408                     inc();
    409                     mightHaveNext = TRUE;
    410                 }
    411             } else if (haveComma) {
    412                 ERROR("Unexpected character");
    413             }
    414         }
    415 
    416         skipWhitespace();
    417         if (!checkInc(CLOSE_ANGLE)) {
    418             if (check(OPEN_ANGLE)) {
    419                 ERROR("Missing comma in outer array");
    420             } else {
    421                 ERROR("Missing close angle bracket in outer array");
    422             }
    423         }
    424 
    425         skipWhitespace();
    426         if (p != e) {
    427             ERROR("Extra text after close of localization data");
    428         }
    429 
    430         array.add(NULL, ec);
    431         if (U_SUCCESS(ec)) {
    432             int32_t numLocs = array.length() - 2; // subtract first, NULL
    433             UChar*** result = (UChar***)array.release();
    434 
    435             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    436         }
    437     }
    438 
    439     ERROR("Unknown error");
    440 }
    441 
    442 UChar**
    443 LocDataParser::nextArray(int32_t& requiredLength) {
    444     if (U_FAILURE(ec)) {
    445         return NULL;
    446     }
    447 
    448     skipWhitespace();
    449     if (!checkInc(OPEN_ANGLE)) {
    450         ERROR("Missing open angle");
    451     }
    452 
    453     VArray array;
    454     UBool mightHaveNext = TRUE;
    455     while (mightHaveNext) {
    456         mightHaveNext = FALSE;
    457         UChar* elem = nextString();
    458         skipWhitespace();
    459         UBool haveComma = check(COMMA);
    460         if (elem) {
    461             array.add(elem, ec);
    462             if (haveComma) {
    463                 inc();
    464                 mightHaveNext = TRUE;
    465             }
    466         } else if (haveComma) {
    467             ERROR("Unexpected comma");
    468         }
    469     }
    470     skipWhitespace();
    471     if (!checkInc(CLOSE_ANGLE)) {
    472         if (check(OPEN_ANGLE)) {
    473             ERROR("Missing close angle bracket in inner array");
    474         } else {
    475             ERROR("Missing comma in inner array");
    476         }
    477     }
    478 
    479     array.add(NULL, ec);
    480     if (U_SUCCESS(ec)) {
    481         if (requiredLength == -1) {
    482             requiredLength = array.length() + 1;
    483         } else if (array.length() != requiredLength) {
    484             ec = U_ILLEGAL_ARGUMENT_ERROR;
    485             ERROR("Array not of required length");
    486         }
    487 
    488         return (UChar**)array.release();
    489     }
    490     ERROR("Unknown Error");
    491 }
    492 
    493 UChar*
    494 LocDataParser::nextString() {
    495     UChar* result = NULL;
    496 
    497     skipWhitespace();
    498     if (p < e) {
    499         const UChar* terminators;
    500         UChar c = *p;
    501         UBool haveQuote = c == QUOTE || c == TICK;
    502         if (haveQuote) {
    503             inc();
    504             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
    505         } else {
    506             terminators = NOQUOTE_STOPLIST;
    507         }
    508         UChar* start = p;
    509         while (p < e && !inList(*p, terminators)) ++p;
    510         if (p == e) {
    511             ERROR("Unexpected end of data");
    512         }
    513 
    514         UChar x = *p;
    515         if (p > start) {
    516             ch = x;
    517             *p = 0x0; // terminate by writing to data
    518             result = start; // just point into data
    519         }
    520         if (haveQuote) {
    521             if (x != c) {
    522                 ERROR("Missing matching quote");
    523             } else if (p == start) {
    524                 ERROR("Empty string");
    525             }
    526             inc();
    527         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
    528             ERROR("Unexpected character in string");
    529         }
    530     }
    531 
    532     // ok for there to be no next string
    533     return result;
    534 }
    535 
    536 void LocDataParser::parseError(const char* EXPLANATION_ARG)
    537 {
    538     if (!data) {
    539         return;
    540     }
    541 
    542     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    543     if (start < data) {
    544         start = data;
    545     }
    546     for (UChar* x = p; --x >= start;) {
    547         if (!*x) {
    548             start = x+1;
    549             break;
    550         }
    551     }
    552     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    553     if (limit > e) {
    554         limit = e;
    555     }
    556     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    557     pe.preContext[p-start] = 0;
    558     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    559     pe.postContext[limit-p] = 0;
    560     pe.offset = (int32_t)(p - data);
    561 
    562 #ifdef RBNF_DEBUG
    563     fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
    564 
    565     UnicodeString msg;
    566     msg.append(start, p - start);
    567     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    568     msg.append(p, limit-p);
    569     msg.append(UNICODE_STRING_SIMPLE("'"));
    570 
    571     char buf[128];
    572     int32_t len = msg.extract(0, msg.length(), buf, 128);
    573     if (len >= 128) {
    574         buf[127] = 0;
    575     } else {
    576         buf[len] = 0;
    577     }
    578     fprintf(stderr, "%s\n", buf);
    579     fflush(stderr);
    580 #endif
    581 
    582     uprv_free(data);
    583     data = NULL;
    584     p = NULL;
    585     e = NULL;
    586 
    587     if (U_SUCCESS(ec)) {
    588         ec = U_PARSE_ERROR;
    589     }
    590 }
    591 
    592 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    593 
    594 StringLocalizationInfo*
    595 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    596     if (U_FAILURE(status)) {
    597         return NULL;
    598     }
    599 
    600     int32_t len = info.length();
    601     if (len == 0) {
    602         return NULL; // no error;
    603     }
    604 
    605     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    606     if (!p) {
    607         status = U_MEMORY_ALLOCATION_ERROR;
    608         return NULL;
    609     }
    610     info.extract(p, len, status);
    611     if (!U_FAILURE(status)) {
    612         status = U_ZERO_ERROR; // clear warning about non-termination
    613     }
    614 
    615     LocDataParser parser(perror, status);
    616     return parser.parse(p, len);
    617 }
    618 
    619 StringLocalizationInfo::~StringLocalizationInfo() {
    620     for (UChar*** p = (UChar***)data; *p; ++p) {
    621         // remaining data is simply pointer into our unicode string data.
    622         if (*p) uprv_free(*p);
    623     }
    624     if (data) uprv_free(data);
    625     if (info) uprv_free(info);
    626 }
    627 
    628 
    629 const UChar*
    630 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    631     if (index >= 0 && index < getNumberOfRuleSets()) {
    632         return data[0][index];
    633     }
    634     return NULL;
    635 }
    636 
    637 const UChar*
    638 StringLocalizationInfo::getLocaleName(int32_t index) const {
    639     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    640         return data[index+1][0];
    641     }
    642     return NULL;
    643 }
    644 
    645 const UChar*
    646 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    647     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    648         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    649         return data[localeIndex+1][ruleIndex+1];
    650     }
    651     return NULL;
    652 }
    653 
    654 // ----------
    655 
    656 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    657                                              const UnicodeString& locs,
    658                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    659   : ruleSets(NULL)
    660   , ruleSetDescriptions(NULL)
    661   , numRuleSets(0)
    662   , defaultRuleSet(NULL)
    663   , locale(alocale)
    664   , collator(NULL)
    665   , decimalFormatSymbols(NULL)
    666   , defaultInfinityRule(NULL)
    667   , defaultNaNRule(NULL)
    668   , lenient(FALSE)
    669   , lenientParseRules(NULL)
    670   , localizations(NULL)
    671   , capitalizationInfoSet(FALSE)
    672   , capitalizationForUIListMenu(FALSE)
    673   , capitalizationForStandAlone(FALSE)
    674   , capitalizationBrkIter(NULL)
    675 {
    676   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    677   init(description, locinfo, perror, status);
    678 }
    679 
    680 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    681                                              const UnicodeString& locs,
    682                                              UParseError& perror, UErrorCode& status)
    683   : ruleSets(NULL)
    684   , ruleSetDescriptions(NULL)
    685   , numRuleSets(0)
    686   , defaultRuleSet(NULL)
    687   , locale(Locale::getDefault())
    688   , collator(NULL)
    689   , decimalFormatSymbols(NULL)
    690   , defaultInfinityRule(NULL)
    691   , defaultNaNRule(NULL)
    692   , lenient(FALSE)
    693   , lenientParseRules(NULL)
    694   , localizations(NULL)
    695   , capitalizationInfoSet(FALSE)
    696   , capitalizationForUIListMenu(FALSE)
    697   , capitalizationForStandAlone(FALSE)
    698   , capitalizationBrkIter(NULL)
    699 {
    700   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    701   init(description, locinfo, perror, status);
    702 }
    703 
    704 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    705                                              LocalizationInfo* info,
    706                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    707   : ruleSets(NULL)
    708   , ruleSetDescriptions(NULL)
    709   , numRuleSets(0)
    710   , defaultRuleSet(NULL)
    711   , locale(alocale)
    712   , collator(NULL)
    713   , decimalFormatSymbols(NULL)
    714   , defaultInfinityRule(NULL)
    715   , defaultNaNRule(NULL)
    716   , lenient(FALSE)
    717   , lenientParseRules(NULL)
    718   , localizations(NULL)
    719   , capitalizationInfoSet(FALSE)
    720   , capitalizationForUIListMenu(FALSE)
    721   , capitalizationForStandAlone(FALSE)
    722   , capitalizationBrkIter(NULL)
    723 {
    724   init(description, info, perror, status);
    725 }
    726 
    727 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    728                          UParseError& perror,
    729                          UErrorCode& status)
    730   : ruleSets(NULL)
    731   , ruleSetDescriptions(NULL)
    732   , numRuleSets(0)
    733   , defaultRuleSet(NULL)
    734   , locale(Locale::getDefault())
    735   , collator(NULL)
    736   , decimalFormatSymbols(NULL)
    737   , defaultInfinityRule(NULL)
    738   , defaultNaNRule(NULL)
    739   , lenient(FALSE)
    740   , lenientParseRules(NULL)
    741   , localizations(NULL)
    742   , capitalizationInfoSet(FALSE)
    743   , capitalizationForUIListMenu(FALSE)
    744   , capitalizationForStandAlone(FALSE)
    745   , capitalizationBrkIter(NULL)
    746 {
    747     init(description, NULL, perror, status);
    748 }
    749 
    750 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    751                          const Locale& aLocale,
    752                          UParseError& perror,
    753                          UErrorCode& status)
    754   : ruleSets(NULL)
    755   , ruleSetDescriptions(NULL)
    756   , numRuleSets(0)
    757   , defaultRuleSet(NULL)
    758   , locale(aLocale)
    759   , collator(NULL)
    760   , decimalFormatSymbols(NULL)
    761   , defaultInfinityRule(NULL)
    762   , defaultNaNRule(NULL)
    763   , lenient(FALSE)
    764   , lenientParseRules(NULL)
    765   , localizations(NULL)
    766   , capitalizationInfoSet(FALSE)
    767   , capitalizationForUIListMenu(FALSE)
    768   , capitalizationForStandAlone(FALSE)
    769   , capitalizationBrkIter(NULL)
    770 {
    771     init(description, NULL, perror, status);
    772 }
    773 
    774 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    775   : ruleSets(NULL)
    776   , ruleSetDescriptions(NULL)
    777   , numRuleSets(0)
    778   , defaultRuleSet(NULL)
    779   , locale(alocale)
    780   , collator(NULL)
    781   , decimalFormatSymbols(NULL)
    782   , defaultInfinityRule(NULL)
    783   , defaultNaNRule(NULL)
    784   , lenient(FALSE)
    785   , lenientParseRules(NULL)
    786   , localizations(NULL)
    787   , capitalizationInfoSet(FALSE)
    788   , capitalizationForUIListMenu(FALSE)
    789   , capitalizationForStandAlone(FALSE)
    790   , capitalizationBrkIter(NULL)
    791 {
    792     if (U_FAILURE(status)) {
    793         return;
    794     }
    795 
    796     const char* rules_tag = "RBNFRules";
    797     const char* fmt_tag = "";
    798     switch (tag) {
    799     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    800     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    801     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    802     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    803     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    804     }
    805 
    806     // TODO: read localization info from resource
    807     LocalizationInfo* locinfo = NULL;
    808 
    809     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    810     if (U_SUCCESS(status)) {
    811         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    812                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    813 
    814         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    815         if (U_FAILURE(status)) {
    816             ures_close(nfrb);
    817         }
    818         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    819         if (U_FAILURE(status)) {
    820             ures_close(rbnfRules);
    821             ures_close(nfrb);
    822             return;
    823         }
    824 
    825         UnicodeString desc;
    826         while (ures_hasNext(ruleSets)) {
    827            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
    828         }
    829         UParseError perror;
    830 
    831         init(desc, locinfo, perror, status);
    832 
    833         ures_close(ruleSets);
    834         ures_close(rbnfRules);
    835     }
    836     ures_close(nfrb);
    837 }
    838 
    839 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    840   : NumberFormat(rhs)
    841   , ruleSets(NULL)
    842   , ruleSetDescriptions(NULL)
    843   , numRuleSets(0)
    844   , defaultRuleSet(NULL)
    845   , locale(rhs.locale)
    846   , collator(NULL)
    847   , decimalFormatSymbols(NULL)
    848   , defaultInfinityRule(NULL)
    849   , defaultNaNRule(NULL)
    850   , lenient(FALSE)
    851   , lenientParseRules(NULL)
    852   , localizations(NULL)
    853   , capitalizationInfoSet(FALSE)
    854   , capitalizationForUIListMenu(FALSE)
    855   , capitalizationForStandAlone(FALSE)
    856   , capitalizationBrkIter(NULL)
    857 {
    858     this->operator=(rhs);
    859 }
    860 
    861 // --------
    862 
// Assignment: discards this formatter's current state and rebuilds it from
// rhs's original rule description, localization info, default rule set and
// capitalization settings. Self-assignment is a no-op.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    // Failures during the rebuild are collected in this local status only;
    // the operator has no way to report them to the caller.
    UErrorCode status = U_ZERO_ERROR;
    // Release what this object currently holds before re-initializing.
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    // NOTE(review): dereferences rhs.getDecimalFormatSymbols() without a NULL
    // check -- confirm it cannot return NULL for a formatter that failed to initialize.
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // The localization info is shared with rhs; ref() adds a reference before it is passed on.
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // The break iterator is cloned, not shared.
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
    889 
    890 RuleBasedNumberFormat::~RuleBasedNumberFormat()
    891 {
    892     dispose();
    893 }
    894 
    895 Format*
    896 RuleBasedNumberFormat::clone(void) const
    897 {
    898     return new RuleBasedNumberFormat(*this);
    899 }
    900 
    901 UBool
    902 RuleBasedNumberFormat::operator==(const Format& other) const
    903 {
    904     if (this == &other) {
    905         return TRUE;
    906     }
    907 
    908     if (typeid(*this) == typeid(other)) {
    909         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    910         // test for capitalization info equality is adequately handled
    911         // by the NumberFormat test for fCapitalizationContext equality;
    912         // the info here is just derived from that.
    913         if (locale == rhs.locale &&
    914             lenient == rhs.lenient &&
    915             (localizations == NULL
    916                 ? rhs.localizations == NULL
    917                 : (rhs.localizations == NULL
    918                     ? FALSE
    919                     : *localizations == rhs.localizations))) {
    920 
    921             NFRuleSet** p = ruleSets;
    922             NFRuleSet** q = rhs.ruleSets;
    923             if (p == NULL) {
    924                 return q == NULL;
    925             } else if (q == NULL) {
    926                 return FALSE;
    927             }
    928             while (*p && *q && (**p == **q)) {
    929                 ++p;
    930                 ++q;
    931             }
    932             return *q == NULL && *p == NULL;
    933         }
    934     }
    935 
    936     return FALSE;
    937 }
    938 
    939 UnicodeString
    940 RuleBasedNumberFormat::getRules() const
    941 {
    942     UnicodeString result;
    943     if (ruleSets != NULL) {
    944         for (NFRuleSet** p = ruleSets; *p; ++p) {
    945             (*p)->appendRules(result);
    946         }
    947     }
    948     return result;
    949 }
    950 
    951 UnicodeString
    952 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    953 {
    954     if (localizations) {
    955         UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    956         return string;
    957     }
    958     else if (ruleSets) {
    959         UnicodeString result;
    960         for (NFRuleSet** p = ruleSets; *p; ++p) {
    961             NFRuleSet* rs = *p;
    962             if (rs->isPublic()) {
    963                 if (--index == -1) {
    964                     rs->getName(result);
    965                     return result;
    966                 }
    967             }
    968         }
    969     }
    970     UnicodeString empty;
    971     return empty;
    972 }
    973 
    974 int32_t
    975 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    976 {
    977     int32_t result = 0;
    978     if (localizations) {
    979         result = localizations->getNumberOfRuleSets();
    980     }
    981     else if (ruleSets) {
    982         for (NFRuleSet** p = ruleSets; *p; ++p) {
    983             if ((**p).isPublic()) {
    984                 ++result;
    985             }
    986         }
    987     }
    988     return result;
    989 }
    990 
    991 int32_t
    992 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    993     if (localizations) {
    994         return localizations->getNumberOfDisplayLocales();
    995     }
    996     return 0;
    997 }
    998 
    999 Locale
   1000 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
   1001     if (U_FAILURE(status)) {
   1002         return Locale("");
   1003     }
   1004     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
   1005         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
   1006         char buffer[64];
   1007         int32_t cap = name.length() + 1;
   1008         char* bp = buffer;
   1009         if (cap > 64) {
   1010             bp = (char *)uprv_malloc(cap);
   1011             if (bp == NULL) {
   1012                 status = U_MEMORY_ALLOCATION_ERROR;
   1013                 return Locale("");
   1014             }
   1015         }
   1016         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
   1017         Locale retLocale(bp);
   1018         if (bp != buffer) {
   1019             uprv_free(bp);
   1020         }
   1021         return retLocale;
   1022     }
   1023     status = U_ILLEGAL_ARGUMENT_ERROR;
   1024     Locale retLocale;
   1025     return retLocale;
   1026 }
   1027 
   1028 UnicodeString
   1029 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
   1030     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
   1031         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
   1032         int32_t len = localeName.length();
   1033         UChar* localeStr = localeName.getBuffer(len + 1);
   1034         while (len >= 0) {
   1035             localeStr[len] = 0;
   1036             int32_t ix = localizations->indexForLocale(localeStr);
   1037             if (ix >= 0) {
   1038                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1039                 return name;
   1040             }
   1041 
   1042             // trim trailing portion, skipping over ommitted sections
   1043             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1044             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1045         }
   1046         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1047         return name;
   1048     }
   1049     UnicodeString bogus;
   1050     bogus.setToBogus();
   1051     return bogus;
   1052 }
   1053 
   1054 UnicodeString
   1055 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1056     if (localizations) {
   1057         UnicodeString rsn(ruleSetName);
   1058         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1059         return getRuleSetDisplayName(ix, localeParam);
   1060     }
   1061     UnicodeString bogus;
   1062     bogus.setToBogus();
   1063     return bogus;
   1064 }
   1065 
   1066 NFRuleSet*
   1067 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1068 {
   1069     if (U_SUCCESS(status) && ruleSets) {
   1070         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1071             NFRuleSet* rs = *p;
   1072             if (rs->isNamed(name)) {
   1073                 return rs;
   1074             }
   1075         }
   1076         status = U_ILLEGAL_ARGUMENT_ERROR;
   1077     }
   1078     return NULL;
   1079 }
   1080 
   1081 UnicodeString&
   1082 RuleBasedNumberFormat::format(int32_t number,
   1083                               UnicodeString& toAppendTo,
   1084                               FieldPosition& /* pos */) const
   1085 {
   1086     if (defaultRuleSet) {
   1087         UErrorCode status = U_ZERO_ERROR;
   1088         int32_t startPos = toAppendTo.length();
   1089         defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
   1090         adjustForCapitalizationContext(startPos, toAppendTo);
   1091     }
   1092     return toAppendTo;
   1093 }
   1094 
   1095 
   1096 UnicodeString&
   1097 RuleBasedNumberFormat::format(int64_t number,
   1098                               UnicodeString& toAppendTo,
   1099                               FieldPosition& /* pos */) const
   1100 {
   1101     if (defaultRuleSet) {
   1102         UErrorCode status = U_ZERO_ERROR;
   1103         int32_t startPos = toAppendTo.length();
   1104         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1105         adjustForCapitalizationContext(startPos, toAppendTo);
   1106     }
   1107     return toAppendTo;
   1108 }
   1109 
   1110 
   1111 UnicodeString&
   1112 RuleBasedNumberFormat::format(double number,
   1113                               UnicodeString& toAppendTo,
   1114                               FieldPosition& /* pos */) const
   1115 {
   1116     int32_t startPos = toAppendTo.length();
   1117     if (defaultRuleSet) {
   1118         UErrorCode status = U_ZERO_ERROR;
   1119         defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1120     }
   1121     return adjustForCapitalizationContext(startPos, toAppendTo);
   1122 }
   1123 
   1124 
   1125 UnicodeString&
   1126 RuleBasedNumberFormat::format(int32_t number,
   1127                               const UnicodeString& ruleSetName,
   1128                               UnicodeString& toAppendTo,
   1129                               FieldPosition& /* pos */,
   1130                               UErrorCode& status) const
   1131 {
   1132     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1133     if (U_SUCCESS(status)) {
   1134         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1135             // throw new IllegalArgumentException("Can't use internal rule set");
   1136             status = U_ILLEGAL_ARGUMENT_ERROR;
   1137         } else {
   1138             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1139             if (rs) {
   1140                 int32_t startPos = toAppendTo.length();
   1141                 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
   1142                 adjustForCapitalizationContext(startPos, toAppendTo);
   1143             }
   1144         }
   1145     }
   1146     return toAppendTo;
   1147 }
   1148 
   1149 
   1150 UnicodeString&
   1151 RuleBasedNumberFormat::format(int64_t number,
   1152                               const UnicodeString& ruleSetName,
   1153                               UnicodeString& toAppendTo,
   1154                               FieldPosition& /* pos */,
   1155                               UErrorCode& status) const
   1156 {
   1157     if (U_SUCCESS(status)) {
   1158         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1159             // throw new IllegalArgumentException("Can't use internal rule set");
   1160             status = U_ILLEGAL_ARGUMENT_ERROR;
   1161         } else {
   1162             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1163             if (rs) {
   1164                 int32_t startPos = toAppendTo.length();
   1165                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1166                 adjustForCapitalizationContext(startPos, toAppendTo);
   1167             }
   1168         }
   1169     }
   1170     return toAppendTo;
   1171 }
   1172 
   1173 
   1174 UnicodeString&
   1175 RuleBasedNumberFormat::format(double number,
   1176                               const UnicodeString& ruleSetName,
   1177                               UnicodeString& toAppendTo,
   1178                               FieldPosition& /* pos */,
   1179                               UErrorCode& status) const
   1180 {
   1181     if (U_SUCCESS(status)) {
   1182         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
   1183             // throw new IllegalArgumentException("Can't use internal rule set");
   1184             status = U_ILLEGAL_ARGUMENT_ERROR;
   1185         } else {
   1186             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1187             if (rs) {
   1188                 int32_t startPos = toAppendTo.length();
   1189                 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
   1190                 adjustForCapitalizationContext(startPos, toAppendTo);
   1191             }
   1192         }
   1193     }
   1194     return toAppendTo;
   1195 }
   1196 
   1197 UnicodeString&
   1198 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
   1199                                                       UnicodeString& currentResult) const
   1200 {
   1201 #if !UCONFIG_NO_BREAK_ITERATION
   1202     if (startPos==0 && currentResult.length() > 0) {
   1203         // capitalize currentResult according to context
   1204         UChar32 ch = currentResult.char32At(0);
   1205         UErrorCode status = U_ZERO_ERROR;
   1206         UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
   1207         if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
   1208               ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1209                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1210                 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1211             // titlecase first word of currentResult, here use sentence iterator unlike current implementations
   1212             // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
   1213             currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
   1214         }
   1215     }
   1216 #endif
   1217     return currentResult;
   1218 }
   1219 
   1220 
   1221 void
   1222 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1223                              Formattable& result,
   1224                              ParsePosition& parsePosition) const
   1225 {
   1226     if (!ruleSets) {
   1227         parsePosition.setErrorIndex(0);
   1228         return;
   1229     }
   1230 
   1231     UnicodeString workingText(text, parsePosition.getIndex());
   1232     ParsePosition workingPos(0);
   1233 
   1234     ParsePosition high_pp(0);
   1235     Formattable high_result;
   1236 
   1237     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1238         NFRuleSet *rp = *p;
   1239         if (rp->isPublic() && rp->isParseable()) {
   1240             ParsePosition working_pp(0);
   1241             Formattable working_result;
   1242 
   1243             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1244             if (working_pp.getIndex() > high_pp.getIndex()) {
   1245                 high_pp = working_pp;
   1246                 high_result = working_result;
   1247 
   1248                 if (high_pp.getIndex() == workingText.length()) {
   1249                     break;
   1250                 }
   1251             }
   1252         }
   1253     }
   1254 
   1255     int32_t startIndex = parsePosition.getIndex();
   1256     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1257     if (high_pp.getIndex() > 0) {
   1258         parsePosition.setErrorIndex(-1);
   1259     } else {
   1260         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1261         parsePosition.setErrorIndex(startIndex + errorIndex);
   1262     }
   1263     result = high_result;
   1264     if (result.getType() == Formattable::kDouble) {
   1265         double d = result.getDouble();
   1266         if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
   1267             // Note: casting a double to an int when the double is too large or small
   1268             //       to fit the destination is undefined behavior. The explicit range checks,
   1269             //       above, are required. Just casting and checking the result value is undefined.
   1270             result.setLong(static_cast<int32_t>(d));
   1271         }
   1272     }
   1273 }
   1274 
   1275 #if !UCONFIG_NO_COLLATION
   1276 
   1277 void
   1278 RuleBasedNumberFormat::setLenient(UBool enabled)
   1279 {
   1280     lenient = enabled;
   1281     if (!enabled && collator) {
   1282         delete collator;
   1283         collator = NULL;
   1284     }
   1285 }
   1286 
   1287 #endif
   1288 
   1289 void
   1290 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1291     if (U_SUCCESS(status)) {
   1292         if (ruleSetName.isEmpty()) {
   1293           if (localizations) {
   1294               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1295               defaultRuleSet = findRuleSet(name, status);
   1296           } else {
   1297             initDefaultRuleSet();
   1298           }
   1299         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1300             status = U_ILLEGAL_ARGUMENT_ERROR;
   1301         } else {
   1302             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1303             if (result != NULL) {
   1304                 defaultRuleSet = result;
   1305             }
   1306         }
   1307     }
   1308 }
   1309 
   1310 UnicodeString
   1311 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1312     UnicodeString result;
   1313     if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1314         defaultRuleSet->getName(result);
   1315     } else {
   1316         result.setToBogus();
   1317     }
   1318     return result;
   1319 }
   1320 
   1321 void
   1322 RuleBasedNumberFormat::initDefaultRuleSet()
   1323 {
   1324     defaultRuleSet = NULL;
   1325     if (!ruleSets) {
   1326         return;
   1327     }
   1328 
   1329     const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
   1330     const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
   1331     const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
   1332 
   1333     NFRuleSet**p = &ruleSets[0];
   1334     while (*p) {
   1335         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1336             defaultRuleSet = *p;
   1337             return;
   1338         } else {
   1339             ++p;
   1340         }
   1341     }
   1342 
   1343     defaultRuleSet = *--p;
   1344     if (!defaultRuleSet->isPublic()) {
   1345         while (p != ruleSets) {
   1346             if ((*--p)->isPublic()) {
   1347                 defaultRuleSet = *p;
   1348                 break;
   1349             }
   1350         }
   1351     }
   1352 }
   1353 
   1354 
// Builds this formatter's rule sets from a textual rule description.
//   rules             - the rule description; rule sets are separated by ";%"
//   localizationInfos - optional localization info (may be NULL); when given,
//                       a reference is taken with ref() and stored, and every
//                       rule set name it lists must exist in the rules
//   pErr              - zeroed on entry; not otherwise filled in (see TODO)
//   status            - in/out error code; nothing is built if it already
//                       indicates failure
// On an early error return the object can be left partially initialized
// (for example ruleSets NULL, or allocated but only partly filled).
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    initializeDecimalFormatSymbols(status);
    initializeDefaultInfinityRule(status);
    initializeDefaultNaNRule(status);
    if (U_FAILURE(status)) {
        return;
    }

    // Take our own reference on the localization info; dispose() releases it.
    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    // Work on a copy so the caller's string is never modified.
    UnicodeString description(rules);
    // NOTE(review): an empty description is reported as a memory allocation
    // error; presumably an empty copy is taken to mean the copy failed -- confirm.
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse, -1, 0);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, 2, lp);

            // no following rule set: the lenient-parse rules run to the
            // last character of the description
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            // the character at lpEnd itself is not copied...
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            // ...but it is removed along with the rest of the section
            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
        ++numRuleSets;
        ++p;
    }
    // one more rule set than there are separators
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the terminating NULL that the iteration loops
    // elsewhere in this class rely on)
    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (ruleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // NULL out every slot, including the terminator
    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    // NOTE(review): numRuleSets was incremented just above, so it is at
    // least 1 here and this check looks unreachable.
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        // each description runs from 'start' through the ';' of the next ";%"
        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the last rule set takes whatever text remains
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first name listed by the localization info becomes the default
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        defaultRuleSet = getDefaultRuleSet();
    }
    // keep the unmodified description; operator= re-runs init() from it
    originalDescription = rules;
}
   1531 
   1532 // override the NumberFormat implementation in order to
   1533 // lazily initialize relevant items
   1534 void
   1535 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
   1536 {
   1537     NumberFormat::setContext(value, status);
   1538     if (U_SUCCESS(status)) {
   1539     	if (!capitalizationInfoSet &&
   1540     	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
   1541     	    initCapitalizationContextInfo(locale);
   1542     	    capitalizationInfoSet = TRUE;
   1543         }
   1544 #if !UCONFIG_NO_BREAK_ITERATION
   1545         if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1546                 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
   1547                 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
   1548             UErrorCode status = U_ZERO_ERROR;
   1549             capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
   1550             if (U_FAILURE(status)) {
   1551                 delete capitalizationBrkIter;
   1552                 capitalizationBrkIter = NULL;
   1553             }
   1554         }
   1555 #endif
   1556     }
   1557 }
   1558 
   1559 void
   1560 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
   1561 {
   1562 #if !UCONFIG_NO_BREAK_ITERATION
   1563     const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
   1564     UErrorCode status = U_ZERO_ERROR;
   1565     UResourceBundle *rb = ures_open(NULL, localeID, &status);
   1566     rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
   1567     rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
   1568     if (U_SUCCESS(status) && rb != NULL) {
   1569         int32_t len = 0;
   1570         const int32_t * intVector = ures_getIntVector(rb, &len, &status);
   1571         if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
   1572             capitalizationForUIListMenu = intVector[0];
   1573             capitalizationForStandAlone = intVector[1];
   1574         }
   1575     }
   1576     ures_close(rb);
   1577 #endif
   1578 }
   1579 
   1580 void
   1581 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1582 {
   1583     // iterate through the characters...
   1584     UnicodeString result;
   1585 
   1586     int start = 0;
   1587     while (start != -1 && start < description.length()) {
   1588         // seek to the first non-whitespace character...
   1589         while (start < description.length()
   1590             && PatternProps::isWhiteSpace(description.charAt(start))) {
   1591             ++start;
   1592         }
   1593 
   1594         // locate the next semicolon in the text and copy the text from
   1595         // our current position up to that semicolon into the result
   1596         int32_t p = description.indexOf(gSemiColon, start);
   1597         if (p == -1) {
   1598             // or if we don't find a semicolon, just copy the rest of
   1599             // the string into the result
   1600             result.append(description, start, description.length() - start);
   1601             start = -1;
   1602         }
   1603         else if (p < description.length()) {
   1604             result.append(description, start, p + 1 - start);
   1605             start = p + 1;
   1606         }
   1607 
   1608         // when we get here, we've seeked off the end of the sring, and
   1609         // we terminate the loop (we continue until *start* is -1 rather
   1610         // than until *p* is -1, because otherwise we'd miss the last
   1611         // rule in the description)
   1612         else {
   1613             start = -1;
   1614         }
   1615     }
   1616 
   1617     description.setTo(result);
   1618 }
   1619 
   1620 
   1621 void
   1622 RuleBasedNumberFormat::dispose()
   1623 {
   1624     if (ruleSets) {
   1625         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1626             delete *p;
   1627         }
   1628         uprv_free(ruleSets);
   1629         ruleSets = NULL;
   1630     }
   1631 
   1632     if (ruleSetDescriptions) {
   1633         delete [] ruleSetDescriptions;
   1634         ruleSetDescriptions = NULL;
   1635     }
   1636 
   1637 #if !UCONFIG_NO_COLLATION
   1638     delete collator;
   1639 #endif
   1640     collator = NULL;
   1641 
   1642     delete decimalFormatSymbols;
   1643     decimalFormatSymbols = NULL;
   1644 
   1645     delete defaultInfinityRule;
   1646     defaultInfinityRule = NULL;
   1647 
   1648     delete defaultNaNRule;
   1649     defaultNaNRule = NULL;
   1650 
   1651     delete lenientParseRules;
   1652     lenientParseRules = NULL;
   1653 
   1654 #if !UCONFIG_NO_BREAK_ITERATION
   1655     delete capitalizationBrkIter;
   1656     capitalizationBrkIter = NULL;
   1657 #endif
   1658 
   1659     if (localizations) {
   1660         localizations = localizations->unref();
   1661     }
   1662 }
   1663 
   1664 
   1665 //-----------------------------------------------------------------------
   1666 // package-internal API
   1667 //-----------------------------------------------------------------------
   1668 
   1669 /**
   1670  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1671  * this function creates it the first time it's called.
   1672  * @return The collator to use for lenient parsing, or null if lenient parsing
   1673  * is turned off.
   1674 */
   1675 const RuleBasedCollator*
   1676 RuleBasedNumberFormat::getCollator() const
   1677 {
   1678 #if !UCONFIG_NO_COLLATION
   1679     if (!ruleSets) {
   1680         return NULL;
   1681     }
   1682 
   1683     // lazy-evaluate the collator
   1684     if (collator == NULL && lenient) {
   1685         // create a default collator based on the formatter's locale,
   1686         // then pull out that collator's rules, append any additional
   1687         // rules specified in the description, and create a _new_
   1688         // collator based on the combinaiton of those rules
   1689 
   1690         UErrorCode status = U_ZERO_ERROR;
   1691 
   1692         Collator* temp = Collator::createInstance(locale, status);
   1693         RuleBasedCollator* newCollator;
   1694         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
   1695             if (lenientParseRules) {
   1696                 UnicodeString rules(newCollator->getRules());
   1697                 rules.append(*lenientParseRules);
   1698 
   1699                 newCollator = new RuleBasedCollator(rules, status);
   1700                 // Exit if newCollator could not be created.
   1701                 if (newCollator == NULL) {
   1702                     return NULL;
   1703                 }
   1704             } else {
   1705                 temp = NULL;
   1706             }
   1707             if (U_SUCCESS(status)) {
   1708                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1709                 // cast away const
   1710                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1711             } else {
   1712                 delete newCollator;
   1713             }
   1714         }
   1715         delete temp;
   1716     }
   1717 #endif
   1718 
   1719     // if lenient-parse mode is off, this will be null
   1720     // (see setLenientParseMode())
   1721     return collator;
   1722 }
   1723 
   1724 
   1725 DecimalFormatSymbols*
   1726 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
   1727 {
   1728     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1729     // is shared by all DecimalFormat instances belonging to this
   1730     // formatter
   1731     if (decimalFormatSymbols == NULL) {
   1732         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1733         if (U_SUCCESS(status)) {
   1734             decimalFormatSymbols = temp;
   1735         }
   1736         else {
   1737             delete temp;
   1738         }
   1739     }
   1740     return decimalFormatSymbols;
   1741 }
   1742 
   1743 /**
   1744  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1745  * instances owned by this formatter.
   1746 */
   1747 const DecimalFormatSymbols*
   1748 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1749 {
   1750     return decimalFormatSymbols;
   1751 }
   1752 
   1753 NFRule*
   1754 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
   1755 {
   1756     if (U_FAILURE(status)) {
   1757         return NULL;
   1758     }
   1759     if (defaultInfinityRule == NULL) {
   1760         UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
   1761         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
   1762         NFRule* temp = new NFRule(this, rule, status);
   1763         if (U_SUCCESS(status)) {
   1764             defaultInfinityRule = temp;
   1765         }
   1766         else {
   1767             delete temp;
   1768         }
   1769     }
   1770     return defaultInfinityRule;
   1771 }
   1772 
   1773 const NFRule*
   1774 RuleBasedNumberFormat::getDefaultInfinityRule() const
   1775 {
   1776     return defaultInfinityRule;
   1777 }
   1778 
   1779 NFRule*
   1780 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
   1781 {
   1782     if (U_FAILURE(status)) {
   1783         return NULL;
   1784     }
   1785     if (defaultNaNRule == NULL) {
   1786         UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
   1787         rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
   1788         NFRule* temp = new NFRule(this, rule, status);
   1789         if (U_SUCCESS(status)) {
   1790             defaultNaNRule = temp;
   1791         }
   1792         else {
   1793             delete temp;
   1794         }
   1795     }
   1796     return defaultNaNRule;
   1797 }
   1798 
   1799 const NFRule*
   1800 RuleBasedNumberFormat::getDefaultNaNRule() const
   1801 {
   1802     return defaultNaNRule;
   1803 }
   1804 
   1805 // De-owning the current localized symbols and adopt the new symbols.
   1806 void
   1807 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
   1808 {
   1809     if (symbolsToAdopt == NULL) {
   1810         return; // do not allow caller to set decimalFormatSymbols to NULL
   1811     }
   1812 
   1813     if (decimalFormatSymbols != NULL) {
   1814         delete decimalFormatSymbols;
   1815     }
   1816 
   1817     decimalFormatSymbols = symbolsToAdopt;
   1818 
   1819     {
   1820         // Apply the new decimalFormatSymbols by reparsing the rulesets
   1821         UErrorCode status = U_ZERO_ERROR;
   1822 
   1823         delete defaultInfinityRule;
   1824         defaultInfinityRule = NULL;
   1825         initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
   1826 
   1827         delete defaultNaNRule;
   1828         defaultNaNRule = NULL;
   1829         initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
   1830 
   1831         if (ruleSets) {
   1832             for (int32_t i = 0; i < numRuleSets; i++) {
   1833                 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
   1834             }
   1835         }
   1836     }
   1837 }
   1838 
   1839 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
   1840 void
   1841 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
   1842 {
   1843     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
   1844 }
   1845 
   1846 PluralFormat *
   1847 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
   1848                                           const UnicodeString &pattern,
   1849                                           UErrorCode& status) const
   1850 {
   1851     return new PluralFormat(locale, pluralType, pattern, status);
   1852 }
   1853 
   1854 U_NAMESPACE_END
   1855 
   1856 /* U_HAVE_RBNF */
   1857 #endif
   1858