Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2009, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/rbnf.h"
      9 
     10 #if U_HAVE_RBNF
     11 
     12 #include "unicode/normlzr.h"
     13 #include "unicode/tblcoll.h"
     14 #include "unicode/uchar.h"
     15 #include "unicode/ucol.h"
     16 #include "unicode/uloc.h"
     17 #include "unicode/unum.h"
     18 #include "unicode/ures.h"
     19 #include "unicode/ustring.h"
     20 #include "unicode/utf16.h"
     21 #include "unicode/udata.h"
     22 #include "nfrs.h"
     23 
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "../common/util.h"
     27 #include "uresimp.h"
     28 
     29 // debugging
     30 // #define DEBUG
     31 
     32 #ifdef DEBUG
     33 #include "stdio.h"
     34 #endif
     35 
     36 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
     37 
     38 static const UChar gPercentPercent[] =
     39 {
     40     0x25, 0x25, 0
     41 }; /* "%%" */
     42 
     43 // All urbnf objects are created through openRules, so we init all of the
     44 // Unicode string constants required by rbnf, nfrs, or nfr here.
     45 static const UChar gLenientParse[] =
     46 {
     47     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
     48 }; /* "%%lenient-parse:" */
     49 static const UChar gSemiColon = 0x003B;
     50 static const UChar gSemiPercent[] =
     51 {
     52     0x3B, 0x25, 0
     53 }; /* ";%" */
     54 
     55 #define kSomeNumberOfBitsDiv2 22
     56 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
     57 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
     58 
     59 // Temporary workaround - when noParse is true, do noting in parse.
     60 // TODO: We need a real fix - see #6895/#6896
     61 static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
     62 
     63 U_NAMESPACE_BEGIN
     64 
// ICU RTTI boilerplate for RuleBasedNumberFormat. The macro is defined outside
// this file; presumably it supplies the getStaticClassID()/getDynamicClassID()
// pair that RuleBasedNumberFormat::operator== relies on.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     66 
     67 /*
     68 This is a utility class. It does not use ICU's RTTI.
     69 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     70 Please make sure that intltest passes on Windows in Release mode,
     71 since the string pooling per compilation unit will mess up how RTTI works.
     72 The RTTI code was also removed due to lack of code coverage.
     73 */
     74 class LocalizationInfo : public UMemory {
     75 protected:
     76     virtual ~LocalizationInfo() {};
     77     uint32_t refcount;
     78 
     79 public:
     80     LocalizationInfo() : refcount(0) {}
     81 
     82     LocalizationInfo* ref(void) {
     83         ++refcount;
     84         return this;
     85     }
     86 
     87     LocalizationInfo* unref(void) {
     88         if (refcount && --refcount == 0) {
     89             delete this;
     90         }
     91         return NULL;
     92     }
     93 
     94     virtual UBool operator==(const LocalizationInfo* rhs) const;
     95     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
     96 
     97     virtual int32_t getNumberOfRuleSets(void) const = 0;
     98     virtual const UChar* getRuleSetName(int32_t index) const = 0;
     99     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    100     virtual const UChar* getLocaleName(int32_t index) const = 0;
    101     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    102 
    103     virtual int32_t indexForLocale(const UChar* locale) const;
    104     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    105 
    106 //    virtual UClassID getDynamicClassID() const = 0;
    107 //    static UClassID getStaticClassID(void);
    108 };
    109 
    110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    111 
    112 // if both strings are NULL, this returns TRUE
    113 static UBool
    114 streq(const UChar* lhs, const UChar* rhs) {
    115     if (rhs == lhs) {
    116         return TRUE;
    117     }
    118     if (lhs && rhs) {
    119         return u_strcmp(lhs, rhs) == 0;
    120     }
    121     return FALSE;
    122 }
    123 
    124 UBool
    125 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    126     if (rhs) {
    127         if (this == rhs) {
    128             return TRUE;
    129         }
    130 
    131         int32_t rsc = getNumberOfRuleSets();
    132         if (rsc == rhs->getNumberOfRuleSets()) {
    133             for (int i = 0; i < rsc; ++i) {
    134                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    135                     return FALSE;
    136                 }
    137             }
    138             int32_t dlc = getNumberOfDisplayLocales();
    139             if (dlc == rhs->getNumberOfDisplayLocales()) {
    140                 for (int i = 0; i < dlc; ++i) {
    141                     const UChar* locale = getLocaleName(i);
    142                     int32_t ix = rhs->indexForLocale(locale);
    143                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    144                     if (!streq(locale, rhs->getLocaleName(ix))) {
    145                         return FALSE;
    146                     }
    147                     for (int j = 0; j < rsc; ++j) {
    148                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    149                             return FALSE;
    150                         }
    151                     }
    152                 }
    153                 return TRUE;
    154             }
    155         }
    156     }
    157     return FALSE;
    158 }
    159 
    160 int32_t
    161 LocalizationInfo::indexForLocale(const UChar* locale) const {
    162     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    163         if (streq(locale, getLocaleName(i))) {
    164             return i;
    165         }
    166     }
    167     return -1;
    168 }
    169 
    170 int32_t
    171 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    172     if (ruleset) {
    173         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    174             if (streq(ruleset, getRuleSetName(i))) {
    175                 return i;
    176             }
    177         }
    178     }
    179     return -1;
    180 }
    181 
    182 
    183 typedef void (*Fn_Deleter)(void*);
    184 
    185 class VArray {
    186     void** buf;
    187     int32_t cap;
    188     int32_t size;
    189     Fn_Deleter deleter;
    190 public:
    191     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    192 
    193     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    194 
    195     ~VArray() {
    196         if (deleter) {
    197             for (int i = 0; i < size; ++i) {
    198                 (*deleter)(buf[i]);
    199             }
    200         }
    201         uprv_free(buf);
    202     }
    203 
    204     int32_t length() {
    205         return size;
    206     }
    207 
    208     void add(void* elem, UErrorCode& status) {
    209         if (U_SUCCESS(status)) {
    210             if (size == cap) {
    211                 if (cap == 0) {
    212                     cap = 1;
    213                 } else if (cap < 256) {
    214                     cap *= 2;
    215                 } else {
    216                     cap += 256;
    217                 }
    218                 if (buf == NULL) {
    219                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    220                 } else {
    221                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    222                 }
    223                 if (buf == NULL) {
    224                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    225                     status = U_MEMORY_ALLOCATION_ERROR;
    226                     return;
    227                 }
    228                 void* start = &buf[size];
    229                 size_t count = (cap - size) * sizeof(void*);
    230                 uprv_memset(start, 0, count); // fill with nulls, just because
    231             }
    232             buf[size++] = elem;
    233         }
    234     }
    235 
    236     void** release(void) {
    237         void** result = buf;
    238         buf = NULL;
    239         cap = 0;
    240         size = 0;
    241         return result;
    242     }
    243 };
    244 
    245 class LocDataParser;
    246 
    247 class StringLocalizationInfo : public LocalizationInfo {
    248     UChar* info;
    249     UChar*** data;
    250     int32_t numRuleSets;
    251     int32_t numLocales;
    252 
    253 friend class LocDataParser;
    254 
    255     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
    256         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    257     {
    258     }
    259 
    260 public:
    261     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
    262 
    263     virtual ~StringLocalizationInfo();
    264     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    265     virtual const UChar* getRuleSetName(int32_t index) const;
    266     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    267     virtual const UChar* getLocaleName(int32_t index) const;
    268     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
    269 
    270 //    virtual UClassID getDynamicClassID() const;
    271 //    static UClassID getStaticClassID(void);
    272 
    273 private:
    274     void init(UErrorCode& status) const;
    275 };
    276 
    277 
    278 enum {
    279     OPEN_ANGLE = 0x003c, /* '<' */
    280     CLOSE_ANGLE = 0x003e, /* '>' */
    281     COMMA = 0x002c,
    282     TICK = 0x0027,
    283     QUOTE = 0x0022,
    284     SPACE = 0x0020
    285 };
    286 
    287 /**
    288  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    289  */
    290 class LocDataParser {
    291     UChar* data;
    292     const UChar* e;
    293     UChar* p;
    294     UChar ch;
    295     UParseError& pe;
    296     UErrorCode& ec;
    297 
    298 public:
    299     LocDataParser(UParseError& parseError, UErrorCode& status)
    300         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    301     ~LocDataParser() {}
    302 
    303     /*
    304     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    305     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    306     */
    307     StringLocalizationInfo* parse(UChar* data, int32_t len);
    308 
    309 private:
    310 
    311     void inc(void) { ++p; ch = 0xffff; }
    312     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    313     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    314     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    315     UBool inList(UChar c, const UChar* list) const {
    316         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
    317         while (*list && *list != c) ++list; return *list == c;
    318     }
    319     void parseError(const char* msg);
    320 
    321     StringLocalizationInfo* doParse(void);
    322 
    323     UChar** nextArray(int32_t& requiredLength);
    324     UChar*  nextString(void);
    325 };
    326 
// Reports a parse failure through parseError() and returns NULL from the
// enclosing function. The message text is forwarded only in DEBUG builds.
// NOTE: the macro expands to two statements, so it is only safe inside a
// braced block; an unbraced "if (x) ERROR(...)" would return unconditionally.
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#else
#define ERROR(msg) parseError(NULL); return NULL;
#endif
    332 
    333 
    334 static const UChar DQUOTE_STOPLIST[] = {
    335     QUOTE, 0
    336 };
    337 
    338 static const UChar SQUOTE_STOPLIST[] = {
    339     TICK, 0
    340 };
    341 
    342 static const UChar NOQUOTE_STOPLIST[] = {
    343     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
    344 };
    345 
    346 static void
    347 DeleteFn(void* p) {
    348   uprv_free(p);
    349 }
    350 
    351 StringLocalizationInfo*
    352 LocDataParser::parse(UChar* _data, int32_t len) {
    353     if (U_FAILURE(ec)) {
    354         if (_data) uprv_free(_data);
    355         return NULL;
    356     }
    357 
    358     pe.line = 0;
    359     pe.offset = -1;
    360     pe.postContext[0] = 0;
    361     pe.preContext[0] = 0;
    362 
    363     if (_data == NULL) {
    364         ec = U_ILLEGAL_ARGUMENT_ERROR;
    365         return NULL;
    366     }
    367 
    368     if (len <= 0) {
    369         ec = U_ILLEGAL_ARGUMENT_ERROR;
    370         uprv_free(_data);
    371         return NULL;
    372     }
    373 
    374     data = _data;
    375     e = data + len;
    376     p = _data;
    377     ch = 0xffff;
    378 
    379     return doParse();
    380 }
    381 
    382 
    383 StringLocalizationInfo*
    384 LocDataParser::doParse(void) {
    385     skipWhitespace();
    386     if (!checkInc(OPEN_ANGLE)) {
    387         ERROR("Missing open angle");
    388     } else {
    389         VArray array(DeleteFn);
    390         UBool mightHaveNext = TRUE;
    391         int32_t requiredLength = -1;
    392         while (mightHaveNext) {
    393             mightHaveNext = FALSE;
    394             UChar** elem = nextArray(requiredLength);
    395             skipWhitespace();
    396             UBool haveComma = check(COMMA);
    397             if (elem) {
    398                 array.add(elem, ec);
    399                 if (haveComma) {
    400                     inc();
    401                     mightHaveNext = TRUE;
    402                 }
    403             } else if (haveComma) {
    404                 ERROR("Unexpected character");
    405             }
    406         }
    407 
    408         skipWhitespace();
    409         if (!checkInc(CLOSE_ANGLE)) {
    410             if (check(OPEN_ANGLE)) {
    411                 ERROR("Missing comma in outer array");
    412             } else {
    413                 ERROR("Missing close angle bracket in outer array");
    414             }
    415         }
    416 
    417         skipWhitespace();
    418         if (p != e) {
    419             ERROR("Extra text after close of localization data");
    420         }
    421 
    422         array.add(NULL, ec);
    423         if (U_SUCCESS(ec)) {
    424             int32_t numLocs = array.length() - 2; // subtract first, NULL
    425             UChar*** result = (UChar***)array.release();
    426 
    427             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    428         }
    429     }
    430 
    431     ERROR("Unknown error");
    432 }
    433 
    434 UChar**
    435 LocDataParser::nextArray(int32_t& requiredLength) {
    436     if (U_FAILURE(ec)) {
    437         return NULL;
    438     }
    439 
    440     skipWhitespace();
    441     if (!checkInc(OPEN_ANGLE)) {
    442         ERROR("Missing open angle");
    443     }
    444 
    445     VArray array;
    446     UBool mightHaveNext = TRUE;
    447     while (mightHaveNext) {
    448         mightHaveNext = FALSE;
    449         UChar* elem = nextString();
    450         skipWhitespace();
    451         UBool haveComma = check(COMMA);
    452         if (elem) {
    453             array.add(elem, ec);
    454             if (haveComma) {
    455                 inc();
    456                 mightHaveNext = TRUE;
    457             }
    458         } else if (haveComma) {
    459             ERROR("Unexpected comma");
    460         }
    461     }
    462     skipWhitespace();
    463     if (!checkInc(CLOSE_ANGLE)) {
    464         if (check(OPEN_ANGLE)) {
    465             ERROR("Missing close angle bracket in inner array");
    466         } else {
    467             ERROR("Missing comma in inner array");
    468         }
    469     }
    470 
    471     array.add(NULL, ec);
    472     if (U_SUCCESS(ec)) {
    473         if (requiredLength == -1) {
    474             requiredLength = array.length() + 1;
    475         } else if (array.length() != requiredLength) {
    476             ec = U_ILLEGAL_ARGUMENT_ERROR;
    477             ERROR("Array not of required length");
    478         }
    479 
    480         return (UChar**)array.release();
    481     }
    482     ERROR("Unknown Error");
    483 }
    484 
    485 UChar*
    486 LocDataParser::nextString() {
    487     UChar* result = NULL;
    488 
    489     skipWhitespace();
    490     if (p < e) {
    491         const UChar* terminators;
    492         UChar c = *p;
    493         UBool haveQuote = c == QUOTE || c == TICK;
    494         if (haveQuote) {
    495             inc();
    496             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
    497         } else {
    498             terminators = NOQUOTE_STOPLIST;
    499         }
    500         UChar* start = p;
    501         while (p < e && !inList(*p, terminators)) ++p;
    502         if (p == e) {
    503             ERROR("Unexpected end of data");
    504         }
    505 
    506         UChar x = *p;
    507         if (p > start) {
    508             ch = x;
    509             *p = 0x0; // terminate by writing to data
    510             result = start; // just point into data
    511         }
    512         if (haveQuote) {
    513             if (x != c) {
    514                 ERROR("Missing matching quote");
    515             } else if (p == start) {
    516                 ERROR("Empty string");
    517             }
    518             inc();
    519         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
    520             ERROR("Unexpected character in string");
    521         }
    522     }
    523 
    524     // ok for there to be no next string
    525     return result;
    526 }
    527 
    528 void
    529 LocDataParser::parseError(const char* /*str*/) {
    530     if (!data) {
    531         return;
    532     }
    533 
    534     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    535     if (start < data) {
    536         start = data;
    537     }
    538     for (UChar* x = p; --x >= start;) {
    539         if (!*x) {
    540             start = x+1;
    541             break;
    542         }
    543     }
    544     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    545     if (limit > e) {
    546         limit = e;
    547     }
    548     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    549     pe.preContext[p-start] = 0;
    550     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    551     pe.postContext[limit-p] = 0;
    552     pe.offset = (int32_t)(p - data);
    553 
    554 #ifdef DEBUG
    555     fprintf(stderr, "%s at or near character %d: ", str, p-data);
    556 
    557     UnicodeString msg;
    558     msg.append(start, p - start);
    559     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    560     msg.append(p, limit-p);
    561     msg.append("'");
    562 
    563     char buf[128];
    564     int32_t len = msg.extract(0, msg.length(), buf, 128);
    565     if (len >= 128) {
    566         buf[127] = 0;
    567     } else {
    568         buf[len] = 0;
    569     }
    570     fprintf(stderr, "%s\n", buf);
    571     fflush(stderr);
    572 #endif
    573 
    574     uprv_free(data);
    575     data = NULL;
    576     p = NULL;
    577     e = NULL;
    578 
    579     if (U_SUCCESS(ec)) {
    580         ec = U_PARSE_ERROR;
    581     }
    582 }
    583 
    584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    585 
    586 StringLocalizationInfo*
    587 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    588     if (U_FAILURE(status)) {
    589         return NULL;
    590     }
    591 
    592     int32_t len = info.length();
    593     if (len == 0) {
    594         return NULL; // no error;
    595     }
    596 
    597     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    598     if (!p) {
    599         status = U_MEMORY_ALLOCATION_ERROR;
    600         return NULL;
    601     }
    602     info.extract(p, len, status);
    603     if (!U_FAILURE(status)) {
    604         status = U_ZERO_ERROR; // clear warning about non-termination
    605     }
    606 
    607     LocDataParser parser(perror, status);
    608     return parser.parse(p, len);
    609 }
    610 
    611 StringLocalizationInfo::~StringLocalizationInfo() {
    612     for (UChar*** p = (UChar***)data; *p; ++p) {
    613         // remaining data is simply pointer into our unicode string data.
    614         if (*p) uprv_free(*p);
    615     }
    616     if (data) uprv_free(data);
    617     if (info) uprv_free(info);
    618 }
    619 
    620 
    621 const UChar*
    622 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    623     if (index >= 0 && index < getNumberOfRuleSets()) {
    624         return data[0][index];
    625     }
    626     return NULL;
    627 }
    628 
    629 const UChar*
    630 StringLocalizationInfo::getLocaleName(int32_t index) const {
    631     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    632         return data[index+1][0];
    633     }
    634     return NULL;
    635 }
    636 
    637 const UChar*
    638 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    639     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    640         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    641         return data[localeIndex+1][ruleIndex+1];
    642     }
    643     return NULL;
    644 }
    645 
    646 // ----------
    647 
    648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    649                                              const UnicodeString& locs,
    650                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    651   : ruleSets(NULL)
    652   , defaultRuleSet(NULL)
    653   , locale(alocale)
    654   , collator(NULL)
    655   , decimalFormatSymbols(NULL)
    656   , lenient(FALSE)
    657   , lenientParseRules(NULL)
    658   , localizations(NULL)
    659   , noParse(FALSE) //TODO: to be removed after #6895
    660 {
    661   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    662   init(description, locinfo, perror, status);
    663 }
    664 
    665 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    666                                              const UnicodeString& locs,
    667                                              UParseError& perror, UErrorCode& status)
    668   : ruleSets(NULL)
    669   , defaultRuleSet(NULL)
    670   , locale(Locale::getDefault())
    671   , collator(NULL)
    672   , decimalFormatSymbols(NULL)
    673   , lenient(FALSE)
    674   , lenientParseRules(NULL)
    675   , localizations(NULL)
    676   , noParse(FALSE) //TODO: to be removed after #6895
    677 {
    678   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    679   init(description, locinfo, perror, status);
    680 }
    681 
    682 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    683                                              LocalizationInfo* info,
    684                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    685   : ruleSets(NULL)
    686   , defaultRuleSet(NULL)
    687   , locale(alocale)
    688   , collator(NULL)
    689   , decimalFormatSymbols(NULL)
    690   , lenient(FALSE)
    691   , lenientParseRules(NULL)
    692   , localizations(NULL)
    693   , noParse(FALSE) //TODO: to be removed after #6895
    694 {
    695   init(description, info, perror, status);
    696 }
    697 
    698 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    699                          UParseError& perror,
    700                          UErrorCode& status)
    701   : ruleSets(NULL)
    702   , defaultRuleSet(NULL)
    703   , locale(Locale::getDefault())
    704   , collator(NULL)
    705   , decimalFormatSymbols(NULL)
    706   , lenient(FALSE)
    707   , lenientParseRules(NULL)
    708   , localizations(NULL)
    709   , noParse(FALSE) //TODO: to be removed after #6895
    710 {
    711     init(description, NULL, perror, status);
    712 }
    713 
    714 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    715                          const Locale& aLocale,
    716                          UParseError& perror,
    717                          UErrorCode& status)
    718   : ruleSets(NULL)
    719   , defaultRuleSet(NULL)
    720   , locale(aLocale)
    721   , collator(NULL)
    722   , decimalFormatSymbols(NULL)
    723   , lenient(FALSE)
    724   , lenientParseRules(NULL)
    725   , localizations(NULL)
    726   , noParse(FALSE) //TODO: to be removed after #6895
    727 {
    728     init(description, NULL, perror, status);
    729 }
    730 
    731 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    732   : ruleSets(NULL)
    733   , defaultRuleSet(NULL)
    734   , locale(alocale)
    735   , collator(NULL)
    736   , decimalFormatSymbols(NULL)
    737   , lenient(FALSE)
    738   , lenientParseRules(NULL)
    739   , localizations(NULL)
    740 {
    741     if (U_FAILURE(status)) {
    742         return;
    743     }
    744 
    745     const char* rules_tag = "RBNFRules";
    746     const char* fmt_tag = "";
    747     switch (tag) {
    748     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    749     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    750     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    751     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    752     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    753     }
    754 
    755     // TODO: read localization info from resource
    756     LocalizationInfo* locinfo = NULL;
    757 
    758     int32_t len = 0;
    759     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    760     if (U_SUCCESS(status)) {
    761         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    762                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    763 
    764         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    765         if (U_FAILURE(status)) {
    766             ures_close(nfrb);
    767         }
    768         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    769         if (U_FAILURE(status)) {
    770             ures_close(rbnfRules);
    771             ures_close(nfrb);
    772             return;
    773         }
    774 
    775         UnicodeString desc;
    776         while (ures_hasNext(ruleSets)) {
    777            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
    778            desc.append(currentString);
    779         }
    780         UParseError perror;
    781 
    782 
    783         init (desc, locinfo, perror, status);
    784 
    785         //TODO: we need a real fix - see #6895 / #6896
    786         noParse = FALSE;
    787         if (tag == URBNF_SPELLOUT) {
    788             const char *lang = alocale.getLanguage();
    789             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
    790                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
    791                     noParse = TRUE;
    792                     break;
    793                 }
    794             }
    795         }
    796         //TODO: end
    797 
    798         ures_close(ruleSets);
    799         ures_close(rbnfRules);
    800     }
    801     ures_close(nfrb);
    802 }
    803 
    804 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    805   : NumberFormat(rhs)
    806   , ruleSets(NULL)
    807   , defaultRuleSet(NULL)
    808   , locale(rhs.locale)
    809   , collator(NULL)
    810   , decimalFormatSymbols(NULL)
    811   , lenient(FALSE)
    812   , lenientParseRules(NULL)
    813   , localizations(NULL)
    814 {
    815     this->operator=(rhs);
    816 }
    817 
    818 // --------
    819 
    820 RuleBasedNumberFormat&
    821 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
    822 {
    823     UErrorCode status = U_ZERO_ERROR;
    824     dispose();
    825     locale = rhs.locale;
    826     lenient = rhs.lenient;
    827 
    828     UnicodeString rules = rhs.getRules();
    829     UParseError perror;
    830     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    831 
    832     //TODO: remove below when we fix the parse bug - See #6895 / #6896
    833     noParse = rhs.noParse;
    834 
    835     return *this;
    836 }
    837 
// Destructor: all cleanup is delegated to dispose(), which is also what
// operator= calls before rebuilding the object.
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
    842 
    843 Format*
    844 RuleBasedNumberFormat::clone(void) const
    845 {
    846     RuleBasedNumberFormat * result = NULL;
    847     UnicodeString rules = getRules();
    848     UErrorCode status = U_ZERO_ERROR;
    849     UParseError perror;
    850     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
    851     /* test for NULL */
    852     if (result == 0) {
    853         status = U_MEMORY_ALLOCATION_ERROR;
    854         return 0;
    855     }
    856     if (U_FAILURE(status)) {
    857         delete result;
    858         result = 0;
    859     } else {
    860         result->lenient = lenient;
    861 
    862         //TODO: remove below when we fix the parse bug - See #6895 / #6896
    863         result->noParse = noParse;
    864     }
    865     return result;
    866 }
    867 
    868 UBool
    869 RuleBasedNumberFormat::operator==(const Format& other) const
    870 {
    871     if (this == &other) {
    872         return TRUE;
    873     }
    874 
    875     if (other.getDynamicClassID() == getStaticClassID()) {
    876         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    877         if (locale == rhs.locale &&
    878             lenient == rhs.lenient &&
    879             (localizations == NULL
    880                 ? rhs.localizations == NULL
    881                 : (rhs.localizations == NULL
    882                     ? FALSE
    883                     : *localizations == rhs.localizations))) {
    884 
    885             NFRuleSet** p = ruleSets;
    886             NFRuleSet** q = rhs.ruleSets;
    887             if (p == NULL) {
    888                 return q == NULL;
    889             } else if (q == NULL) {
    890                 return FALSE;
    891             }
    892             while (*p && *q && (**p == **q)) {
    893                 ++p;
    894                 ++q;
    895             }
    896             return *q == NULL && *p == NULL;
    897         }
    898     }
    899 
    900     return FALSE;
    901 }
    902 
    903 UnicodeString
    904 RuleBasedNumberFormat::getRules() const
    905 {
    906     UnicodeString result;
    907     if (ruleSets != NULL) {
    908         for (NFRuleSet** p = ruleSets; *p; ++p) {
    909             (*p)->appendRules(result);
    910         }
    911     }
    912     return result;
    913 }
    914 
    915 UnicodeString
    916 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    917 {
    918     if (localizations) {
    919       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    920       return string;
    921     } else if (ruleSets) {
    922         UnicodeString result;
    923         for (NFRuleSet** p = ruleSets; *p; ++p) {
    924             NFRuleSet* rs = *p;
    925             if (rs->isPublic()) {
    926                 if (--index == -1) {
    927                     rs->getName(result);
    928                     return result;
    929                 }
    930             }
    931         }
    932     }
    933     UnicodeString empty;
    934     return empty;
    935 }
    936 
    937 int32_t
    938 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    939 {
    940     int32_t result = 0;
    941     if (localizations) {
    942       result = localizations->getNumberOfRuleSets();
    943     } else if (ruleSets) {
    944         for (NFRuleSet** p = ruleSets; *p; ++p) {
    945             if ((**p).isPublic()) {
    946                 ++result;
    947             }
    948         }
    949     }
    950     return result;
    951 }
    952 
    953 int32_t
    954 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    955     if (localizations) {
    956         return localizations->getNumberOfDisplayLocales();
    957     }
    958     return 0;
    959 }
    960 
    961 Locale
    962 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    963     if (U_FAILURE(status)) {
    964         return Locale("");
    965     }
    966     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
    967         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
    968         char buffer[64];
    969         int32_t cap = name.length() + 1;
    970         char* bp = buffer;
    971         if (cap > 64) {
    972             bp = (char *)uprv_malloc(cap);
    973             if (bp == NULL) {
    974                 status = U_MEMORY_ALLOCATION_ERROR;
    975                 return Locale("");
    976             }
    977         }
    978         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
    979         Locale retLocale(bp);
    980         if (bp != buffer) {
    981             uprv_free(bp);
    982         }
    983         return retLocale;
    984     }
    985     status = U_ILLEGAL_ARGUMENT_ERROR;
    986     Locale retLocale;
    987     return retLocale;
    988 }
    989 
    990 UnicodeString
    991 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
    992     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
    993         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
    994         int32_t len = localeName.length();
    995         UChar* localeStr = localeName.getBuffer(len + 1);
    996         while (len >= 0) {
    997             localeStr[len] = 0;
    998             int32_t ix = localizations->indexForLocale(localeStr);
    999             if (ix >= 0) {
   1000                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1001                 return name;
   1002             }
   1003 
   1004             // trim trailing portion, skipping over ommitted sections
   1005             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1006             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1007         }
   1008         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1009         return name;
   1010     }
   1011     UnicodeString bogus;
   1012     bogus.setToBogus();
   1013     return bogus;
   1014 }
   1015 
   1016 UnicodeString
   1017 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1018     if (localizations) {
   1019         UnicodeString rsn(ruleSetName);
   1020         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1021         return getRuleSetDisplayName(ix, localeParam);
   1022     }
   1023     UnicodeString bogus;
   1024     bogus.setToBogus();
   1025     return bogus;
   1026 }
   1027 
   1028 NFRuleSet*
   1029 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1030 {
   1031     if (U_SUCCESS(status) && ruleSets) {
   1032         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1033             NFRuleSet* rs = *p;
   1034             if (rs->isNamed(name)) {
   1035                 return rs;
   1036             }
   1037         }
   1038         status = U_ILLEGAL_ARGUMENT_ERROR;
   1039     }
   1040     return NULL;
   1041 }
   1042 
   1043 UnicodeString&
   1044 RuleBasedNumberFormat::format(int32_t number,
   1045                               UnicodeString& toAppendTo,
   1046                               FieldPosition& /* pos */) const
   1047 {
   1048     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
   1049     return toAppendTo;
   1050 }
   1051 
   1052 
   1053 UnicodeString&
   1054 RuleBasedNumberFormat::format(int64_t number,
   1055                               UnicodeString& toAppendTo,
   1056                               FieldPosition& /* pos */) const
   1057 {
   1058     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1059     return toAppendTo;
   1060 }
   1061 
   1062 
   1063 UnicodeString&
   1064 RuleBasedNumberFormat::format(double number,
   1065                               UnicodeString& toAppendTo,
   1066                               FieldPosition& /* pos */) const
   1067 {
   1068     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1069     return toAppendTo;
   1070 }
   1071 
   1072 
   1073 UnicodeString&
   1074 RuleBasedNumberFormat::format(int32_t number,
   1075                               const UnicodeString& ruleSetName,
   1076                               UnicodeString& toAppendTo,
   1077                               FieldPosition& /* pos */,
   1078                               UErrorCode& status) const
   1079 {
   1080     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1081     if (U_SUCCESS(status)) {
   1082         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1083             // throw new IllegalArgumentException("Can't use internal rule set");
   1084             status = U_ILLEGAL_ARGUMENT_ERROR;
   1085         } else {
   1086             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1087             if (rs) {
   1088                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
   1089             }
   1090         }
   1091     }
   1092     return toAppendTo;
   1093 }
   1094 
   1095 
   1096 UnicodeString&
   1097 RuleBasedNumberFormat::format(int64_t number,
   1098                               const UnicodeString& ruleSetName,
   1099                               UnicodeString& toAppendTo,
   1100                               FieldPosition& /* pos */,
   1101                               UErrorCode& status) const
   1102 {
   1103     if (U_SUCCESS(status)) {
   1104         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1105             // throw new IllegalArgumentException("Can't use internal rule set");
   1106             status = U_ILLEGAL_ARGUMENT_ERROR;
   1107         } else {
   1108             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1109             if (rs) {
   1110                 rs->format(number, toAppendTo, toAppendTo.length());
   1111             }
   1112         }
   1113     }
   1114     return toAppendTo;
   1115 }
   1116 
   1117 
   1118 // make linker happy
   1119 UnicodeString&
   1120 RuleBasedNumberFormat::format(const Formattable& obj,
   1121                               UnicodeString& toAppendTo,
   1122                               FieldPosition& pos,
   1123                               UErrorCode& status) const
   1124 {
   1125     return NumberFormat::format(obj, toAppendTo, pos, status);
   1126 }
   1127 
   1128 UnicodeString&
   1129 RuleBasedNumberFormat::format(double number,
   1130                               const UnicodeString& ruleSetName,
   1131                               UnicodeString& toAppendTo,
   1132                               FieldPosition& /* pos */,
   1133                               UErrorCode& status) const
   1134 {
   1135     if (U_SUCCESS(status)) {
   1136         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1137             // throw new IllegalArgumentException("Can't use internal rule set");
   1138             status = U_ILLEGAL_ARGUMENT_ERROR;
   1139         } else {
   1140             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1141             if (rs) {
   1142                 rs->format(number, toAppendTo, toAppendTo.length());
   1143             }
   1144         }
   1145     }
   1146     return toAppendTo;
   1147 }
   1148 
   1149 void
   1150 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1151                              Formattable& result,
   1152                              ParsePosition& parsePosition) const
   1153 {
   1154     //TODO: We need a real fix.  See #6895 / #6896
   1155     if (noParse) {
   1156         // skip parsing
   1157         parsePosition.setErrorIndex(0);
   1158         return;
   1159     }
   1160 
   1161     if (!ruleSets) {
   1162         parsePosition.setErrorIndex(0);
   1163         return;
   1164     }
   1165 
   1166     UnicodeString workingText(text, parsePosition.getIndex());
   1167     ParsePosition workingPos(0);
   1168 
   1169     ParsePosition high_pp(0);
   1170     Formattable high_result;
   1171 
   1172     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1173         NFRuleSet *rp = *p;
   1174         if (rp->isPublic() && rp->isParseable()) {
   1175             ParsePosition working_pp(0);
   1176             Formattable working_result;
   1177 
   1178             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1179             if (working_pp.getIndex() > high_pp.getIndex()) {
   1180                 high_pp = working_pp;
   1181                 high_result = working_result;
   1182 
   1183                 if (high_pp.getIndex() == workingText.length()) {
   1184                     break;
   1185                 }
   1186             }
   1187         }
   1188     }
   1189 
   1190     int32_t startIndex = parsePosition.getIndex();
   1191     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1192     if (high_pp.getIndex() > 0) {
   1193         parsePosition.setErrorIndex(-1);
   1194     } else {
   1195         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1196         parsePosition.setErrorIndex(startIndex + errorIndex);
   1197     }
   1198     result = high_result;
   1199     if (result.getType() == Formattable::kDouble) {
   1200         int32_t r = (int32_t)result.getDouble();
   1201         if ((double)r == result.getDouble()) {
   1202             result.setLong(r);
   1203         }
   1204     }
   1205 }
   1206 
   1207 #if !UCONFIG_NO_COLLATION
   1208 
   1209 void
   1210 RuleBasedNumberFormat::setLenient(UBool enabled)
   1211 {
   1212     lenient = enabled;
   1213     if (!enabled && collator) {
   1214         delete collator;
   1215         collator = NULL;
   1216     }
   1217 }
   1218 
   1219 #endif
   1220 
   1221 void
   1222 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1223     if (U_SUCCESS(status)) {
   1224         if (ruleSetName.isEmpty()) {
   1225           if (localizations) {
   1226               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1227               defaultRuleSet = findRuleSet(name, status);
   1228           } else {
   1229             initDefaultRuleSet();
   1230           }
   1231         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1232             status = U_ILLEGAL_ARGUMENT_ERROR;
   1233         } else {
   1234             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1235             if (result != NULL) {
   1236                 defaultRuleSet = result;
   1237             }
   1238         }
   1239     }
   1240 }
   1241 
   1242 UnicodeString
   1243 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1244   UnicodeString result;
   1245   if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1246     defaultRuleSet->getName(result);
   1247   } else {
   1248     result.setToBogus();
   1249   }
   1250   return result;
   1251 }
   1252 
   1253 void
   1254 RuleBasedNumberFormat::initDefaultRuleSet()
   1255 {
   1256     defaultRuleSet = NULL;
   1257     if (!ruleSets) {
   1258       return;
   1259     }
   1260 
   1261     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
   1262     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
   1263     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
   1264 
   1265     NFRuleSet**p = &ruleSets[0];
   1266     while (*p) {
   1267         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1268             defaultRuleSet = *p;
   1269             return;
   1270         } else {
   1271             ++p;
   1272         }
   1273     }
   1274 
   1275     defaultRuleSet = *--p;
   1276     if (!defaultRuleSet->isPublic()) {
   1277         while (p != ruleSets) {
   1278             if ((*--p)->isPublic()) {
   1279                 defaultRuleSet = *p;
   1280                 break;
   1281             }
   1282         }
   1283     }
   1284 }
   1285 
   1286 
   1287 void
   1288 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
   1289                             UParseError& pErr, UErrorCode& status)
   1290 {
   1291     // TODO: implement UParseError
   1292     uprv_memset(&pErr, 0, sizeof(UParseError));
   1293     // Note: this can leave ruleSets == NULL, so remaining code should check
   1294     if (U_FAILURE(status)) {
   1295         return;
   1296     }
   1297 
   1298     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
   1299 
   1300     UnicodeString description(rules);
   1301     if (!description.length()) {
   1302         status = U_MEMORY_ALLOCATION_ERROR;
   1303         return;
   1304     }
   1305 
   1306     // start by stripping the trailing whitespace from all the rules
   1307     // (this is all the whitespace follwing each semicolon in the
   1308     // description).  This allows us to look for rule-set boundaries
   1309     // by searching for ";%" without having to worry about whitespace
   1310     // between the ; and the %
   1311     stripWhitespace(description);
   1312 
   1313     // check to see if there's a set of lenient-parse rules.  If there
   1314     // is, pull them out into our temporary holding place for them,
   1315     // and delete them from the description before the real desciption-
   1316     // parsing code sees them
   1317     int32_t lp = description.indexOf(gLenientParse);
   1318     if (lp != -1) {
   1319         // we've got to make sure we're not in the middle of a rule
   1320         // (where "%%lenient-parse" would actually get treated as
   1321         // rule text)
   1322         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
   1323             // locate the beginning and end of the actual collation
   1324             // rules (there may be whitespace between the name and
   1325             // the first token in the description)
   1326             int lpEnd = description.indexOf(gSemiPercent, lp);
   1327 
   1328             if (lpEnd == -1) {
   1329                 lpEnd = description.length() - 1;
   1330             }
   1331             int lpStart = lp + u_strlen(gLenientParse);
   1332             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
   1333                 ++lpStart;
   1334             }
   1335 
   1336             // copy out the lenient-parse rules and delete them
   1337             // from the description
   1338             lenientParseRules = new UnicodeString();
   1339             /* test for NULL */
   1340             if (lenientParseRules == 0) {
   1341                 status = U_MEMORY_ALLOCATION_ERROR;
   1342                 return;
   1343             }
   1344             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
   1345 
   1346             description.remove(lp, lpEnd + 1 - lp);
   1347         }
   1348     }
   1349 
   1350     // pre-flight parsing the description and count the number of
   1351     // rule sets (";%" marks the end of one rule set and the beginning
   1352     // of the next)
   1353     int numRuleSets = 0;
   1354     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
   1355         ++numRuleSets;
   1356         ++p;
   1357     }
   1358     ++numRuleSets;
   1359 
   1360     // our rule list is an array of the appropriate size
   1361     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
   1362     /* test for NULL */
   1363     if (ruleSets == 0) {
   1364         status = U_MEMORY_ALLOCATION_ERROR;
   1365         return;
   1366     }
   1367 
   1368     for (int i = 0; i <= numRuleSets; ++i) {
   1369         ruleSets[i] = NULL;
   1370     }
   1371 
   1372     // divide up the descriptions into individual rule-set descriptions
   1373     // and store them in a temporary array.  At each step, we also
   1374     // new up a rule set, but all this does is initialize its name
   1375     // and remove it from its description.  We can't actually parse
   1376     // the rest of the descriptions and finish initializing everything
   1377     // because we have to know the names and locations of all the rule
   1378     // sets before we can actually set everything up
   1379     if(!numRuleSets) {
   1380         status = U_ILLEGAL_ARGUMENT_ERROR;
   1381         return;
   1382     }
   1383     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
   1384     if (ruleSetDescriptions == 0) {
   1385         status = U_MEMORY_ALLOCATION_ERROR;
   1386         return;
   1387     }
   1388 
   1389     {
   1390         int curRuleSet = 0;
   1391         int32_t start = 0;
   1392         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
   1393             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
   1394             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1395             if (ruleSets[curRuleSet] == 0) {
   1396                 status = U_MEMORY_ALLOCATION_ERROR;
   1397                 goto cleanup;
   1398             }
   1399             ++curRuleSet;
   1400             start = p + 1;
   1401         }
   1402         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
   1403         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1404         if (ruleSets[curRuleSet] == 0) {
   1405             status = U_MEMORY_ALLOCATION_ERROR;
   1406             goto cleanup;
   1407         }
   1408     }
   1409 
   1410     // now we can take note of the formatter's default rule set, which
   1411     // is the last public rule set in the description (it's the last
   1412     // rather than the first so that a user can create a new formatter
   1413     // from an existing formatter and change its default behavior just
   1414     // by appending more rule sets to the end)
   1415 
   1416     // {dlf} Initialization of a fraction rule set requires the default rule
   1417     // set to be known.  For purposes of initialization, this is always the
   1418     // last public rule set, no matter what the localization data says.
   1419     initDefaultRuleSet();
   1420 
   1421     // finally, we can go back through the temporary descriptions
   1422     // list and finish seting up the substructure (and we throw
   1423     // away the temporary descriptions as we go)
   1424     {
   1425         for (int i = 0; i < numRuleSets; i++) {
   1426             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
   1427         }
   1428     }
   1429 
   1430     // Now that the rules are initialized, the 'real' default rule
   1431     // set can be adjusted by the localization data.
   1432 
   1433     // The C code keeps the localization array as is, rather than building
   1434     // a separate array of the public rule set names, so we have less work
   1435     // to do here-- but we still need to check the names.
   1436 
   1437     if (localizationInfos) {
   1438         // confirm the names, if any aren't in the rules, that's an error
   1439         // it is ok if the rules contain public rule sets that are not in this list
   1440         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
   1441             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
   1442             NFRuleSet* rs = findRuleSet(name, status);
   1443             if (rs == NULL) {
   1444                 break; // error
   1445             }
   1446             if (i == 0) {
   1447                 defaultRuleSet = rs;
   1448             }
   1449         }
   1450     } else {
   1451         defaultRuleSet = getDefaultRuleSet();
   1452     }
   1453 
   1454 cleanup:
   1455     delete[] ruleSetDescriptions;
   1456 }
   1457 
   1458 void
   1459 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1460 {
   1461     // iterate through the characters...
   1462     UnicodeString result;
   1463 
   1464     int start = 0;
   1465     while (start != -1 && start < description.length()) {
   1466         // seek to the first non-whitespace character...
   1467         while (start < description.length()
   1468             && uprv_isRuleWhiteSpace(description.charAt(start))) {
   1469             ++start;
   1470         }
   1471 
   1472         // locate the next semicolon in the text and copy the text from
   1473         // our current position up to that semicolon into the result
   1474         int32_t p = description.indexOf(gSemiColon, start);
   1475         if (p == -1) {
   1476             // or if we don't find a semicolon, just copy the rest of
   1477             // the string into the result
   1478             result.append(description, start, description.length() - start);
   1479             start = -1;
   1480         }
   1481         else if (p < description.length()) {
   1482             result.append(description, start, p + 1 - start);
   1483             start = p + 1;
   1484         }
   1485 
   1486         // when we get here, we've seeked off the end of the sring, and
   1487         // we terminate the loop (we continue until *start* is -1 rather
   1488         // than until *p* is -1, because otherwise we'd miss the last
   1489         // rule in the description)
   1490         else {
   1491             start = -1;
   1492         }
   1493     }
   1494 
   1495     description.setTo(result);
   1496 }
   1497 
   1498 
   1499 void
   1500 RuleBasedNumberFormat::dispose()
   1501 {
   1502     if (ruleSets) {
   1503         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1504             delete *p;
   1505         }
   1506         uprv_free(ruleSets);
   1507         ruleSets = NULL;
   1508     }
   1509 
   1510 #if !UCONFIG_NO_COLLATION
   1511     delete collator;
   1512 #endif
   1513     collator = NULL;
   1514 
   1515     delete decimalFormatSymbols;
   1516     decimalFormatSymbols = NULL;
   1517 
   1518     delete lenientParseRules;
   1519     lenientParseRules = NULL;
   1520 
   1521     if (localizations) localizations = localizations->unref();
   1522 }
   1523 
   1524 
   1525 //-----------------------------------------------------------------------
   1526 // package-internal API
   1527 //-----------------------------------------------------------------------
   1528 
   1529 /**
   1530  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1531  * this function creates it the first time it's called.
   1532  * @return The collator to use for lenient parsing, or null if lenient parsing
   1533  * is turned off.
   1534 */
   1535 Collator*
   1536 RuleBasedNumberFormat::getCollator() const
   1537 {
   1538 #if !UCONFIG_NO_COLLATION
   1539     if (!ruleSets) {
   1540         return NULL;
   1541     }
   1542 
   1543     // lazy-evaulate the collator
   1544     if (collator == NULL && lenient) {
   1545         // create a default collator based on the formatter's locale,
   1546         // then pull out that collator's rules, append any additional
   1547         // rules specified in the description, and create a _new_
   1548         // collator based on the combinaiton of those rules
   1549 
   1550         UErrorCode status = U_ZERO_ERROR;
   1551 
   1552         Collator* temp = Collator::createInstance(locale, status);
   1553         if (U_SUCCESS(status) &&
   1554             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
   1555 
   1556             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
   1557             if (lenientParseRules) {
   1558                 UnicodeString rules(newCollator->getRules());
   1559                 rules.append(*lenientParseRules);
   1560 
   1561                 newCollator = new RuleBasedCollator(rules, status);
   1562                 // Exit if newCollator could not be created.
   1563                 if (newCollator == NULL) {
   1564                 	return NULL;
   1565                 }
   1566             } else {
   1567                 temp = NULL;
   1568             }
   1569             if (U_SUCCESS(status)) {
   1570                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1571                 // cast away const
   1572                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1573             } else {
   1574                 delete newCollator;
   1575             }
   1576         }
   1577         delete temp;
   1578     }
   1579 #endif
   1580 
   1581     // if lenient-parse mode is off, this will be null
   1582     // (see setLenientParseMode())
   1583     return collator;
   1584 }
   1585 
   1586 
   1587 /**
   1588  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1589  * instances owned by this formatter.  This object is lazily created: this function
   1590  * creates it the first time it's called.
   1591  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
   1592  * instances owned by this formatter.
   1593 */
   1594 DecimalFormatSymbols*
   1595 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1596 {
   1597     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1598     // is shared by all DecimalFormat instances belonging to this
   1599     // formatter
   1600     if (decimalFormatSymbols == NULL) {
   1601         UErrorCode status = U_ZERO_ERROR;
   1602         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1603         if (U_SUCCESS(status)) {
   1604             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
   1605         } else {
   1606             delete temp;
   1607         }
   1608     }
   1609     return decimalFormatSymbols;
   1610 }
   1611 
   1612 U_NAMESPACE_END
   1613 
   1614 /* U_HAVE_RBNF */
   1615 #endif
   1616