Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2010, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include <typeinfo>  // for 'typeid' to work
      9 
     10 #include "unicode/rbnf.h"
     11 
     12 #if U_HAVE_RBNF
     13 
     14 #include "unicode/normlzr.h"
     15 #include "unicode/tblcoll.h"
     16 #include "unicode/uchar.h"
     17 #include "unicode/ucol.h"
     18 #include "unicode/uloc.h"
     19 #include "unicode/unum.h"
     20 #include "unicode/ures.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/utf16.h"
     23 #include "unicode/udata.h"
     24 #include "nfrs.h"
     25 
     26 #include "cmemory.h"
     27 #include "cstring.h"
     28 #include "util.h"
     29 #include "uresimp.h"
     30 
     31 // debugging
     32 // #define DEBUG
     33 
     34 #ifdef DEBUG
     35 #include "stdio.h"
     36 #endif
     37 
     38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
     39 
// NUL-terminated UTF-16 code units spelling "%%".
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// NUL-terminated UTF-16 code units spelling "%%lenient-parse:".
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// The single code unit ';' (U+003B).
static const UChar gSemiColon = 0x003B;
// NUL-terminated UTF-16 code units spelling ";%".
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square (2^44).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)

// Temporary workaround - when noParse is true, do nothing in parse.
// TODO: We need a real fix - see #6895/#6896
// NULL-terminated list of language codes. The URBNFRuleSetTag constructor sets
// noParse for URBNF_SPELLOUT formatters whose locale language appears here.
static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
     64 
     65 U_NAMESPACE_BEGIN
     66 
     67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     68 
     69 /*
     70 This is a utility class. It does not use ICU's RTTI.
     71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     72 Please make sure that intltest passes on Windows in Release mode,
     73 since the string pooling per compilation unit will mess up how RTTI works.
     74 The RTTI code was also removed due to lack of code coverage.
     75 */
     76 class LocalizationInfo : public UMemory {
     77 protected:
     78     virtual ~LocalizationInfo() {};
     79     uint32_t refcount;
     80 
     81 public:
     82     LocalizationInfo() : refcount(0) {}
     83 
     84     LocalizationInfo* ref(void) {
     85         ++refcount;
     86         return this;
     87     }
     88 
     89     LocalizationInfo* unref(void) {
     90         if (refcount && --refcount == 0) {
     91             delete this;
     92         }
     93         return NULL;
     94     }
     95 
     96     virtual UBool operator==(const LocalizationInfo* rhs) const;
     97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
     98 
     99     virtual int32_t getNumberOfRuleSets(void) const = 0;
    100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
    101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    102     virtual const UChar* getLocaleName(int32_t index) const = 0;
    103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    104 
    105     virtual int32_t indexForLocale(const UChar* locale) const;
    106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    107 
    108 //    virtual UClassID getDynamicClassID() const = 0;
    109 //    static UClassID getStaticClassID(void);
    110 };
    111 
    112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    113 
    114 // if both strings are NULL, this returns TRUE
    115 static UBool
    116 streq(const UChar* lhs, const UChar* rhs) {
    117     if (rhs == lhs) {
    118         return TRUE;
    119     }
    120     if (lhs && rhs) {
    121         return u_strcmp(lhs, rhs) == 0;
    122     }
    123     return FALSE;
    124 }
    125 
    126 UBool
    127 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    128     if (rhs) {
    129         if (this == rhs) {
    130             return TRUE;
    131         }
    132 
    133         int32_t rsc = getNumberOfRuleSets();
    134         if (rsc == rhs->getNumberOfRuleSets()) {
    135             for (int i = 0; i < rsc; ++i) {
    136                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    137                     return FALSE;
    138                 }
    139             }
    140             int32_t dlc = getNumberOfDisplayLocales();
    141             if (dlc == rhs->getNumberOfDisplayLocales()) {
    142                 for (int i = 0; i < dlc; ++i) {
    143                     const UChar* locale = getLocaleName(i);
    144                     int32_t ix = rhs->indexForLocale(locale);
    145                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    146                     if (!streq(locale, rhs->getLocaleName(ix))) {
    147                         return FALSE;
    148                     }
    149                     for (int j = 0; j < rsc; ++j) {
    150                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    151                             return FALSE;
    152                         }
    153                     }
    154                 }
    155                 return TRUE;
    156             }
    157         }
    158     }
    159     return FALSE;
    160 }
    161 
    162 int32_t
    163 LocalizationInfo::indexForLocale(const UChar* locale) const {
    164     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    165         if (streq(locale, getLocaleName(i))) {
    166             return i;
    167         }
    168     }
    169     return -1;
    170 }
    171 
    172 int32_t
    173 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    174     if (ruleset) {
    175         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    176             if (streq(ruleset, getRuleSetName(i))) {
    177                 return i;
    178             }
    179         }
    180     }
    181     return -1;
    182 }
    183 
    184 
    185 typedef void (*Fn_Deleter)(void*);
    186 
    187 class VArray {
    188     void** buf;
    189     int32_t cap;
    190     int32_t size;
    191     Fn_Deleter deleter;
    192 public:
    193     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    194 
    195     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    196 
    197     ~VArray() {
    198         if (deleter) {
    199             for (int i = 0; i < size; ++i) {
    200                 (*deleter)(buf[i]);
    201             }
    202         }
    203         uprv_free(buf);
    204     }
    205 
    206     int32_t length() {
    207         return size;
    208     }
    209 
    210     void add(void* elem, UErrorCode& status) {
    211         if (U_SUCCESS(status)) {
    212             if (size == cap) {
    213                 if (cap == 0) {
    214                     cap = 1;
    215                 } else if (cap < 256) {
    216                     cap *= 2;
    217                 } else {
    218                     cap += 256;
    219                 }
    220                 if (buf == NULL) {
    221                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    222                 } else {
    223                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    224                 }
    225                 if (buf == NULL) {
    226                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    227                     status = U_MEMORY_ALLOCATION_ERROR;
    228                     return;
    229                 }
    230                 void* start = &buf[size];
    231                 size_t count = (cap - size) * sizeof(void*);
    232                 uprv_memset(start, 0, count); // fill with nulls, just because
    233             }
    234             buf[size++] = elem;
    235         }
    236     }
    237 
    238     void** release(void) {
    239         void** result = buf;
    240         buf = NULL;
    241         cap = 0;
    242         size = 0;
    243         return result;
    244     }
    245 };
    246 
class LocDataParser;

/**
 * LocalizationInfo built by LocDataParser from a localization string.
 *
 * Layout of `data`, as used by the accessors defined later in this file:
 *   data[0][i]     name of rule set i
 *   data[k+1][0]   name of display locale k
 *   data[k+1][i+1] display name of rule set i in locale k
 * The destructor frees each data[n] up to the first NULL entry, then `data`
 * itself, then `info`.
 */
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // text buffer, freed in the destructor
    UChar*** data;        // NULL-terminated array of string arrays (see above)
    int32_t numRuleSets;  // value returned by getNumberOfRuleSets()
    int32_t numLocales;   // value returned by getNumberOfDisplayLocales()

friend class LocDataParser;

    // Private: instances are created by LocDataParser (a friend).
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info`; returns NULL without setting an error when `info` is empty.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): no definition is visible in this part of the file - confirm it is still needed.
    void init(UErrorCode& status) const;
};
    278 
    279 
// Code points of the punctuation recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
    288 
    289 /**
    290  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    291  */
    292 class LocDataParser {
    293     UChar* data;
    294     const UChar* e;
    295     UChar* p;
    296     UChar ch;
    297     UParseError& pe;
    298     UErrorCode& ec;
    299 
    300 public:
    301     LocDataParser(UParseError& parseError, UErrorCode& status)
    302         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    303     ~LocDataParser() {}
    304 
    305     /*
    306     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    307     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    308     */
    309     StringLocalizationInfo* parse(UChar* data, int32_t len);
    310 
    311 private:
    312 
    313     void inc(void) { ++p; ch = 0xffff; }
    314     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    315     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    316     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    317     UBool inList(UChar c, const UChar* list) const {
    318         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
    319         while (*list && *list != c) ++list; return *list == c;
    320     }
    321     void parseError(const char* msg);
    322 
    323     StringLocalizationInfo* doParse(void);
    324 
    325     UChar** nextArray(int32_t& requiredLength);
    326     UChar*  nextString(void);
    327 };
    328 
// ERROR(msg): report a parse failure through parseError() and return NULL from
// the enclosing function. The message text is forwarded only in DEBUG builds.
// NOTE: the macro expands to two statements, so it must be used inside a
// braced block (never as the sole body of an unbraced if/else).
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#else
#define ERROR(msg) parseError(NULL); return NULL;
#endif
    334 
    335 
// NUL-terminated terminator lists handed to LocDataParser::inList() by nextString().

// Terminator for a string opened with a double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator for a string opened with a single quote (tick).
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminators for an unquoted string. The leading SPACE makes inList() treat
// every rule white space character as a terminator as well.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
    347 
    348 static void
    349 DeleteFn(void* p) {
    350   uprv_free(p);
    351 }
    352 
    353 StringLocalizationInfo*
    354 LocDataParser::parse(UChar* _data, int32_t len) {
    355     if (U_FAILURE(ec)) {
    356         if (_data) uprv_free(_data);
    357         return NULL;
    358     }
    359 
    360     pe.line = 0;
    361     pe.offset = -1;
    362     pe.postContext[0] = 0;
    363     pe.preContext[0] = 0;
    364 
    365     if (_data == NULL) {
    366         ec = U_ILLEGAL_ARGUMENT_ERROR;
    367         return NULL;
    368     }
    369 
    370     if (len <= 0) {
    371         ec = U_ILLEGAL_ARGUMENT_ERROR;
    372         uprv_free(_data);
    373         return NULL;
    374     }
    375 
    376     data = _data;
    377     e = data + len;
    378     p = _data;
    379     ch = 0xffff;
    380 
    381     return doParse();
    382 }
    383 
    384 
    385 StringLocalizationInfo*
    386 LocDataParser::doParse(void) {
    387     skipWhitespace();
    388     if (!checkInc(OPEN_ANGLE)) {
    389         ERROR("Missing open angle");
    390     } else {
    391         VArray array(DeleteFn);
    392         UBool mightHaveNext = TRUE;
    393         int32_t requiredLength = -1;
    394         while (mightHaveNext) {
    395             mightHaveNext = FALSE;
    396             UChar** elem = nextArray(requiredLength);
    397             skipWhitespace();
    398             UBool haveComma = check(COMMA);
    399             if (elem) {
    400                 array.add(elem, ec);
    401                 if (haveComma) {
    402                     inc();
    403                     mightHaveNext = TRUE;
    404                 }
    405             } else if (haveComma) {
    406                 ERROR("Unexpected character");
    407             }
    408         }
    409 
    410         skipWhitespace();
    411         if (!checkInc(CLOSE_ANGLE)) {
    412             if (check(OPEN_ANGLE)) {
    413                 ERROR("Missing comma in outer array");
    414             } else {
    415                 ERROR("Missing close angle bracket in outer array");
    416             }
    417         }
    418 
    419         skipWhitespace();
    420         if (p != e) {
    421             ERROR("Extra text after close of localization data");
    422         }
    423 
    424         array.add(NULL, ec);
    425         if (U_SUCCESS(ec)) {
    426             int32_t numLocs = array.length() - 2; // subtract first, NULL
    427             UChar*** result = (UChar***)array.release();
    428 
    429             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    430         }
    431     }
    432 
    433     ERROR("Unknown error");
    434 }
    435 
    436 UChar**
    437 LocDataParser::nextArray(int32_t& requiredLength) {
    438     if (U_FAILURE(ec)) {
    439         return NULL;
    440     }
    441 
    442     skipWhitespace();
    443     if (!checkInc(OPEN_ANGLE)) {
    444         ERROR("Missing open angle");
    445     }
    446 
    447     VArray array;
    448     UBool mightHaveNext = TRUE;
    449     while (mightHaveNext) {
    450         mightHaveNext = FALSE;
    451         UChar* elem = nextString();
    452         skipWhitespace();
    453         UBool haveComma = check(COMMA);
    454         if (elem) {
    455             array.add(elem, ec);
    456             if (haveComma) {
    457                 inc();
    458                 mightHaveNext = TRUE;
    459             }
    460         } else if (haveComma) {
    461             ERROR("Unexpected comma");
    462         }
    463     }
    464     skipWhitespace();
    465     if (!checkInc(CLOSE_ANGLE)) {
    466         if (check(OPEN_ANGLE)) {
    467             ERROR("Missing close angle bracket in inner array");
    468         } else {
    469             ERROR("Missing comma in inner array");
    470         }
    471     }
    472 
    473     array.add(NULL, ec);
    474     if (U_SUCCESS(ec)) {
    475         if (requiredLength == -1) {
    476             requiredLength = array.length() + 1;
    477         } else if (array.length() != requiredLength) {
    478             ec = U_ILLEGAL_ARGUMENT_ERROR;
    479             ERROR("Array not of required length");
    480         }
    481 
    482         return (UChar**)array.release();
    483     }
    484     ERROR("Unknown Error");
    485 }
    486 
    487 UChar*
    488 LocDataParser::nextString() {
    489     UChar* result = NULL;
    490 
    491     skipWhitespace();
    492     if (p < e) {
    493         const UChar* terminators;
    494         UChar c = *p;
    495         UBool haveQuote = c == QUOTE || c == TICK;
    496         if (haveQuote) {
    497             inc();
    498             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
    499         } else {
    500             terminators = NOQUOTE_STOPLIST;
    501         }
    502         UChar* start = p;
    503         while (p < e && !inList(*p, terminators)) ++p;
    504         if (p == e) {
    505             ERROR("Unexpected end of data");
    506         }
    507 
    508         UChar x = *p;
    509         if (p > start) {
    510             ch = x;
    511             *p = 0x0; // terminate by writing to data
    512             result = start; // just point into data
    513         }
    514         if (haveQuote) {
    515             if (x != c) {
    516                 ERROR("Missing matching quote");
    517             } else if (p == start) {
    518                 ERROR("Empty string");
    519             }
    520             inc();
    521         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
    522             ERROR("Unexpected character in string");
    523         }
    524     }
    525 
    526     // ok for there to be no next string
    527     return result;
    528 }
    529 
    530 void
    531 LocDataParser::parseError(const char* /*str*/) {
    532     if (!data) {
    533         return;
    534     }
    535 
    536     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    537     if (start < data) {
    538         start = data;
    539     }
    540     for (UChar* x = p; --x >= start;) {
    541         if (!*x) {
    542             start = x+1;
    543             break;
    544         }
    545     }
    546     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    547     if (limit > e) {
    548         limit = e;
    549     }
    550     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    551     pe.preContext[p-start] = 0;
    552     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    553     pe.postContext[limit-p] = 0;
    554     pe.offset = (int32_t)(p - data);
    555 
    556 #ifdef DEBUG
    557     fprintf(stderr, "%s at or near character %d: ", str, p-data);
    558 
    559     UnicodeString msg;
    560     msg.append(start, p - start);
    561     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    562     msg.append(p, limit-p);
    563     msg.append("'");
    564 
    565     char buf[128];
    566     int32_t len = msg.extract(0, msg.length(), buf, 128);
    567     if (len >= 128) {
    568         buf[127] = 0;
    569     } else {
    570         buf[len] = 0;
    571     }
    572     fprintf(stderr, "%s\n", buf);
    573     fflush(stderr);
    574 #endif
    575 
    576     uprv_free(data);
    577     data = NULL;
    578     p = NULL;
    579     e = NULL;
    580 
    581     if (U_SUCCESS(ec)) {
    582         ec = U_PARSE_ERROR;
    583     }
    584 }
    585 
    586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    587 
    588 StringLocalizationInfo*
    589 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    590     if (U_FAILURE(status)) {
    591         return NULL;
    592     }
    593 
    594     int32_t len = info.length();
    595     if (len == 0) {
    596         return NULL; // no error;
    597     }
    598 
    599     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    600     if (!p) {
    601         status = U_MEMORY_ALLOCATION_ERROR;
    602         return NULL;
    603     }
    604     info.extract(p, len, status);
    605     if (!U_FAILURE(status)) {
    606         status = U_ZERO_ERROR; // clear warning about non-termination
    607     }
    608 
    609     LocDataParser parser(perror, status);
    610     return parser.parse(p, len);
    611 }
    612 
    613 StringLocalizationInfo::~StringLocalizationInfo() {
    614     for (UChar*** p = (UChar***)data; *p; ++p) {
    615         // remaining data is simply pointer into our unicode string data.
    616         if (*p) uprv_free(*p);
    617     }
    618     if (data) uprv_free(data);
    619     if (info) uprv_free(info);
    620 }
    621 
    622 
    623 const UChar*
    624 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    625     if (index >= 0 && index < getNumberOfRuleSets()) {
    626         return data[0][index];
    627     }
    628     return NULL;
    629 }
    630 
    631 const UChar*
    632 StringLocalizationInfo::getLocaleName(int32_t index) const {
    633     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    634         return data[index+1][0];
    635     }
    636     return NULL;
    637 }
    638 
    639 const UChar*
    640 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    641     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    642         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    643         return data[localeIndex+1][ruleIndex+1];
    644     }
    645     return NULL;
    646 }
    647 
    648 // ----------
    649 
    650 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    651                                              const UnicodeString& locs,
    652                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    653   : ruleSets(NULL)
    654   , defaultRuleSet(NULL)
    655   , locale(alocale)
    656   , collator(NULL)
    657   , decimalFormatSymbols(NULL)
    658   , lenient(FALSE)
    659   , lenientParseRules(NULL)
    660   , localizations(NULL)
    661   , noParse(FALSE) //TODO: to be removed after #6895
    662 {
    663   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    664   init(description, locinfo, perror, status);
    665 }
    666 
    667 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    668                                              const UnicodeString& locs,
    669                                              UParseError& perror, UErrorCode& status)
    670   : ruleSets(NULL)
    671   , defaultRuleSet(NULL)
    672   , locale(Locale::getDefault())
    673   , collator(NULL)
    674   , decimalFormatSymbols(NULL)
    675   , lenient(FALSE)
    676   , lenientParseRules(NULL)
    677   , localizations(NULL)
    678   , noParse(FALSE) //TODO: to be removed after #6895
    679 {
    680   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
    681   init(description, locinfo, perror, status);
    682 }
    683 
    684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    685                                              LocalizationInfo* info,
    686                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
    687   : ruleSets(NULL)
    688   , defaultRuleSet(NULL)
    689   , locale(alocale)
    690   , collator(NULL)
    691   , decimalFormatSymbols(NULL)
    692   , lenient(FALSE)
    693   , lenientParseRules(NULL)
    694   , localizations(NULL)
    695   , noParse(FALSE) //TODO: to be removed after #6895
    696 {
    697   init(description, info, perror, status);
    698 }
    699 
    700 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    701                          UParseError& perror,
    702                          UErrorCode& status)
    703   : ruleSets(NULL)
    704   , defaultRuleSet(NULL)
    705   , locale(Locale::getDefault())
    706   , collator(NULL)
    707   , decimalFormatSymbols(NULL)
    708   , lenient(FALSE)
    709   , lenientParseRules(NULL)
    710   , localizations(NULL)
    711   , noParse(FALSE) //TODO: to be removed after #6895
    712 {
    713     init(description, NULL, perror, status);
    714 }
    715 
    716 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
    717                          const Locale& aLocale,
    718                          UParseError& perror,
    719                          UErrorCode& status)
    720   : ruleSets(NULL)
    721   , defaultRuleSet(NULL)
    722   , locale(aLocale)
    723   , collator(NULL)
    724   , decimalFormatSymbols(NULL)
    725   , lenient(FALSE)
    726   , lenientParseRules(NULL)
    727   , localizations(NULL)
    728   , noParse(FALSE) //TODO: to be removed after #6895
    729 {
    730     init(description, NULL, perror, status);
    731 }
    732 
    733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    734   : ruleSets(NULL)
    735   , defaultRuleSet(NULL)
    736   , locale(alocale)
    737   , collator(NULL)
    738   , decimalFormatSymbols(NULL)
    739   , lenient(FALSE)
    740   , lenientParseRules(NULL)
    741   , localizations(NULL)
    742 {
    743     if (U_FAILURE(status)) {
    744         return;
    745     }
    746 
    747     const char* rules_tag = "RBNFRules";
    748     const char* fmt_tag = "";
    749     switch (tag) {
    750     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    751     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    752     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    753     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    754     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    755     }
    756 
    757     // TODO: read localization info from resource
    758     LocalizationInfo* locinfo = NULL;
    759 
    760     int32_t len = 0;
    761     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    762     if (U_SUCCESS(status)) {
    763         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    764                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    765 
    766         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    767         if (U_FAILURE(status)) {
    768             ures_close(nfrb);
    769         }
    770         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    771         if (U_FAILURE(status)) {
    772             ures_close(rbnfRules);
    773             ures_close(nfrb);
    774             return;
    775         }
    776 
    777         UnicodeString desc;
    778         while (ures_hasNext(ruleSets)) {
    779            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
    780            desc.append(currentString);
    781         }
    782         UParseError perror;
    783 
    784 
    785         init (desc, locinfo, perror, status);
    786 
    787         //TODO: we need a real fix - see #6895 / #6896
    788         noParse = FALSE;
    789         if (tag == URBNF_SPELLOUT) {
    790             const char *lang = alocale.getLanguage();
    791             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
    792                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
    793                     noParse = TRUE;
    794                     break;
    795                 }
    796             }
    797         }
    798         //TODO: end
    799 
    800         ures_close(ruleSets);
    801         ures_close(rbnfRules);
    802     }
    803     ures_close(nfrb);
    804 }
    805 
    806 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    807   : NumberFormat(rhs)
    808   , ruleSets(NULL)
    809   , defaultRuleSet(NULL)
    810   , locale(rhs.locale)
    811   , collator(NULL)
    812   , decimalFormatSymbols(NULL)
    813   , lenient(FALSE)
    814   , lenientParseRules(NULL)
    815   , localizations(NULL)
    816 {
    817     this->operator=(rhs);
    818 }
    819 
    820 // --------
    821 
    822 RuleBasedNumberFormat&
    823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
    824 {
    825     UErrorCode status = U_ZERO_ERROR;
    826     dispose();
    827     locale = rhs.locale;
    828     lenient = rhs.lenient;
    829 
    830     UnicodeString rules = rhs.getRules();
    831     UParseError perror;
    832     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    833 
    834     //TODO: remove below when we fix the parse bug - See #6895 / #6896
    835     noParse = rhs.noParse;
    836 
    837     return *this;
    838 }
    839 
// Destructor: all cleanup is delegated to dispose() (defined outside this
// part of the file).
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
    844 
    845 Format*
    846 RuleBasedNumberFormat::clone(void) const
    847 {
    848     RuleBasedNumberFormat * result = NULL;
    849     UnicodeString rules = getRules();
    850     UErrorCode status = U_ZERO_ERROR;
    851     UParseError perror;
    852     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
    853     /* test for NULL */
    854     if (result == 0) {
    855         status = U_MEMORY_ALLOCATION_ERROR;
    856         return 0;
    857     }
    858     if (U_FAILURE(status)) {
    859         delete result;
    860         result = 0;
    861     } else {
    862         result->lenient = lenient;
    863 
    864         //TODO: remove below when we fix the parse bug - See #6895 / #6896
    865         result->noParse = noParse;
    866     }
    867     return result;
    868 }
    869 
    870 UBool
    871 RuleBasedNumberFormat::operator==(const Format& other) const
    872 {
    873     if (this == &other) {
    874         return TRUE;
    875     }
    876 
    877     if (typeid(*this) == typeid(other)) {
    878         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    879         if (locale == rhs.locale &&
    880             lenient == rhs.lenient &&
    881             (localizations == NULL
    882                 ? rhs.localizations == NULL
    883                 : (rhs.localizations == NULL
    884                     ? FALSE
    885                     : *localizations == rhs.localizations))) {
    886 
    887             NFRuleSet** p = ruleSets;
    888             NFRuleSet** q = rhs.ruleSets;
    889             if (p == NULL) {
    890                 return q == NULL;
    891             } else if (q == NULL) {
    892                 return FALSE;
    893             }
    894             while (*p && *q && (**p == **q)) {
    895                 ++p;
    896                 ++q;
    897             }
    898             return *q == NULL && *p == NULL;
    899         }
    900     }
    901 
    902     return FALSE;
    903 }
    904 
    905 UnicodeString
    906 RuleBasedNumberFormat::getRules() const
    907 {
    908     UnicodeString result;
    909     if (ruleSets != NULL) {
    910         for (NFRuleSet** p = ruleSets; *p; ++p) {
    911             (*p)->appendRules(result);
    912         }
    913     }
    914     return result;
    915 }
    916 
    917 UnicodeString
    918 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    919 {
    920     if (localizations) {
    921       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    922       return string;
    923     } else if (ruleSets) {
    924         UnicodeString result;
    925         for (NFRuleSet** p = ruleSets; *p; ++p) {
    926             NFRuleSet* rs = *p;
    927             if (rs->isPublic()) {
    928                 if (--index == -1) {
    929                     rs->getName(result);
    930                     return result;
    931                 }
    932             }
    933         }
    934     }
    935     UnicodeString empty;
    936     return empty;
    937 }
    938 
    939 int32_t
    940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    941 {
    942     int32_t result = 0;
    943     if (localizations) {
    944       result = localizations->getNumberOfRuleSets();
    945     } else if (ruleSets) {
    946         for (NFRuleSet** p = ruleSets; *p; ++p) {
    947             if ((**p).isPublic()) {
    948                 ++result;
    949             }
    950         }
    951     }
    952     return result;
    953 }
    954 
    955 int32_t
    956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    957     if (localizations) {
    958         return localizations->getNumberOfDisplayLocales();
    959     }
    960     return 0;
    961 }
    962 
    963 Locale
    964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    965     if (U_FAILURE(status)) {
    966         return Locale("");
    967     }
    968     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
    969         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
    970         char buffer[64];
    971         int32_t cap = name.length() + 1;
    972         char* bp = buffer;
    973         if (cap > 64) {
    974             bp = (char *)uprv_malloc(cap);
    975             if (bp == NULL) {
    976                 status = U_MEMORY_ALLOCATION_ERROR;
    977                 return Locale("");
    978             }
    979         }
    980         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
    981         Locale retLocale(bp);
    982         if (bp != buffer) {
    983             uprv_free(bp);
    984         }
    985         return retLocale;
    986     }
    987     status = U_ILLEGAL_ARGUMENT_ERROR;
    988     Locale retLocale;
    989     return retLocale;
    990 }
    991 
    992 UnicodeString
    993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
    994     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
    995         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
    996         int32_t len = localeName.length();
    997         UChar* localeStr = localeName.getBuffer(len + 1);
    998         while (len >= 0) {
    999             localeStr[len] = 0;
   1000             int32_t ix = localizations->indexForLocale(localeStr);
   1001             if (ix >= 0) {
   1002                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1003                 return name;
   1004             }
   1005 
   1006             // trim trailing portion, skipping over ommitted sections
   1007             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1008             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1009         }
   1010         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1011         return name;
   1012     }
   1013     UnicodeString bogus;
   1014     bogus.setToBogus();
   1015     return bogus;
   1016 }
   1017 
   1018 UnicodeString
   1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1020     if (localizations) {
   1021         UnicodeString rsn(ruleSetName);
   1022         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1023         return getRuleSetDisplayName(ix, localeParam);
   1024     }
   1025     UnicodeString bogus;
   1026     bogus.setToBogus();
   1027     return bogus;
   1028 }
   1029 
   1030 NFRuleSet*
   1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1032 {
   1033     if (U_SUCCESS(status) && ruleSets) {
   1034         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1035             NFRuleSet* rs = *p;
   1036             if (rs->isNamed(name)) {
   1037                 return rs;
   1038             }
   1039         }
   1040         status = U_ILLEGAL_ARGUMENT_ERROR;
   1041     }
   1042     return NULL;
   1043 }
   1044 
   1045 UnicodeString&
   1046 RuleBasedNumberFormat::format(int32_t number,
   1047                               UnicodeString& toAppendTo,
   1048                               FieldPosition& /* pos */) const
   1049 {
   1050     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
   1051     return toAppendTo;
   1052 }
   1053 
   1054 
   1055 UnicodeString&
   1056 RuleBasedNumberFormat::format(int64_t number,
   1057                               UnicodeString& toAppendTo,
   1058                               FieldPosition& /* pos */) const
   1059 {
   1060     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1061     return toAppendTo;
   1062 }
   1063 
   1064 
   1065 UnicodeString&
   1066 RuleBasedNumberFormat::format(double number,
   1067                               UnicodeString& toAppendTo,
   1068                               FieldPosition& /* pos */) const
   1069 {
   1070     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1071     return toAppendTo;
   1072 }
   1073 
   1074 
   1075 UnicodeString&
   1076 RuleBasedNumberFormat::format(int32_t number,
   1077                               const UnicodeString& ruleSetName,
   1078                               UnicodeString& toAppendTo,
   1079                               FieldPosition& /* pos */,
   1080                               UErrorCode& status) const
   1081 {
   1082     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1083     if (U_SUCCESS(status)) {
   1084         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1085             // throw new IllegalArgumentException("Can't use internal rule set");
   1086             status = U_ILLEGAL_ARGUMENT_ERROR;
   1087         } else {
   1088             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1089             if (rs) {
   1090                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
   1091             }
   1092         }
   1093     }
   1094     return toAppendTo;
   1095 }
   1096 
   1097 
   1098 UnicodeString&
   1099 RuleBasedNumberFormat::format(int64_t number,
   1100                               const UnicodeString& ruleSetName,
   1101                               UnicodeString& toAppendTo,
   1102                               FieldPosition& /* pos */,
   1103                               UErrorCode& status) const
   1104 {
   1105     if (U_SUCCESS(status)) {
   1106         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1107             // throw new IllegalArgumentException("Can't use internal rule set");
   1108             status = U_ILLEGAL_ARGUMENT_ERROR;
   1109         } else {
   1110             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1111             if (rs) {
   1112                 rs->format(number, toAppendTo, toAppendTo.length());
   1113             }
   1114         }
   1115     }
   1116     return toAppendTo;
   1117 }
   1118 
   1119 
   1120 // make linker happy
   1121 UnicodeString&
   1122 RuleBasedNumberFormat::format(const Formattable& obj,
   1123                               UnicodeString& toAppendTo,
   1124                               FieldPosition& pos,
   1125                               UErrorCode& status) const
   1126 {
   1127     return NumberFormat::format(obj, toAppendTo, pos, status);
   1128 }
   1129 
   1130 UnicodeString&
   1131 RuleBasedNumberFormat::format(double number,
   1132                               const UnicodeString& ruleSetName,
   1133                               UnicodeString& toAppendTo,
   1134                               FieldPosition& /* pos */,
   1135                               UErrorCode& status) const
   1136 {
   1137     if (U_SUCCESS(status)) {
   1138         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1139             // throw new IllegalArgumentException("Can't use internal rule set");
   1140             status = U_ILLEGAL_ARGUMENT_ERROR;
   1141         } else {
   1142             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1143             if (rs) {
   1144                 rs->format(number, toAppendTo, toAppendTo.length());
   1145             }
   1146         }
   1147     }
   1148     return toAppendTo;
   1149 }
   1150 
   1151 void
   1152 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1153                              Formattable& result,
   1154                              ParsePosition& parsePosition) const
   1155 {
   1156     //TODO: We need a real fix.  See #6895 / #6896
   1157     if (noParse) {
   1158         // skip parsing
   1159         parsePosition.setErrorIndex(0);
   1160         return;
   1161     }
   1162 
   1163     if (!ruleSets) {
   1164         parsePosition.setErrorIndex(0);
   1165         return;
   1166     }
   1167 
   1168     UnicodeString workingText(text, parsePosition.getIndex());
   1169     ParsePosition workingPos(0);
   1170 
   1171     ParsePosition high_pp(0);
   1172     Formattable high_result;
   1173 
   1174     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1175         NFRuleSet *rp = *p;
   1176         if (rp->isPublic() && rp->isParseable()) {
   1177             ParsePosition working_pp(0);
   1178             Formattable working_result;
   1179 
   1180             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1181             if (working_pp.getIndex() > high_pp.getIndex()) {
   1182                 high_pp = working_pp;
   1183                 high_result = working_result;
   1184 
   1185                 if (high_pp.getIndex() == workingText.length()) {
   1186                     break;
   1187                 }
   1188             }
   1189         }
   1190     }
   1191 
   1192     int32_t startIndex = parsePosition.getIndex();
   1193     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1194     if (high_pp.getIndex() > 0) {
   1195         parsePosition.setErrorIndex(-1);
   1196     } else {
   1197         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1198         parsePosition.setErrorIndex(startIndex + errorIndex);
   1199     }
   1200     result = high_result;
   1201     if (result.getType() == Formattable::kDouble) {
   1202         int32_t r = (int32_t)result.getDouble();
   1203         if ((double)r == result.getDouble()) {
   1204             result.setLong(r);
   1205         }
   1206     }
   1207 }
   1208 
   1209 #if !UCONFIG_NO_COLLATION
   1210 
   1211 void
   1212 RuleBasedNumberFormat::setLenient(UBool enabled)
   1213 {
   1214     lenient = enabled;
   1215     if (!enabled && collator) {
   1216         delete collator;
   1217         collator = NULL;
   1218     }
   1219 }
   1220 
   1221 #endif
   1222 
   1223 void
   1224 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1225     if (U_SUCCESS(status)) {
   1226         if (ruleSetName.isEmpty()) {
   1227           if (localizations) {
   1228               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1229               defaultRuleSet = findRuleSet(name, status);
   1230           } else {
   1231             initDefaultRuleSet();
   1232           }
   1233         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1234             status = U_ILLEGAL_ARGUMENT_ERROR;
   1235         } else {
   1236             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1237             if (result != NULL) {
   1238                 defaultRuleSet = result;
   1239             }
   1240         }
   1241     }
   1242 }
   1243 
   1244 UnicodeString
   1245 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1246   UnicodeString result;
   1247   if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1248     defaultRuleSet->getName(result);
   1249   } else {
   1250     result.setToBogus();
   1251   }
   1252   return result;
   1253 }
   1254 
   1255 void
   1256 RuleBasedNumberFormat::initDefaultRuleSet()
   1257 {
   1258     defaultRuleSet = NULL;
   1259     if (!ruleSets) {
   1260       return;
   1261     }
   1262 
   1263     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
   1264     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
   1265     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
   1266 
   1267     NFRuleSet**p = &ruleSets[0];
   1268     while (*p) {
   1269         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1270             defaultRuleSet = *p;
   1271             return;
   1272         } else {
   1273             ++p;
   1274         }
   1275     }
   1276 
   1277     defaultRuleSet = *--p;
   1278     if (!defaultRuleSet->isPublic()) {
   1279         while (p != ruleSets) {
   1280             if ((*--p)->isPublic()) {
   1281                 defaultRuleSet = *p;
   1282                 break;
   1283             }
   1284         }
   1285     }
   1286 }
   1287 
   1288 
   1289 void
   1290 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
   1291                             UParseError& pErr, UErrorCode& status)
   1292 {
   1293     // TODO: implement UParseError
   1294     uprv_memset(&pErr, 0, sizeof(UParseError));
   1295     // Note: this can leave ruleSets == NULL, so remaining code should check
   1296     if (U_FAILURE(status)) {
   1297         return;
   1298     }
   1299 
   1300     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
   1301 
   1302     UnicodeString description(rules);
   1303     if (!description.length()) {
   1304         status = U_MEMORY_ALLOCATION_ERROR;
   1305         return;
   1306     }
   1307 
   1308     // start by stripping the trailing whitespace from all the rules
   1309     // (this is all the whitespace follwing each semicolon in the
   1310     // description).  This allows us to look for rule-set boundaries
   1311     // by searching for ";%" without having to worry about whitespace
   1312     // between the ; and the %
   1313     stripWhitespace(description);
   1314 
   1315     // check to see if there's a set of lenient-parse rules.  If there
   1316     // is, pull them out into our temporary holding place for them,
   1317     // and delete them from the description before the real desciption-
   1318     // parsing code sees them
   1319     int32_t lp = description.indexOf(gLenientParse);
   1320     if (lp != -1) {
   1321         // we've got to make sure we're not in the middle of a rule
   1322         // (where "%%lenient-parse" would actually get treated as
   1323         // rule text)
   1324         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
   1325             // locate the beginning and end of the actual collation
   1326             // rules (there may be whitespace between the name and
   1327             // the first token in the description)
   1328             int lpEnd = description.indexOf(gSemiPercent, lp);
   1329 
   1330             if (lpEnd == -1) {
   1331                 lpEnd = description.length() - 1;
   1332             }
   1333             int lpStart = lp + u_strlen(gLenientParse);
   1334             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
   1335                 ++lpStart;
   1336             }
   1337 
   1338             // copy out the lenient-parse rules and delete them
   1339             // from the description
   1340             lenientParseRules = new UnicodeString();
   1341             /* test for NULL */
   1342             if (lenientParseRules == 0) {
   1343                 status = U_MEMORY_ALLOCATION_ERROR;
   1344                 return;
   1345             }
   1346             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
   1347 
   1348             description.remove(lp, lpEnd + 1 - lp);
   1349         }
   1350     }
   1351 
   1352     // pre-flight parsing the description and count the number of
   1353     // rule sets (";%" marks the end of one rule set and the beginning
   1354     // of the next)
   1355     int numRuleSets = 0;
   1356     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
   1357         ++numRuleSets;
   1358         ++p;
   1359     }
   1360     ++numRuleSets;
   1361 
   1362     // our rule list is an array of the appropriate size
   1363     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
   1364     /* test for NULL */
   1365     if (ruleSets == 0) {
   1366         status = U_MEMORY_ALLOCATION_ERROR;
   1367         return;
   1368     }
   1369 
   1370     for (int i = 0; i <= numRuleSets; ++i) {
   1371         ruleSets[i] = NULL;
   1372     }
   1373 
   1374     // divide up the descriptions into individual rule-set descriptions
   1375     // and store them in a temporary array.  At each step, we also
   1376     // new up a rule set, but all this does is initialize its name
   1377     // and remove it from its description.  We can't actually parse
   1378     // the rest of the descriptions and finish initializing everything
   1379     // because we have to know the names and locations of all the rule
   1380     // sets before we can actually set everything up
   1381     if(!numRuleSets) {
   1382         status = U_ILLEGAL_ARGUMENT_ERROR;
   1383         return;
   1384     }
   1385     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
   1386     if (ruleSetDescriptions == 0) {
   1387         status = U_MEMORY_ALLOCATION_ERROR;
   1388         return;
   1389     }
   1390 
   1391     {
   1392         int curRuleSet = 0;
   1393         int32_t start = 0;
   1394         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
   1395             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
   1396             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1397             if (ruleSets[curRuleSet] == 0) {
   1398                 status = U_MEMORY_ALLOCATION_ERROR;
   1399                 goto cleanup;
   1400             }
   1401             ++curRuleSet;
   1402             start = p + 1;
   1403         }
   1404         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
   1405         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1406         if (ruleSets[curRuleSet] == 0) {
   1407             status = U_MEMORY_ALLOCATION_ERROR;
   1408             goto cleanup;
   1409         }
   1410     }
   1411 
   1412     // now we can take note of the formatter's default rule set, which
   1413     // is the last public rule set in the description (it's the last
   1414     // rather than the first so that a user can create a new formatter
   1415     // from an existing formatter and change its default behavior just
   1416     // by appending more rule sets to the end)
   1417 
   1418     // {dlf} Initialization of a fraction rule set requires the default rule
   1419     // set to be known.  For purposes of initialization, this is always the
   1420     // last public rule set, no matter what the localization data says.
   1421     initDefaultRuleSet();
   1422 
   1423     // finally, we can go back through the temporary descriptions
   1424     // list and finish seting up the substructure (and we throw
   1425     // away the temporary descriptions as we go)
   1426     {
   1427         for (int i = 0; i < numRuleSets; i++) {
   1428             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
   1429         }
   1430     }
   1431 
   1432     // Now that the rules are initialized, the 'real' default rule
   1433     // set can be adjusted by the localization data.
   1434 
   1435     // The C code keeps the localization array as is, rather than building
   1436     // a separate array of the public rule set names, so we have less work
   1437     // to do here-- but we still need to check the names.
   1438 
   1439     if (localizationInfos) {
   1440         // confirm the names, if any aren't in the rules, that's an error
   1441         // it is ok if the rules contain public rule sets that are not in this list
   1442         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
   1443             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
   1444             NFRuleSet* rs = findRuleSet(name, status);
   1445             if (rs == NULL) {
   1446                 break; // error
   1447             }
   1448             if (i == 0) {
   1449                 defaultRuleSet = rs;
   1450             }
   1451         }
   1452     } else {
   1453         defaultRuleSet = getDefaultRuleSet();
   1454     }
   1455 
   1456 cleanup:
   1457     delete[] ruleSetDescriptions;
   1458 }
   1459 
   1460 void
   1461 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1462 {
   1463     // iterate through the characters...
   1464     UnicodeString result;
   1465 
   1466     int start = 0;
   1467     while (start != -1 && start < description.length()) {
   1468         // seek to the first non-whitespace character...
   1469         while (start < description.length()
   1470             && uprv_isRuleWhiteSpace(description.charAt(start))) {
   1471             ++start;
   1472         }
   1473 
   1474         // locate the next semicolon in the text and copy the text from
   1475         // our current position up to that semicolon into the result
   1476         int32_t p = description.indexOf(gSemiColon, start);
   1477         if (p == -1) {
   1478             // or if we don't find a semicolon, just copy the rest of
   1479             // the string into the result
   1480             result.append(description, start, description.length() - start);
   1481             start = -1;
   1482         }
   1483         else if (p < description.length()) {
   1484             result.append(description, start, p + 1 - start);
   1485             start = p + 1;
   1486         }
   1487 
   1488         // when we get here, we've seeked off the end of the sring, and
   1489         // we terminate the loop (we continue until *start* is -1 rather
   1490         // than until *p* is -1, because otherwise we'd miss the last
   1491         // rule in the description)
   1492         else {
   1493             start = -1;
   1494         }
   1495     }
   1496 
   1497     description.setTo(result);
   1498 }
   1499 
   1500 
   1501 void
   1502 RuleBasedNumberFormat::dispose()
   1503 {
   1504     if (ruleSets) {
   1505         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1506             delete *p;
   1507         }
   1508         uprv_free(ruleSets);
   1509         ruleSets = NULL;
   1510     }
   1511 
   1512 #if !UCONFIG_NO_COLLATION
   1513     delete collator;
   1514 #endif
   1515     collator = NULL;
   1516 
   1517     delete decimalFormatSymbols;
   1518     decimalFormatSymbols = NULL;
   1519 
   1520     delete lenientParseRules;
   1521     lenientParseRules = NULL;
   1522 
   1523     if (localizations) localizations = localizations->unref();
   1524 }
   1525 
   1526 
   1527 //-----------------------------------------------------------------------
   1528 // package-internal API
   1529 //-----------------------------------------------------------------------
   1530 
   1531 /**
   1532  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1533  * this function creates it the first time it's called.
   1534  * @return The collator to use for lenient parsing, or null if lenient parsing
   1535  * is turned off.
   1536 */
   1537 Collator*
   1538 RuleBasedNumberFormat::getCollator() const
   1539 {
   1540 #if !UCONFIG_NO_COLLATION
   1541     if (!ruleSets) {
   1542         return NULL;
   1543     }
   1544 
   1545     // lazy-evaulate the collator
   1546     if (collator == NULL && lenient) {
   1547         // create a default collator based on the formatter's locale,
   1548         // then pull out that collator's rules, append any additional
   1549         // rules specified in the description, and create a _new_
   1550         // collator based on the combinaiton of those rules
   1551 
   1552         UErrorCode status = U_ZERO_ERROR;
   1553 
   1554         Collator* temp = Collator::createInstance(locale, status);
   1555         RuleBasedCollator* newCollator;
   1556         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
   1557             if (lenientParseRules) {
   1558                 UnicodeString rules(newCollator->getRules());
   1559                 rules.append(*lenientParseRules);
   1560 
   1561                 newCollator = new RuleBasedCollator(rules, status);
   1562                 // Exit if newCollator could not be created.
   1563                 if (newCollator == NULL) {
   1564                 	return NULL;
   1565                 }
   1566             } else {
   1567                 temp = NULL;
   1568             }
   1569             if (U_SUCCESS(status)) {
   1570                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1571                 // cast away const
   1572                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1573             } else {
   1574                 delete newCollator;
   1575             }
   1576         }
   1577         delete temp;
   1578     }
   1579 #endif
   1580 
   1581     // if lenient-parse mode is off, this will be null
   1582     // (see setLenientParseMode())
   1583     return collator;
   1584 }
   1585 
   1586 
   1587 /**
   1588  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1589  * instances owned by this formatter.  This object is lazily created: this function
   1590  * creates it the first time it's called.
   1591  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
   1592  * instances owned by this formatter.
   1593 */
   1594 DecimalFormatSymbols*
   1595 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1596 {
   1597     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1598     // is shared by all DecimalFormat instances belonging to this
   1599     // formatter
   1600     if (decimalFormatSymbols == NULL) {
   1601         UErrorCode status = U_ZERO_ERROR;
   1602         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1603         if (U_SUCCESS(status)) {
   1604             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
   1605         } else {
   1606             delete temp;
   1607         }
   1608     }
   1609     return decimalFormatSymbols;
   1610 }
   1611 
   1612 U_NAMESPACE_END
   1613 
   1614 /* U_HAVE_RBNF */
   1615 #endif
   1616