Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2011, International Business Machines Corporation
      4 * and others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include <typeinfo>  // for 'typeid' to work
      9 
     10 #include "unicode/rbnf.h"
     11 
     12 #if U_HAVE_RBNF
     13 
     14 #include "unicode/normlzr.h"
     15 #include "unicode/tblcoll.h"
     16 #include "unicode/uchar.h"
     17 #include "unicode/ucol.h"
     18 #include "unicode/uloc.h"
     19 #include "unicode/unum.h"
     20 #include "unicode/ures.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/utf16.h"
     23 #include "unicode/udata.h"
     24 #include "nfrs.h"
     25 
     26 #include "cmemory.h"
     27 #include "cstring.h"
     28 #include "patternprops.h"
     29 #include "uresimp.h"
     30 
     31 // debugging
     32 // #define DEBUG
     33 
     34 #ifdef DEBUG
     35 #include "stdio.h"
     36 #endif
     37 
     38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
     39 
     40 static const UChar gPercentPercent[] =
     41 {
     42     0x25, 0x25, 0
     43 }; /* "%%" */
     44 
     45 // All urbnf objects are created through openRules, so we init all of the
     46 // Unicode string constants required by rbnf, nfrs, or nfr here.
     47 static const UChar gLenientParse[] =
     48 {
     49     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
     50 }; /* "%%lenient-parse:" */
     51 static const UChar gSemiColon = 0x003B;
     52 static const UChar gSemiPercent[] =
     53 {
     54     0x3B, 0x25, 0
     55 }; /* ";%" */
     56 
     57 #define kSomeNumberOfBitsDiv2 22
     58 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
     59 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
     60 
// Temporary workaround - when noParse is true, do nothing in parse.
// TODO: We need a real fix - see #6895/#6896
     63 static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
     64 
     65 U_NAMESPACE_BEGIN
     66 
     67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
     68 
     69 /*
     70 This is a utility class. It does not use ICU's RTTI.
     71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
     72 Please make sure that intltest passes on Windows in Release mode,
     73 since the string pooling per compilation unit will mess up how RTTI works.
     74 The RTTI code was also removed due to lack of code coverage.
     75 */
     76 class LocalizationInfo : public UMemory {
     77 protected:
     78     virtual ~LocalizationInfo() {}
     79     uint32_t refcount;
     80 
     81 public:
     82     LocalizationInfo() : refcount(0) {}
     83 
     84     LocalizationInfo* ref(void) {
     85         ++refcount;
     86         return this;
     87     }
     88 
     89     LocalizationInfo* unref(void) {
     90         if (refcount && --refcount == 0) {
     91             delete this;
     92         }
     93         return NULL;
     94     }
     95 
     96     virtual UBool operator==(const LocalizationInfo* rhs) const;
     97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
     98 
     99     virtual int32_t getNumberOfRuleSets(void) const = 0;
    100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
    101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
    102     virtual const UChar* getLocaleName(int32_t index) const = 0;
    103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
    104 
    105     virtual int32_t indexForLocale(const UChar* locale) const;
    106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
    107 
    108 //    virtual UClassID getDynamicClassID() const = 0;
    109 //    static UClassID getStaticClassID(void);
    110 };
    111 
    112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
    113 
    114 // if both strings are NULL, this returns TRUE
    115 static UBool
    116 streq(const UChar* lhs, const UChar* rhs) {
    117     if (rhs == lhs) {
    118         return TRUE;
    119     }
    120     if (lhs && rhs) {
    121         return u_strcmp(lhs, rhs) == 0;
    122     }
    123     return FALSE;
    124 }
    125 
    126 UBool
    127 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
    128     if (rhs) {
    129         if (this == rhs) {
    130             return TRUE;
    131         }
    132 
    133         int32_t rsc = getNumberOfRuleSets();
    134         if (rsc == rhs->getNumberOfRuleSets()) {
    135             for (int i = 0; i < rsc; ++i) {
    136                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
    137                     return FALSE;
    138                 }
    139             }
    140             int32_t dlc = getNumberOfDisplayLocales();
    141             if (dlc == rhs->getNumberOfDisplayLocales()) {
    142                 for (int i = 0; i < dlc; ++i) {
    143                     const UChar* locale = getLocaleName(i);
    144                     int32_t ix = rhs->indexForLocale(locale);
    145                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
    146                     if (!streq(locale, rhs->getLocaleName(ix))) {
    147                         return FALSE;
    148                     }
    149                     for (int j = 0; j < rsc; ++j) {
    150                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
    151                             return FALSE;
    152                         }
    153                     }
    154                 }
    155                 return TRUE;
    156             }
    157         }
    158     }
    159     return FALSE;
    160 }
    161 
    162 int32_t
    163 LocalizationInfo::indexForLocale(const UChar* locale) const {
    164     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
    165         if (streq(locale, getLocaleName(i))) {
    166             return i;
    167         }
    168     }
    169     return -1;
    170 }
    171 
    172 int32_t
    173 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
    174     if (ruleset) {
    175         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
    176             if (streq(ruleset, getRuleSetName(i))) {
    177                 return i;
    178             }
    179         }
    180     }
    181     return -1;
    182 }
    183 
    184 
    185 typedef void (*Fn_Deleter)(void*);
    186 
    187 class VArray {
    188     void** buf;
    189     int32_t cap;
    190     int32_t size;
    191     Fn_Deleter deleter;
    192 public:
    193     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
    194 
    195     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
    196 
    197     ~VArray() {
    198         if (deleter) {
    199             for (int i = 0; i < size; ++i) {
    200                 (*deleter)(buf[i]);
    201             }
    202         }
    203         uprv_free(buf);
    204     }
    205 
    206     int32_t length() {
    207         return size;
    208     }
    209 
    210     void add(void* elem, UErrorCode& status) {
    211         if (U_SUCCESS(status)) {
    212             if (size == cap) {
    213                 if (cap == 0) {
    214                     cap = 1;
    215                 } else if (cap < 256) {
    216                     cap *= 2;
    217                 } else {
    218                     cap += 256;
    219                 }
    220                 if (buf == NULL) {
    221                     buf = (void**)uprv_malloc(cap * sizeof(void*));
    222                 } else {
    223                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
    224                 }
    225                 if (buf == NULL) {
    226                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
    227                     status = U_MEMORY_ALLOCATION_ERROR;
    228                     return;
    229                 }
    230                 void* start = &buf[size];
    231                 size_t count = (cap - size) * sizeof(void*);
    232                 uprv_memset(start, 0, count); // fill with nulls, just because
    233             }
    234             buf[size++] = elem;
    235         }
    236     }
    237 
    238     void** release(void) {
    239         void** result = buf;
    240         buf = NULL;
    241         cap = 0;
    242         size = 0;
    243         return result;
    244     }
    245 };
    246 
    247 class LocDataParser;
    248 
/**
 * LocalizationInfo backed by a single parsed UChar buffer.
 *
 * `info` is the text buffer; `data` is a NULL-terminated array of rows, each
 * row an array of UChar* pointing into `info`. As read by the accessors
 * defined later in this file, row 0 holds the rule-set names and each
 * following row holds a locale name followed by its display names.
 * Instances are built by LocDataParser (a friend) via create().
 */
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // parsed text buffer; freed by the destructor
    UChar*** data;        // NULL-terminated array of rows; freed by the destructor
    int32_t numRuleSets;  // value returned by getNumberOfRuleSets()
    int32_t numLocales;   // value returned by getNumberOfDisplayLocales()

friend class LocDataParser;

    // Private: only LocDataParser constructs instances. Stores the pointers as given.
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info` into a new instance; returns NULL on error or when `info` is empty.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here, but no definition is visible in this part of the file.
    void init(UErrorCode& status) const;
};
    278 
    279 
// Code units that act as delimiters in the localization-data syntax
// handled by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
    288 
    289 /**
    290  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
    291  */
    292 class LocDataParser {
    293     UChar* data;
    294     const UChar* e;
    295     UChar* p;
    296     UChar ch;
    297     UParseError& pe;
    298     UErrorCode& ec;
    299 
    300 public:
    301     LocDataParser(UParseError& parseError, UErrorCode& status)
    302         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    303     ~LocDataParser() {}
    304 
    305     /*
    306     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    307     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    308     */
    309     StringLocalizationInfo* parse(UChar* data, int32_t len);
    310 
    311 private:
    312 
    313     void inc(void) { ++p; ch = 0xffff; }
    314     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
    315     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
    316     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
    317     UBool inList(UChar c, const UChar* list) const {
    318         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
    319         while (*list && *list != c) ++list; return *list == c;
    320     }
    321     void parseError(const char* msg);
    322 
    323     StringLocalizationInfo* doParse(void);
    324 
    325     UChar** nextArray(int32_t& requiredLength);
    326     UChar*  nextString(void);
    327 };
    328 
// Reports a parse error and returns NULL from the enclosing function.
// The message text is only forwarded to parseError() in DEBUG builds.
// The expansion is two statements, so this macro must only be used inside a
// braced block; in an unbraced `if` the `return NULL;` would run
// unconditionally.
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#else
#define ERROR(msg) parseError(NULL); return NULL;
#endif
    334 
    335 
// NUL-terminated terminator lists used by LocDataParser::nextString().

// A string opened with '"' ends at the next '"'.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// A string opened with '\'' ends at the next '\''.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// An unquoted string ends at white space (the leading SPACE entry makes
// inList() accept any pattern white space) or at any delimiter.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
    347 
    348 static void
    349 DeleteFn(void* p) {
    350   uprv_free(p);
    351 }
    352 
    353 StringLocalizationInfo*
    354 LocDataParser::parse(UChar* _data, int32_t len) {
    355     if (U_FAILURE(ec)) {
    356         if (_data) uprv_free(_data);
    357         return NULL;
    358     }
    359 
    360     pe.line = 0;
    361     pe.offset = -1;
    362     pe.postContext[0] = 0;
    363     pe.preContext[0] = 0;
    364 
    365     if (_data == NULL) {
    366         ec = U_ILLEGAL_ARGUMENT_ERROR;
    367         return NULL;
    368     }
    369 
    370     if (len <= 0) {
    371         ec = U_ILLEGAL_ARGUMENT_ERROR;
    372         uprv_free(_data);
    373         return NULL;
    374     }
    375 
    376     data = _data;
    377     e = data + len;
    378     p = _data;
    379     ch = 0xffff;
    380 
    381     return doParse();
    382 }
    383 
    384 
    385 StringLocalizationInfo*
    386 LocDataParser::doParse(void) {
    387     skipWhitespace();
    388     if (!checkInc(OPEN_ANGLE)) {
    389         ERROR("Missing open angle");
    390     } else {
    391         VArray array(DeleteFn);
    392         UBool mightHaveNext = TRUE;
    393         int32_t requiredLength = -1;
    394         while (mightHaveNext) {
    395             mightHaveNext = FALSE;
    396             UChar** elem = nextArray(requiredLength);
    397             skipWhitespace();
    398             UBool haveComma = check(COMMA);
    399             if (elem) {
    400                 array.add(elem, ec);
    401                 if (haveComma) {
    402                     inc();
    403                     mightHaveNext = TRUE;
    404                 }
    405             } else if (haveComma) {
    406                 ERROR("Unexpected character");
    407             }
    408         }
    409 
    410         skipWhitespace();
    411         if (!checkInc(CLOSE_ANGLE)) {
    412             if (check(OPEN_ANGLE)) {
    413                 ERROR("Missing comma in outer array");
    414             } else {
    415                 ERROR("Missing close angle bracket in outer array");
    416             }
    417         }
    418 
    419         skipWhitespace();
    420         if (p != e) {
    421             ERROR("Extra text after close of localization data");
    422         }
    423 
    424         array.add(NULL, ec);
    425         if (U_SUCCESS(ec)) {
    426             int32_t numLocs = array.length() - 2; // subtract first, NULL
    427             UChar*** result = (UChar***)array.release();
    428 
    429             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
    430         }
    431     }
    432 
    433     ERROR("Unknown error");
    434 }
    435 
    436 UChar**
    437 LocDataParser::nextArray(int32_t& requiredLength) {
    438     if (U_FAILURE(ec)) {
    439         return NULL;
    440     }
    441 
    442     skipWhitespace();
    443     if (!checkInc(OPEN_ANGLE)) {
    444         ERROR("Missing open angle");
    445     }
    446 
    447     VArray array;
    448     UBool mightHaveNext = TRUE;
    449     while (mightHaveNext) {
    450         mightHaveNext = FALSE;
    451         UChar* elem = nextString();
    452         skipWhitespace();
    453         UBool haveComma = check(COMMA);
    454         if (elem) {
    455             array.add(elem, ec);
    456             if (haveComma) {
    457                 inc();
    458                 mightHaveNext = TRUE;
    459             }
    460         } else if (haveComma) {
    461             ERROR("Unexpected comma");
    462         }
    463     }
    464     skipWhitespace();
    465     if (!checkInc(CLOSE_ANGLE)) {
    466         if (check(OPEN_ANGLE)) {
    467             ERROR("Missing close angle bracket in inner array");
    468         } else {
    469             ERROR("Missing comma in inner array");
    470         }
    471     }
    472 
    473     array.add(NULL, ec);
    474     if (U_SUCCESS(ec)) {
    475         if (requiredLength == -1) {
    476             requiredLength = array.length() + 1;
    477         } else if (array.length() != requiredLength) {
    478             ec = U_ILLEGAL_ARGUMENT_ERROR;
    479             ERROR("Array not of required length");
    480         }
    481 
    482         return (UChar**)array.release();
    483     }
    484     ERROR("Unknown Error");
    485 }
    486 
// Scans the next string token and returns a pointer to it inside the text
// buffer, or NULL if there is no string at the current position (which is
// not an error by itself) or if an error was reported via ERROR.
//
// A token is either quoted with '"' or '\'' (ending at the matching quote)
// or unquoted (ending at white space or a delimiter). The token is
// NUL-terminated in place; the code unit that was overwritten is remembered
// in `ch` so that check()/checkInc()/skipWhitespace() still see it.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            // step over the opening quote; only the same quote character ends the string
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // advance to the first terminator (inList also stops on a NUL code unit)
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminator that stopped the scan
        if (p > start) {
            // non-empty token: remember the terminator, then overwrite it
            ch = x;
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            // consume the closing quote (this also resets ch)
            inc();
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
    529 
    530 void
    531 LocDataParser::parseError(const char* /*str*/) {
    532     if (!data) {
    533         return;
    534     }
    535 
    536     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
    537     if (start < data) {
    538         start = data;
    539     }
    540     for (UChar* x = p; --x >= start;) {
    541         if (!*x) {
    542             start = x+1;
    543             break;
    544         }
    545     }
    546     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
    547     if (limit > e) {
    548         limit = e;
    549     }
    550     u_strncpy(pe.preContext, start, (int32_t)(p-start));
    551     pe.preContext[p-start] = 0;
    552     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
    553     pe.postContext[limit-p] = 0;
    554     pe.offset = (int32_t)(p - data);
    555 
    556 #ifdef DEBUG
    557     fprintf(stderr, "%s at or near character %d: ", str, p-data);
    558 
    559     UnicodeString msg;
    560     msg.append(start, p - start);
    561     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
    562     msg.append(p, limit-p);
    563     msg.append("'");
    564 
    565     char buf[128];
    566     int32_t len = msg.extract(0, msg.length(), buf, 128);
    567     if (len >= 128) {
    568         buf[127] = 0;
    569     } else {
    570         buf[len] = 0;
    571     }
    572     fprintf(stderr, "%s\n", buf);
    573     fflush(stderr);
    574 #endif
    575 
    576     uprv_free(data);
    577     data = NULL;
    578     p = NULL;
    579     e = NULL;
    580 
    581     if (U_SUCCESS(ec)) {
    582         ec = U_PARSE_ERROR;
    583     }
    584 }
    585 
    586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
    587 
    588 StringLocalizationInfo*
    589 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
    590     if (U_FAILURE(status)) {
    591         return NULL;
    592     }
    593 
    594     int32_t len = info.length();
    595     if (len == 0) {
    596         return NULL; // no error;
    597     }
    598 
    599     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
    600     if (!p) {
    601         status = U_MEMORY_ALLOCATION_ERROR;
    602         return NULL;
    603     }
    604     info.extract(p, len, status);
    605     if (!U_FAILURE(status)) {
    606         status = U_ZERO_ERROR; // clear warning about non-termination
    607     }
    608 
    609     LocDataParser parser(perror, status);
    610     return parser.parse(p, len);
    611 }
    612 
    613 StringLocalizationInfo::~StringLocalizationInfo() {
    614     for (UChar*** p = (UChar***)data; *p; ++p) {
    615         // remaining data is simply pointer into our unicode string data.
    616         if (*p) uprv_free(*p);
    617     }
    618     if (data) uprv_free(data);
    619     if (info) uprv_free(info);
    620 }
    621 
    622 
    623 const UChar*
    624 StringLocalizationInfo::getRuleSetName(int32_t index) const {
    625     if (index >= 0 && index < getNumberOfRuleSets()) {
    626         return data[0][index];
    627     }
    628     return NULL;
    629 }
    630 
    631 const UChar*
    632 StringLocalizationInfo::getLocaleName(int32_t index) const {
    633     if (index >= 0 && index < getNumberOfDisplayLocales()) {
    634         return data[index+1][0];
    635     }
    636     return NULL;
    637 }
    638 
    639 const UChar*
    640 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
    641     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
    642         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
    643         return data[localeIndex+1][ruleIndex+1];
    644     }
    645     return NULL;
    646 }
    647 
    648 // ----------
    649 
// Constructs a formatter from a rule description, a textual localization
// table, and an explicit locale.
// `locs` is parsed with StringLocalizationInfo::create(), which yields NULL
// for an empty string or on error; the result is then passed to init()
// together with the description. Errors are reported through perror/status.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
    666 
// Constructs a formatter from a rule description and a textual localization
// table, using Locale::getDefault() as the locale.
// `locs` is parsed with StringLocalizationInfo::create() and the result
// (possibly NULL) is passed to init() together with the description.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
    683 
// Constructs a formatter from a rule description, an already-built
// LocalizationInfo (may be NULL), and an explicit locale.
// `info` is handed to init() unchanged; clone() in this file uses this
// overload to share the existing localization table.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             LocalizationInfo* info,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
  init(description, info, perror, status);
}
    699 
// Constructs a formatter from a rule description only: the locale is
// Locale::getDefault() and no localization table is supplied (init() is
// called with NULL).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
    init(description, NULL, perror, status);
}
    715 
// Constructs a formatter from a rule description and an explicit locale,
// with no localization table (init() is called with NULL).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         const Locale& aLocale,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
    init(description, NULL, perror, status);
}
    732 
    733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
    734   : ruleSets(NULL)
    735   , defaultRuleSet(NULL)
    736   , locale(alocale)
    737   , collator(NULL)
    738   , decimalFormatSymbols(NULL)
    739   , lenient(FALSE)
    740   , lenientParseRules(NULL)
    741   , localizations(NULL)
    742 {
    743     if (U_FAILURE(status)) {
    744         return;
    745     }
    746 
    747     const char* rules_tag = "RBNFRules";
    748     const char* fmt_tag = "";
    749     switch (tag) {
    750     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    751     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    752     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    753     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    754     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    755     }
    756 
    757     // TODO: read localization info from resource
    758     LocalizationInfo* locinfo = NULL;
    759 
    760     int32_t len = 0;
    761     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
    762     if (U_SUCCESS(status)) {
    763         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
    764                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
    765 
    766         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
    767         if (U_FAILURE(status)) {
    768             ures_close(nfrb);
    769         }
    770         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
    771         if (U_FAILURE(status)) {
    772             ures_close(rbnfRules);
    773             ures_close(nfrb);
    774             return;
    775         }
    776 
    777         UnicodeString desc;
    778         while (ures_hasNext(ruleSets)) {
    779            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
    780            desc.append(currentString);
    781         }
    782         UParseError perror;
    783 
    784 
    785         init (desc, locinfo, perror, status);
    786 
    787         //TODO: we need a real fix - see #6895 / #6896
    788         noParse = FALSE;
    789         if (tag == URBNF_SPELLOUT) {
    790             const char *lang = alocale.getLanguage();
    791             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
    792                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
    793                     noParse = TRUE;
    794                     break;
    795                 }
    796             }
    797         }
    798         //TODO: end
    799 
    800         ures_close(ruleSets);
    801         ures_close(rbnfRules);
    802     }
    803     ures_close(nfrb);
    804 }
    805 
// Copy constructor. Copy-constructs the NumberFormat base, starts every
// owned pointer at NULL, and then delegates the actual copying to
// operator=, which also assigns noParse.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs)
  , ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
{
    this->operator=(rhs);
}
    819 
    820 // --------
    821 
    822 RuleBasedNumberFormat&
    823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
    824 {
    825     UErrorCode status = U_ZERO_ERROR;
    826     dispose();
    827     locale = rhs.locale;
    828     lenient = rhs.lenient;
    829 
    830     UnicodeString rules = rhs.getRules();
    831     UParseError perror;
    832     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    833 
    834     //TODO: remove below when we fix the parse bug - See #6895 / #6896
    835     noParse = rhs.noParse;
    836 
    837     return *this;
    838 }
    839 
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
    844 
    845 Format*
    846 RuleBasedNumberFormat::clone(void) const
    847 {
    848     RuleBasedNumberFormat * result = NULL;
    849     UnicodeString rules = getRules();
    850     UErrorCode status = U_ZERO_ERROR;
    851     UParseError perror;
    852     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
    853     /* test for NULL */
    854     if (result == 0) {
    855         status = U_MEMORY_ALLOCATION_ERROR;
    856         return 0;
    857     }
    858     if (U_FAILURE(status)) {
    859         delete result;
    860         result = 0;
    861     } else {
    862         result->lenient = lenient;
    863 
    864         //TODO: remove below when we fix the parse bug - See #6895 / #6896
    865         result->noParse = noParse;
    866     }
    867     return result;
    868 }
    869 
    870 UBool
    871 RuleBasedNumberFormat::operator==(const Format& other) const
    872 {
    873     if (this == &other) {
    874         return TRUE;
    875     }
    876 
    877     if (typeid(*this) == typeid(other)) {
    878         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
    879         if (locale == rhs.locale &&
    880             lenient == rhs.lenient &&
    881             (localizations == NULL
    882                 ? rhs.localizations == NULL
    883                 : (rhs.localizations == NULL
    884                     ? FALSE
    885                     : *localizations == rhs.localizations))) {
    886 
    887             NFRuleSet** p = ruleSets;
    888             NFRuleSet** q = rhs.ruleSets;
    889             if (p == NULL) {
    890                 return q == NULL;
    891             } else if (q == NULL) {
    892                 return FALSE;
    893             }
    894             while (*p && *q && (**p == **q)) {
    895                 ++p;
    896                 ++q;
    897             }
    898             return *q == NULL && *p == NULL;
    899         }
    900     }
    901 
    902     return FALSE;
    903 }
    904 
    905 UnicodeString
    906 RuleBasedNumberFormat::getRules() const
    907 {
    908     UnicodeString result;
    909     if (ruleSets != NULL) {
    910         for (NFRuleSet** p = ruleSets; *p; ++p) {
    911             (*p)->appendRules(result);
    912         }
    913     }
    914     return result;
    915 }
    916 
    917 UnicodeString
    918 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
    919 {
    920     if (localizations) {
    921       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
    922       return string;
    923     } else if (ruleSets) {
    924         UnicodeString result;
    925         for (NFRuleSet** p = ruleSets; *p; ++p) {
    926             NFRuleSet* rs = *p;
    927             if (rs->isPublic()) {
    928                 if (--index == -1) {
    929                     rs->getName(result);
    930                     return result;
    931                 }
    932             }
    933         }
    934     }
    935     UnicodeString empty;
    936     return empty;
    937 }
    938 
    939 int32_t
    940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
    941 {
    942     int32_t result = 0;
    943     if (localizations) {
    944       result = localizations->getNumberOfRuleSets();
    945     } else if (ruleSets) {
    946         for (NFRuleSet** p = ruleSets; *p; ++p) {
    947             if ((**p).isPublic()) {
    948                 ++result;
    949             }
    950         }
    951     }
    952     return result;
    953 }
    954 
    955 int32_t
    956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    957     if (localizations) {
    958         return localizations->getNumberOfDisplayLocales();
    959     }
    960     return 0;
    961 }
    962 
    963 Locale
    964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    965     if (U_FAILURE(status)) {
    966         return Locale("");
    967     }
    968     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
    969         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
    970         char buffer[64];
    971         int32_t cap = name.length() + 1;
    972         char* bp = buffer;
    973         if (cap > 64) {
    974             bp = (char *)uprv_malloc(cap);
    975             if (bp == NULL) {
    976                 status = U_MEMORY_ALLOCATION_ERROR;
    977                 return Locale("");
    978             }
    979         }
    980         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
    981         Locale retLocale(bp);
    982         if (bp != buffer) {
    983             uprv_free(bp);
    984         }
    985         return retLocale;
    986     }
    987     status = U_ILLEGAL_ARGUMENT_ERROR;
    988     Locale retLocale;
    989     return retLocale;
    990 }
    991 
    992 UnicodeString
    993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
    994     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
    995         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
    996         int32_t len = localeName.length();
    997         UChar* localeStr = localeName.getBuffer(len + 1);
    998         while (len >= 0) {
    999             localeStr[len] = 0;
   1000             int32_t ix = localizations->indexForLocale(localeStr);
   1001             if (ix >= 0) {
   1002                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
   1003                 return name;
   1004             }
   1005 
   1006             // trim trailing portion, skipping over ommitted sections
   1007             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
   1008             while (len > 0 && localeStr[len-1] == 0x005F) --len;
   1009         }
   1010         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
   1011         return name;
   1012     }
   1013     UnicodeString bogus;
   1014     bogus.setToBogus();
   1015     return bogus;
   1016 }
   1017 
   1018 UnicodeString
   1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
   1020     if (localizations) {
   1021         UnicodeString rsn(ruleSetName);
   1022         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
   1023         return getRuleSetDisplayName(ix, localeParam);
   1024     }
   1025     UnicodeString bogus;
   1026     bogus.setToBogus();
   1027     return bogus;
   1028 }
   1029 
   1030 NFRuleSet*
   1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
   1032 {
   1033     if (U_SUCCESS(status) && ruleSets) {
   1034         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1035             NFRuleSet* rs = *p;
   1036             if (rs->isNamed(name)) {
   1037                 return rs;
   1038             }
   1039         }
   1040         status = U_ILLEGAL_ARGUMENT_ERROR;
   1041     }
   1042     return NULL;
   1043 }
   1044 
   1045 UnicodeString&
   1046 RuleBasedNumberFormat::format(int32_t number,
   1047                               UnicodeString& toAppendTo,
   1048                               FieldPosition& /* pos */) const
   1049 {
   1050     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
   1051     return toAppendTo;
   1052 }
   1053 
   1054 
   1055 UnicodeString&
   1056 RuleBasedNumberFormat::format(int64_t number,
   1057                               UnicodeString& toAppendTo,
   1058                               FieldPosition& /* pos */) const
   1059 {
   1060     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1061     return toAppendTo;
   1062 }
   1063 
   1064 
   1065 UnicodeString&
   1066 RuleBasedNumberFormat::format(double number,
   1067                               UnicodeString& toAppendTo,
   1068                               FieldPosition& /* pos */) const
   1069 {
   1070     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
   1071     if (uprv_isNaN(number)) {
   1072         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
   1073         if (decFmtSyms) {
   1074             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
   1075         }
   1076     } else if (defaultRuleSet) {
   1077         defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
   1078     }
   1079     return toAppendTo;
   1080 }
   1081 
   1082 
   1083 UnicodeString&
   1084 RuleBasedNumberFormat::format(int32_t number,
   1085                               const UnicodeString& ruleSetName,
   1086                               UnicodeString& toAppendTo,
   1087                               FieldPosition& /* pos */,
   1088                               UErrorCode& status) const
   1089 {
   1090     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
   1091     if (U_SUCCESS(status)) {
   1092         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1093             // throw new IllegalArgumentException("Can't use internal rule set");
   1094             status = U_ILLEGAL_ARGUMENT_ERROR;
   1095         } else {
   1096             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1097             if (rs) {
   1098                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
   1099             }
   1100         }
   1101     }
   1102     return toAppendTo;
   1103 }
   1104 
   1105 
   1106 UnicodeString&
   1107 RuleBasedNumberFormat::format(int64_t number,
   1108                               const UnicodeString& ruleSetName,
   1109                               UnicodeString& toAppendTo,
   1110                               FieldPosition& /* pos */,
   1111                               UErrorCode& status) const
   1112 {
   1113     if (U_SUCCESS(status)) {
   1114         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1115             // throw new IllegalArgumentException("Can't use internal rule set");
   1116             status = U_ILLEGAL_ARGUMENT_ERROR;
   1117         } else {
   1118             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1119             if (rs) {
   1120                 rs->format(number, toAppendTo, toAppendTo.length());
   1121             }
   1122         }
   1123     }
   1124     return toAppendTo;
   1125 }
   1126 
   1127 
   1128 // make linker happy
   1129 UnicodeString&
   1130 RuleBasedNumberFormat::format(const Formattable& obj,
   1131                               UnicodeString& toAppendTo,
   1132                               FieldPosition& pos,
   1133                               UErrorCode& status) const
   1134 {
   1135     return NumberFormat::format(obj, toAppendTo, pos, status);
   1136 }
   1137 
   1138 UnicodeString&
   1139 RuleBasedNumberFormat::format(double number,
   1140                               const UnicodeString& ruleSetName,
   1141                               UnicodeString& toAppendTo,
   1142                               FieldPosition& /* pos */,
   1143                               UErrorCode& status) const
   1144 {
   1145     if (U_SUCCESS(status)) {
   1146         if (ruleSetName.indexOf(gPercentPercent) == 0) {
   1147             // throw new IllegalArgumentException("Can't use internal rule set");
   1148             status = U_ILLEGAL_ARGUMENT_ERROR;
   1149         } else {
   1150             NFRuleSet *rs = findRuleSet(ruleSetName, status);
   1151             if (rs) {
   1152                 rs->format(number, toAppendTo, toAppendTo.length());
   1153             }
   1154         }
   1155     }
   1156     return toAppendTo;
   1157 }
   1158 
   1159 void
   1160 RuleBasedNumberFormat::parse(const UnicodeString& text,
   1161                              Formattable& result,
   1162                              ParsePosition& parsePosition) const
   1163 {
   1164     //TODO: We need a real fix.  See #6895 / #6896
   1165     if (noParse) {
   1166         // skip parsing
   1167         parsePosition.setErrorIndex(0);
   1168         return;
   1169     }
   1170 
   1171     if (!ruleSets) {
   1172         parsePosition.setErrorIndex(0);
   1173         return;
   1174     }
   1175 
   1176     UnicodeString workingText(text, parsePosition.getIndex());
   1177     ParsePosition workingPos(0);
   1178 
   1179     ParsePosition high_pp(0);
   1180     Formattable high_result;
   1181 
   1182     for (NFRuleSet** p = ruleSets; *p; ++p) {
   1183         NFRuleSet *rp = *p;
   1184         if (rp->isPublic() && rp->isParseable()) {
   1185             ParsePosition working_pp(0);
   1186             Formattable working_result;
   1187 
   1188             rp->parse(workingText, working_pp, kMaxDouble, working_result);
   1189             if (working_pp.getIndex() > high_pp.getIndex()) {
   1190                 high_pp = working_pp;
   1191                 high_result = working_result;
   1192 
   1193                 if (high_pp.getIndex() == workingText.length()) {
   1194                     break;
   1195                 }
   1196             }
   1197         }
   1198     }
   1199 
   1200     int32_t startIndex = parsePosition.getIndex();
   1201     parsePosition.setIndex(startIndex + high_pp.getIndex());
   1202     if (high_pp.getIndex() > 0) {
   1203         parsePosition.setErrorIndex(-1);
   1204     } else {
   1205         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
   1206         parsePosition.setErrorIndex(startIndex + errorIndex);
   1207     }
   1208     result = high_result;
   1209     if (result.getType() == Formattable::kDouble) {
   1210         int32_t r = (int32_t)result.getDouble();
   1211         if ((double)r == result.getDouble()) {
   1212             result.setLong(r);
   1213         }
   1214     }
   1215 }
   1216 
   1217 #if !UCONFIG_NO_COLLATION
   1218 
   1219 void
   1220 RuleBasedNumberFormat::setLenient(UBool enabled)
   1221 {
   1222     lenient = enabled;
   1223     if (!enabled && collator) {
   1224         delete collator;
   1225         collator = NULL;
   1226     }
   1227 }
   1228 
   1229 #endif
   1230 
   1231 void
   1232 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
   1233     if (U_SUCCESS(status)) {
   1234         if (ruleSetName.isEmpty()) {
   1235           if (localizations) {
   1236               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
   1237               defaultRuleSet = findRuleSet(name, status);
   1238           } else {
   1239             initDefaultRuleSet();
   1240           }
   1241         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
   1242             status = U_ILLEGAL_ARGUMENT_ERROR;
   1243         } else {
   1244             NFRuleSet* result = findRuleSet(ruleSetName, status);
   1245             if (result != NULL) {
   1246                 defaultRuleSet = result;
   1247             }
   1248         }
   1249     }
   1250 }
   1251 
   1252 UnicodeString
   1253 RuleBasedNumberFormat::getDefaultRuleSetName() const {
   1254   UnicodeString result;
   1255   if (defaultRuleSet && defaultRuleSet->isPublic()) {
   1256     defaultRuleSet->getName(result);
   1257   } else {
   1258     result.setToBogus();
   1259   }
   1260   return result;
   1261 }
   1262 
   1263 void
   1264 RuleBasedNumberFormat::initDefaultRuleSet()
   1265 {
   1266     defaultRuleSet = NULL;
   1267     if (!ruleSets) {
   1268       return;
   1269     }
   1270 
   1271     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
   1272     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
   1273     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
   1274 
   1275     NFRuleSet**p = &ruleSets[0];
   1276     while (*p) {
   1277         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
   1278             defaultRuleSet = *p;
   1279             return;
   1280         } else {
   1281             ++p;
   1282         }
   1283     }
   1284 
   1285     defaultRuleSet = *--p;
   1286     if (!defaultRuleSet->isPublic()) {
   1287         while (p != ruleSets) {
   1288             if ((*--p)->isPublic()) {
   1289                 defaultRuleSet = *p;
   1290                 break;
   1291             }
   1292         }
   1293     }
   1294 }
   1295 
   1296 
   1297 void
   1298 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
   1299                             UParseError& pErr, UErrorCode& status)
   1300 {
   1301     // TODO: implement UParseError
   1302     uprv_memset(&pErr, 0, sizeof(UParseError));
   1303     // Note: this can leave ruleSets == NULL, so remaining code should check
   1304     if (U_FAILURE(status)) {
   1305         return;
   1306     }
   1307 
   1308     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
   1309 
   1310     UnicodeString description(rules);
   1311     if (!description.length()) {
   1312         status = U_MEMORY_ALLOCATION_ERROR;
   1313         return;
   1314     }
   1315 
   1316     // start by stripping the trailing whitespace from all the rules
   1317     // (this is all the whitespace follwing each semicolon in the
   1318     // description).  This allows us to look for rule-set boundaries
   1319     // by searching for ";%" without having to worry about whitespace
   1320     // between the ; and the %
   1321     stripWhitespace(description);
   1322 
   1323     // check to see if there's a set of lenient-parse rules.  If there
   1324     // is, pull them out into our temporary holding place for them,
   1325     // and delete them from the description before the real desciption-
   1326     // parsing code sees them
   1327     int32_t lp = description.indexOf(gLenientParse);
   1328     if (lp != -1) {
   1329         // we've got to make sure we're not in the middle of a rule
   1330         // (where "%%lenient-parse" would actually get treated as
   1331         // rule text)
   1332         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
   1333             // locate the beginning and end of the actual collation
   1334             // rules (there may be whitespace between the name and
   1335             // the first token in the description)
   1336             int lpEnd = description.indexOf(gSemiPercent, lp);
   1337 
   1338             if (lpEnd == -1) {
   1339                 lpEnd = description.length() - 1;
   1340             }
   1341             int lpStart = lp + u_strlen(gLenientParse);
   1342             while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
   1343                 ++lpStart;
   1344             }
   1345 
   1346             // copy out the lenient-parse rules and delete them
   1347             // from the description
   1348             lenientParseRules = new UnicodeString();
   1349             /* test for NULL */
   1350             if (lenientParseRules == 0) {
   1351                 status = U_MEMORY_ALLOCATION_ERROR;
   1352                 return;
   1353             }
   1354             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
   1355 
   1356             description.remove(lp, lpEnd + 1 - lp);
   1357         }
   1358     }
   1359 
   1360     // pre-flight parsing the description and count the number of
   1361     // rule sets (";%" marks the end of one rule set and the beginning
   1362     // of the next)
   1363     int numRuleSets = 0;
   1364     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
   1365         ++numRuleSets;
   1366         ++p;
   1367     }
   1368     ++numRuleSets;
   1369 
   1370     // our rule list is an array of the appropriate size
   1371     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
   1372     /* test for NULL */
   1373     if (ruleSets == 0) {
   1374         status = U_MEMORY_ALLOCATION_ERROR;
   1375         return;
   1376     }
   1377 
   1378     for (int i = 0; i <= numRuleSets; ++i) {
   1379         ruleSets[i] = NULL;
   1380     }
   1381 
   1382     // divide up the descriptions into individual rule-set descriptions
   1383     // and store them in a temporary array.  At each step, we also
   1384     // new up a rule set, but all this does is initialize its name
   1385     // and remove it from its description.  We can't actually parse
   1386     // the rest of the descriptions and finish initializing everything
   1387     // because we have to know the names and locations of all the rule
   1388     // sets before we can actually set everything up
   1389     if(!numRuleSets) {
   1390         status = U_ILLEGAL_ARGUMENT_ERROR;
   1391         return;
   1392     }
   1393     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
   1394     if (ruleSetDescriptions == 0) {
   1395         status = U_MEMORY_ALLOCATION_ERROR;
   1396         return;
   1397     }
   1398 
   1399     {
   1400         int curRuleSet = 0;
   1401         int32_t start = 0;
   1402         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
   1403             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
   1404             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1405             if (ruleSets[curRuleSet] == 0) {
   1406                 status = U_MEMORY_ALLOCATION_ERROR;
   1407                 goto cleanup;
   1408             }
   1409             ++curRuleSet;
   1410             start = p + 1;
   1411         }
   1412         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
   1413         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
   1414         if (ruleSets[curRuleSet] == 0) {
   1415             status = U_MEMORY_ALLOCATION_ERROR;
   1416             goto cleanup;
   1417         }
   1418     }
   1419 
   1420     // now we can take note of the formatter's default rule set, which
   1421     // is the last public rule set in the description (it's the last
   1422     // rather than the first so that a user can create a new formatter
   1423     // from an existing formatter and change its default behavior just
   1424     // by appending more rule sets to the end)
   1425 
   1426     // {dlf} Initialization of a fraction rule set requires the default rule
   1427     // set to be known.  For purposes of initialization, this is always the
   1428     // last public rule set, no matter what the localization data says.
   1429     initDefaultRuleSet();
   1430 
   1431     // finally, we can go back through the temporary descriptions
   1432     // list and finish seting up the substructure (and we throw
   1433     // away the temporary descriptions as we go)
   1434     {
   1435         for (int i = 0; i < numRuleSets; i++) {
   1436             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
   1437         }
   1438     }
   1439 
   1440     // Now that the rules are initialized, the 'real' default rule
   1441     // set can be adjusted by the localization data.
   1442 
   1443     // The C code keeps the localization array as is, rather than building
   1444     // a separate array of the public rule set names, so we have less work
   1445     // to do here-- but we still need to check the names.
   1446 
   1447     if (localizationInfos) {
   1448         // confirm the names, if any aren't in the rules, that's an error
   1449         // it is ok if the rules contain public rule sets that are not in this list
   1450         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
   1451             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
   1452             NFRuleSet* rs = findRuleSet(name, status);
   1453             if (rs == NULL) {
   1454                 break; // error
   1455             }
   1456             if (i == 0) {
   1457                 defaultRuleSet = rs;
   1458             }
   1459         }
   1460     } else {
   1461         defaultRuleSet = getDefaultRuleSet();
   1462     }
   1463 
   1464 cleanup:
   1465     delete[] ruleSetDescriptions;
   1466 }
   1467 
   1468 void
   1469 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
   1470 {
   1471     // iterate through the characters...
   1472     UnicodeString result;
   1473 
   1474     int start = 0;
   1475     while (start != -1 && start < description.length()) {
   1476         // seek to the first non-whitespace character...
   1477         while (start < description.length()
   1478             && PatternProps::isWhiteSpace(description.charAt(start))) {
   1479             ++start;
   1480         }
   1481 
   1482         // locate the next semicolon in the text and copy the text from
   1483         // our current position up to that semicolon into the result
   1484         int32_t p = description.indexOf(gSemiColon, start);
   1485         if (p == -1) {
   1486             // or if we don't find a semicolon, just copy the rest of
   1487             // the string into the result
   1488             result.append(description, start, description.length() - start);
   1489             start = -1;
   1490         }
   1491         else if (p < description.length()) {
   1492             result.append(description, start, p + 1 - start);
   1493             start = p + 1;
   1494         }
   1495 
   1496         // when we get here, we've seeked off the end of the sring, and
   1497         // we terminate the loop (we continue until *start* is -1 rather
   1498         // than until *p* is -1, because otherwise we'd miss the last
   1499         // rule in the description)
   1500         else {
   1501             start = -1;
   1502         }
   1503     }
   1504 
   1505     description.setTo(result);
   1506 }
   1507 
   1508 
   1509 void
   1510 RuleBasedNumberFormat::dispose()
   1511 {
   1512     if (ruleSets) {
   1513         for (NFRuleSet** p = ruleSets; *p; ++p) {
   1514             delete *p;
   1515         }
   1516         uprv_free(ruleSets);
   1517         ruleSets = NULL;
   1518     }
   1519 
   1520 #if !UCONFIG_NO_COLLATION
   1521     delete collator;
   1522 #endif
   1523     collator = NULL;
   1524 
   1525     delete decimalFormatSymbols;
   1526     decimalFormatSymbols = NULL;
   1527 
   1528     delete lenientParseRules;
   1529     lenientParseRules = NULL;
   1530 
   1531     if (localizations) localizations = localizations->unref();
   1532 }
   1533 
   1534 
   1535 //-----------------------------------------------------------------------
   1536 // package-internal API
   1537 //-----------------------------------------------------------------------
   1538 
   1539 /**
   1540  * Returns the collator to use for lenient parsing.  The collator is lazily created:
   1541  * this function creates it the first time it's called.
   1542  * @return The collator to use for lenient parsing, or null if lenient parsing
   1543  * is turned off.
   1544 */
   1545 Collator*
   1546 RuleBasedNumberFormat::getCollator() const
   1547 {
   1548 #if !UCONFIG_NO_COLLATION
   1549     if (!ruleSets) {
   1550         return NULL;
   1551     }
   1552 
   1553     // lazy-evaulate the collator
   1554     if (collator == NULL && lenient) {
   1555         // create a default collator based on the formatter's locale,
   1556         // then pull out that collator's rules, append any additional
   1557         // rules specified in the description, and create a _new_
   1558         // collator based on the combinaiton of those rules
   1559 
   1560         UErrorCode status = U_ZERO_ERROR;
   1561 
   1562         Collator* temp = Collator::createInstance(locale, status);
   1563         RuleBasedCollator* newCollator;
   1564         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
   1565             if (lenientParseRules) {
   1566                 UnicodeString rules(newCollator->getRules());
   1567                 rules.append(*lenientParseRules);
   1568 
   1569                 newCollator = new RuleBasedCollator(rules, status);
   1570                 // Exit if newCollator could not be created.
   1571                 if (newCollator == NULL) {
   1572                 	return NULL;
   1573                 }
   1574             } else {
   1575                 temp = NULL;
   1576             }
   1577             if (U_SUCCESS(status)) {
   1578                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
   1579                 // cast away const
   1580                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
   1581             } else {
   1582                 delete newCollator;
   1583             }
   1584         }
   1585         delete temp;
   1586     }
   1587 #endif
   1588 
   1589     // if lenient-parse mode is off, this will be null
   1590     // (see setLenientParseMode())
   1591     return collator;
   1592 }
   1593 
   1594 
   1595 /**
   1596  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1597  * instances owned by this formatter.  This object is lazily created: this function
   1598  * creates it the first time it's called.
   1599  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
   1600  * instances owned by this formatter.
   1601 */
   1602 DecimalFormatSymbols*
   1603 RuleBasedNumberFormat::getDecimalFormatSymbols() const
   1604 {
   1605     // lazy-evaluate the DecimalFormatSymbols object.  This object
   1606     // is shared by all DecimalFormat instances belonging to this
   1607     // formatter
   1608     if (decimalFormatSymbols == NULL) {
   1609         UErrorCode status = U_ZERO_ERROR;
   1610         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
   1611         if (U_SUCCESS(status)) {
   1612             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
   1613         } else {
   1614             delete temp;
   1615         }
   1616     }
   1617     return decimalFormatSymbols;
   1618 }
   1619 
   1620 U_NAMESPACE_END
   1621 
   1622 /* U_HAVE_RBNF */
   1623 #endif
   1624