1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2012, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 #include "utypeinfo.h" // for 'typeid' to work 9 10 #include "unicode/rbnf.h" 11 12 #if U_HAVE_RBNF 13 14 #include "unicode/normlzr.h" 15 #include "unicode/tblcoll.h" 16 #include "unicode/uchar.h" 17 #include "unicode/ucol.h" 18 #include "unicode/uloc.h" 19 #include "unicode/unum.h" 20 #include "unicode/ures.h" 21 #include "unicode/ustring.h" 22 #include "unicode/utf16.h" 23 #include "unicode/udata.h" 24 #include "nfrs.h" 25 26 #include "cmemory.h" 27 #include "cstring.h" 28 #include "patternprops.h" 29 #include "uresimp.h" 30 31 // debugging 32 // #define DEBUG 33 34 #ifdef DEBUG 35 #include "stdio.h" 36 #endif 37 38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 39 40 static const UChar gPercentPercent[] = 41 { 42 0x25, 0x25, 0 43 }; /* "%%" */ 44 45 // All urbnf objects are created through openRules, so we init all of the 46 // Unicode string constants required by rbnf, nfrs, or nfr here. 47 static const UChar gLenientParse[] = 48 { 49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 50 }; /* "%%lenient-parse:" */ 51 static const UChar gSemiColon = 0x003B; 52 static const UChar gSemiPercent[] = 53 { 54 0x3B, 0x25, 0 55 }; /* ";%" */ 56 57 #define kSomeNumberOfBitsDiv2 22 58 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 59 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 60 61 U_NAMESPACE_BEGIN 62 63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 64 65 /* 66 This is a utility class. It does not use ICU's RTTI. 67 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 68 Please make sure that intltest passes on Windows in Release mode, 69 since the string pooling per compilation unit will mess up how RTTI works. 70 The RTTI code was also removed due to lack of code coverage. 71 */ 72 class LocalizationInfo : public UMemory { 73 protected: 74 virtual ~LocalizationInfo(); 75 uint32_t refcount; 76 77 public: 78 LocalizationInfo() : refcount(0) {} 79 80 LocalizationInfo* ref(void) { 81 ++refcount; 82 return this; 83 } 84 85 LocalizationInfo* unref(void) { 86 if (refcount && --refcount == 0) { 87 delete this; 88 } 89 return NULL; 90 } 91 92 virtual UBool operator==(const LocalizationInfo* rhs) const; 93 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 94 95 virtual int32_t getNumberOfRuleSets(void) const = 0; 96 virtual const UChar* getRuleSetName(int32_t index) const = 0; 97 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 98 virtual const UChar* getLocaleName(int32_t index) const = 0; 99 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 100 101 virtual int32_t indexForLocale(const UChar* locale) const; 102 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 103 104 // virtual UClassID getDynamicClassID() const = 0; 105 // static UClassID getStaticClassID(void); 106 }; 107 108 LocalizationInfo::~LocalizationInfo() {} 109 110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 111 112 // if both strings are NULL, this returns TRUE 113 static UBool 114 streq(const UChar* lhs, const UChar* rhs) { 115 if (rhs == lhs) { 116 return TRUE; 117 } 118 if (lhs && rhs) { 119 return u_strcmp(lhs, rhs) == 0; 120 } 121 return FALSE; 122 } 123 124 UBool 125 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 126 if (rhs) { 127 if (this == rhs) { 128 return TRUE; 129 } 130 131 int32_t rsc = getNumberOfRuleSets(); 132 if (rsc == rhs->getNumberOfRuleSets()) { 133 for (int i = 0; i < rsc; ++i) { 134 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 135 return FALSE; 136 } 137 } 138 int32_t dlc = getNumberOfDisplayLocales(); 139 if (dlc == rhs->getNumberOfDisplayLocales()) { 140 for (int i = 0; i < dlc; ++i) { 141 const UChar* locale = getLocaleName(i); 142 int32_t ix = rhs->indexForLocale(locale); 143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 144 if (!streq(locale, rhs->getLocaleName(ix))) { 145 return FALSE; 146 } 147 for (int j = 0; j < rsc; ++j) { 148 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 149 return FALSE; 150 } 151 } 152 } 153 return TRUE; 154 } 155 } 156 } 157 return FALSE; 158 } 159 160 int32_t 161 LocalizationInfo::indexForLocale(const UChar* locale) const { 162 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 163 if (streq(locale, getLocaleName(i))) { 164 return i; 165 } 166 } 167 return -1; 168 } 169 170 int32_t 171 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 172 if (ruleset) { 173 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 174 if (streq(ruleset, getRuleSetName(i))) { 175 return i; 176 } 177 } 178 } 179 return -1; 180 } 181 182 183 typedef void (*Fn_Deleter)(void*); 184 185 class VArray { 186 void** buf; 187 int32_t cap; 188 int32_t size; 189 Fn_Deleter deleter; 190 public: 191 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 192 193 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 194 195 ~VArray() { 196 if (deleter) { 197 for (int i = 0; i < size; ++i) { 198 (*deleter)(buf[i]); 199 } 200 } 201 uprv_free(buf); 202 } 203 204 int32_t length() { 205 return size; 206 } 207 208 void add(void* elem, UErrorCode& status) { 209 if (U_SUCCESS(status)) { 210 if (size == cap) { 211 if (cap == 0) { 212 cap = 1; 213 } else if (cap < 256) { 214 cap *= 2; 215 } else { 216 cap += 256; 217 } 218 if (buf == NULL) { 219 buf = (void**)uprv_malloc(cap * sizeof(void*)); 220 } else { 221 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 222 } 223 if (buf == NULL) { 224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 225 status = U_MEMORY_ALLOCATION_ERROR; 226 return; 227 } 228 void* start = &buf[size]; 229 size_t count = (cap - size) * sizeof(void*); 230 uprv_memset(start, 0, count); // fill with nulls, just because 231 } 232 buf[size++] = elem; 233 } 234 } 235 236 void** release(void) { 237 void** result = buf; 238 buf = NULL; 239 cap = 0; 240 size = 0; 241 return result; 242 } 243 }; 244 245 class LocDataParser; 246 247 class StringLocalizationInfo : public LocalizationInfo { 248 UChar* info; 249 UChar*** data; 250 int32_t numRuleSets; 251 int32_t numLocales; 252 253 friend class LocDataParser; 254 255 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 256 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 257 { 258 } 259 260 public: 261 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 262 263 virtual ~StringLocalizationInfo(); 264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 265 virtual const UChar* getRuleSetName(int32_t index) const; 266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 267 virtual const UChar* getLocaleName(int32_t index) const; 268 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 269 270 // virtual UClassID getDynamicClassID() const; 271 // static UClassID getStaticClassID(void); 272 273 private: 274 void init(UErrorCode& status) const; 275 }; 276 277 278 enum { 279 OPEN_ANGLE = 0x003c, /* '<' */ 280 CLOSE_ANGLE = 0x003e, /* '>' */ 281 COMMA = 0x002c, 282 TICK = 0x0027, 283 QUOTE = 0x0022, 284 SPACE = 0x0020 285 }; 286 287 /** 288 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 289 */ 290 class LocDataParser { 291 UChar* data; 292 const UChar* e; 293 UChar* p; 294 UChar ch; 295 UParseError& pe; 296 UErrorCode& ec; 297 298 public: 299 LocDataParser(UParseError& parseError, UErrorCode& status) 300 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 301 ~LocDataParser() {} 302 303 /* 304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 306 */ 307 StringLocalizationInfo* parse(UChar* data, int32_t len); 308 309 private: 310 311 void inc(void) { ++p; ch = 0xffff; } 312 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 313 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 314 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} 315 UBool inList(UChar c, const UChar* list) const { 316 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; 317 while (*list && *list != c) ++list; return *list == c; 318 } 319 void parseError(const char* msg); 320 321 StringLocalizationInfo* doParse(void); 322 323 UChar** nextArray(int32_t& requiredLength); 324 UChar* nextString(void); 325 }; 326 327 #ifdef DEBUG 328 #define ERROR(msg) parseError(msg); return NULL; 329 #else 330 #define ERROR(msg) parseError(NULL); return NULL; 331 #endif 332 333 334 static const UChar DQUOTE_STOPLIST[] = { 335 QUOTE, 0 336 }; 337 338 static const UChar SQUOTE_STOPLIST[] = { 339 TICK, 0 340 }; 341 342 static const UChar NOQUOTE_STOPLIST[] = { 343 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 344 }; 345 346 static void 347 DeleteFn(void* p) { 348 uprv_free(p); 349 } 350 351 StringLocalizationInfo* 352 LocDataParser::parse(UChar* _data, int32_t len) { 353 if (U_FAILURE(ec)) { 354 if (_data) uprv_free(_data); 355 return NULL; 356 } 357 358 pe.line = 0; 359 pe.offset = -1; 360 pe.postContext[0] = 0; 361 pe.preContext[0] = 0; 362 363 if (_data == NULL) { 364 ec = U_ILLEGAL_ARGUMENT_ERROR; 365 return NULL; 366 } 367 368 if (len <= 0) { 369 ec = U_ILLEGAL_ARGUMENT_ERROR; 370 uprv_free(_data); 371 return NULL; 372 } 373 374 data = _data; 375 e = data + len; 376 p = _data; 377 ch = 0xffff; 378 379 return doParse(); 380 } 381 382 383 StringLocalizationInfo* 384 LocDataParser::doParse(void) { 385 skipWhitespace(); 386 if (!checkInc(OPEN_ANGLE)) { 387 ERROR("Missing open angle"); 388 } else { 389 VArray array(DeleteFn); 390 UBool mightHaveNext = TRUE; 391 int32_t requiredLength = -1; 392 while (mightHaveNext) { 393 mightHaveNext = FALSE; 394 UChar** elem = nextArray(requiredLength); 395 skipWhitespace(); 396 UBool haveComma = check(COMMA); 397 if (elem) { 398 array.add(elem, ec); 399 if (haveComma) { 400 inc(); 401 mightHaveNext = TRUE; 402 } 403 } else if (haveComma) { 404 ERROR("Unexpected character"); 405 } 406 } 407 408 skipWhitespace(); 409 if (!checkInc(CLOSE_ANGLE)) { 410 if (check(OPEN_ANGLE)) { 411 ERROR("Missing comma in outer array"); 412 } else { 413 ERROR("Missing close angle bracket in outer array"); 414 } 415 } 416 417 skipWhitespace(); 418 if (p != e) { 419 ERROR("Extra text after close of localization data"); 420 } 421 422 array.add(NULL, ec); 423 if (U_SUCCESS(ec)) { 424 int32_t numLocs = array.length() - 2; // subtract first, NULL 425 UChar*** result = (UChar***)array.release(); 426 427 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 428 } 429 } 430 431 ERROR("Unknown error"); 432 } 433 434 UChar** 435 LocDataParser::nextArray(int32_t& requiredLength) { 436 if (U_FAILURE(ec)) { 437 return NULL; 438 } 439 440 skipWhitespace(); 441 if (!checkInc(OPEN_ANGLE)) { 442 ERROR("Missing open angle"); 443 } 444 445 VArray array; 446 UBool mightHaveNext = TRUE; 447 while (mightHaveNext) { 448 mightHaveNext = FALSE; 449 UChar* elem = nextString(); 450 skipWhitespace(); 451 UBool haveComma = check(COMMA); 452 if (elem) { 453 array.add(elem, ec); 454 if (haveComma) { 455 inc(); 456 mightHaveNext = TRUE; 457 } 458 } else if (haveComma) { 459 ERROR("Unexpected comma"); 460 } 461 } 462 skipWhitespace(); 463 if (!checkInc(CLOSE_ANGLE)) { 464 if (check(OPEN_ANGLE)) { 465 ERROR("Missing close angle bracket in inner array"); 466 } else { 467 ERROR("Missing comma in inner array"); 468 } 469 } 470 471 array.add(NULL, ec); 472 if (U_SUCCESS(ec)) { 473 if (requiredLength == -1) { 474 requiredLength = array.length() + 1; 475 } else if (array.length() != requiredLength) { 476 ec = U_ILLEGAL_ARGUMENT_ERROR; 477 ERROR("Array not of required length"); 478 } 479 480 return (UChar**)array.release(); 481 } 482 ERROR("Unknown Error"); 483 } 484 485 UChar* 486 LocDataParser::nextString() { 487 UChar* result = NULL; 488 489 skipWhitespace(); 490 if (p < e) { 491 const UChar* terminators; 492 UChar c = *p; 493 UBool haveQuote = c == QUOTE || c == TICK; 494 if (haveQuote) { 495 inc(); 496 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 497 } else { 498 terminators = NOQUOTE_STOPLIST; 499 } 500 UChar* start = p; 501 while (p < e && !inList(*p, terminators)) ++p; 502 if (p == e) { 503 ERROR("Unexpected end of data"); 504 } 505 506 UChar x = *p; 507 if (p > start) { 508 ch = x; 509 *p = 0x0; // terminate by writing to data 510 result = start; // just point into data 511 } 512 if (haveQuote) { 513 if (x != c) { 514 ERROR("Missing matching quote"); 515 } else if (p == start) { 516 ERROR("Empty string"); 517 } 518 inc(); 519 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 520 ERROR("Unexpected character in string"); 521 } 522 } 523 524 // ok for there to be no next string 525 return result; 526 } 527 528 void 529 LocDataParser::parseError(const char* /*str*/) { 530 if (!data) { 531 return; 532 } 533 534 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 535 if (start < data) { 536 start = data; 537 } 538 for (UChar* x = p; --x >= start;) { 539 if (!*x) { 540 start = x+1; 541 break; 542 } 543 } 544 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 545 if (limit > e) { 546 limit = e; 547 } 548 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 549 pe.preContext[p-start] = 0; 550 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 551 pe.postContext[limit-p] = 0; 552 pe.offset = (int32_t)(p - data); 553 554 #ifdef DEBUG 555 fprintf(stderr, "%s at or near character %d: ", str, p-data); 556 557 UnicodeString msg; 558 msg.append(start, p - start); 559 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 560 msg.append(p, limit-p); 561 msg.append("'"); 562 563 char buf[128]; 564 int32_t len = msg.extract(0, msg.length(), buf, 128); 565 if (len >= 128) { 566 buf[127] = 0; 567 } else { 568 buf[len] = 0; 569 } 570 fprintf(stderr, "%s\n", buf); 571 fflush(stderr); 572 #endif 573 574 uprv_free(data); 575 data = NULL; 576 p = NULL; 577 e = NULL; 578 579 if (U_SUCCESS(ec)) { 580 ec = U_PARSE_ERROR; 581 } 582 } 583 584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 585 586 StringLocalizationInfo* 587 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 588 if (U_FAILURE(status)) { 589 return NULL; 590 } 591 592 int32_t len = info.length(); 593 if (len == 0) { 594 return NULL; // no error; 595 } 596 597 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 598 if (!p) { 599 status = U_MEMORY_ALLOCATION_ERROR; 600 return NULL; 601 } 602 info.extract(p, len, status); 603 if (!U_FAILURE(status)) { 604 status = U_ZERO_ERROR; // clear warning about non-termination 605 } 606 607 LocDataParser parser(perror, status); 608 return parser.parse(p, len); 609 } 610 611 StringLocalizationInfo::~StringLocalizationInfo() { 612 for (UChar*** p = (UChar***)data; *p; ++p) { 613 // remaining data is simply pointer into our unicode string data. 614 if (*p) uprv_free(*p); 615 } 616 if (data) uprv_free(data); 617 if (info) uprv_free(info); 618 } 619 620 621 const UChar* 622 StringLocalizationInfo::getRuleSetName(int32_t index) const { 623 if (index >= 0 && index < getNumberOfRuleSets()) { 624 return data[0][index]; 625 } 626 return NULL; 627 } 628 629 const UChar* 630 StringLocalizationInfo::getLocaleName(int32_t index) const { 631 if (index >= 0 && index < getNumberOfDisplayLocales()) { 632 return data[index+1][0]; 633 } 634 return NULL; 635 } 636 637 const UChar* 638 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 639 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 640 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 641 return data[localeIndex+1][ruleIndex+1]; 642 } 643 return NULL; 644 } 645 646 // ---------- 647 648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 649 const UnicodeString& locs, 650 const Locale& alocale, UParseError& perror, UErrorCode& status) 651 : ruleSets(NULL) 652 , ruleSetDescriptions(NULL) 653 , numRuleSets(0) 654 , defaultRuleSet(NULL) 655 , locale(alocale) 656 , collator(NULL) 657 , decimalFormatSymbols(NULL) 658 , lenient(FALSE) 659 , lenientParseRules(NULL) 660 , localizations(NULL) 661 { 662 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 663 init(description, locinfo, perror, status); 664 } 665 666 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 667 const UnicodeString& locs, 668 UParseError& perror, UErrorCode& status) 669 : ruleSets(NULL) 670 , ruleSetDescriptions(NULL) 671 , numRuleSets(0) 672 , defaultRuleSet(NULL) 673 , locale(Locale::getDefault()) 674 , collator(NULL) 675 , decimalFormatSymbols(NULL) 676 , lenient(FALSE) 677 , lenientParseRules(NULL) 678 , localizations(NULL) 679 { 680 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 681 init(description, locinfo, perror, status); 682 } 683 684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 685 LocalizationInfo* info, 686 const Locale& alocale, UParseError& perror, UErrorCode& status) 687 : ruleSets(NULL) 688 , ruleSetDescriptions(NULL) 689 , numRuleSets(0) 690 , defaultRuleSet(NULL) 691 , locale(alocale) 692 , collator(NULL) 693 , decimalFormatSymbols(NULL) 694 , lenient(FALSE) 695 , lenientParseRules(NULL) 696 , localizations(NULL) 697 { 698 init(description, info, perror, status); 699 } 700 701 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 702 UParseError& perror, 703 UErrorCode& status) 704 : ruleSets(NULL) 705 , ruleSetDescriptions(NULL) 706 , numRuleSets(0) 707 , defaultRuleSet(NULL) 708 , locale(Locale::getDefault()) 709 , collator(NULL) 710 , decimalFormatSymbols(NULL) 711 , lenient(FALSE) 712 , lenientParseRules(NULL) 713 , localizations(NULL) 714 { 715 init(description, NULL, perror, status); 716 } 717 718 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 719 const Locale& aLocale, 720 UParseError& perror, 721 UErrorCode& status) 722 : ruleSets(NULL) 723 , ruleSetDescriptions(NULL) 724 , numRuleSets(0) 725 , defaultRuleSet(NULL) 726 , locale(aLocale) 727 , collator(NULL) 728 , decimalFormatSymbols(NULL) 729 , lenient(FALSE) 730 , lenientParseRules(NULL) 731 , localizations(NULL) 732 { 733 init(description, NULL, perror, status); 734 } 735 736 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 737 : ruleSets(NULL) 738 , ruleSetDescriptions(NULL) 739 , numRuleSets(0) 740 , defaultRuleSet(NULL) 741 , locale(alocale) 742 , collator(NULL) 743 , decimalFormatSymbols(NULL) 744 , lenient(FALSE) 745 , lenientParseRules(NULL) 746 , localizations(NULL) 747 { 748 if (U_FAILURE(status)) { 749 return; 750 } 751 752 const char* rules_tag = "RBNFRules"; 753 const char* fmt_tag = ""; 754 switch (tag) { 755 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 756 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 757 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 758 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 759 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 760 } 761 762 // TODO: read localization info from resource 763 LocalizationInfo* locinfo = NULL; 764 765 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 766 if (U_SUCCESS(status)) { 767 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 768 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 769 770 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 771 if (U_FAILURE(status)) { 772 ures_close(nfrb); 773 } 774 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 775 if (U_FAILURE(status)) { 776 ures_close(rbnfRules); 777 ures_close(nfrb); 778 return; 779 } 780 781 UnicodeString desc; 782 while (ures_hasNext(ruleSets)) { 783 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 784 } 785 UParseError perror; 786 787 init (desc, locinfo, perror, status); 788 789 ures_close(ruleSets); 790 ures_close(rbnfRules); 791 } 792 ures_close(nfrb); 793 } 794 795 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 796 : NumberFormat(rhs) 797 , ruleSets(NULL) 798 , ruleSetDescriptions(NULL) 799 , numRuleSets(0) 800 , defaultRuleSet(NULL) 801 , locale(rhs.locale) 802 , collator(NULL) 803 , decimalFormatSymbols(NULL) 804 , lenient(FALSE) 805 , lenientParseRules(NULL) 806 , localizations(NULL) 807 { 808 this->operator=(rhs); 809 } 810 811 // -------- 812 813 RuleBasedNumberFormat& 814 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 815 { 816 UErrorCode status = U_ZERO_ERROR; 817 dispose(); 818 locale = rhs.locale; 819 lenient = rhs.lenient; 820 821 UnicodeString rules = rhs.getRules(); 822 UParseError perror; 823 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 824 825 return *this; 826 } 827 828 RuleBasedNumberFormat::~RuleBasedNumberFormat() 829 { 830 dispose(); 831 } 832 833 Format* 834 RuleBasedNumberFormat::clone(void) const 835 { 836 RuleBasedNumberFormat * result = NULL; 837 UnicodeString rules = getRules(); 838 UErrorCode status = U_ZERO_ERROR; 839 UParseError perror; 840 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status); 841 /* test for NULL */ 842 if (result == 0) { 843 status = U_MEMORY_ALLOCATION_ERROR; 844 return 0; 845 } 846 if (U_FAILURE(status)) { 847 delete result; 848 result = 0; 849 } else { 850 result->lenient = lenient; 851 } 852 return result; 853 } 854 855 UBool 856 RuleBasedNumberFormat::operator==(const Format& other) const 857 { 858 if (this == &other) { 859 return TRUE; 860 } 861 862 if (typeid(*this) == typeid(other)) { 863 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 864 if (locale == rhs.locale && 865 lenient == rhs.lenient && 866 (localizations == NULL 867 ? rhs.localizations == NULL 868 : (rhs.localizations == NULL 869 ? FALSE 870 : *localizations == rhs.localizations))) { 871 872 NFRuleSet** p = ruleSets; 873 NFRuleSet** q = rhs.ruleSets; 874 if (p == NULL) { 875 return q == NULL; 876 } else if (q == NULL) { 877 return FALSE; 878 } 879 while (*p && *q && (**p == **q)) { 880 ++p; 881 ++q; 882 } 883 return *q == NULL && *p == NULL; 884 } 885 } 886 887 return FALSE; 888 } 889 890 UnicodeString 891 RuleBasedNumberFormat::getRules() const 892 { 893 UnicodeString result; 894 if (ruleSets != NULL) { 895 for (NFRuleSet** p = ruleSets; *p; ++p) { 896 (*p)->appendRules(result); 897 } 898 } 899 return result; 900 } 901 902 UnicodeString 903 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 904 { 905 if (localizations) { 906 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 907 return string; 908 } else if (ruleSets) { 909 UnicodeString result; 910 for (NFRuleSet** p = ruleSets; *p; ++p) { 911 NFRuleSet* rs = *p; 912 if (rs->isPublic()) { 913 if (--index == -1) { 914 rs->getName(result); 915 return result; 916 } 917 } 918 } 919 } 920 UnicodeString empty; 921 return empty; 922 } 923 924 int32_t 925 RuleBasedNumberFormat::getNumberOfRuleSetNames() const 926 { 927 int32_t result = 0; 928 if (localizations) { 929 result = localizations->getNumberOfRuleSets(); 930 } else if (ruleSets) { 931 for (NFRuleSet** p = ruleSets; *p; ++p) { 932 if ((**p).isPublic()) { 933 ++result; 934 } 935 } 936 } 937 return result; 938 } 939 940 int32_t 941 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 942 if (localizations) { 943 return localizations->getNumberOfDisplayLocales(); 944 } 945 return 0; 946 } 947 948 Locale 949 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 950 if (U_FAILURE(status)) { 951 return Locale(""); 952 } 953 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 954 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 955 char buffer[64]; 956 int32_t cap = name.length() + 1; 957 char* bp = buffer; 958 if (cap > 64) { 959 bp = (char *)uprv_malloc(cap); 960 if (bp == NULL) { 961 status = U_MEMORY_ALLOCATION_ERROR; 962 return Locale(""); 963 } 964 } 965 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 966 Locale retLocale(bp); 967 if (bp != buffer) { 968 uprv_free(bp); 969 } 970 return retLocale; 971 } 972 status = U_ILLEGAL_ARGUMENT_ERROR; 973 Locale retLocale; 974 return retLocale; 975 } 976 977 UnicodeString 978 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 979 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 980 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 981 int32_t len = localeName.length(); 982 UChar* localeStr = localeName.getBuffer(len + 1); 983 while (len >= 0) { 984 localeStr[len] = 0; 985 int32_t ix = localizations->indexForLocale(localeStr); 986 if (ix >= 0) { 987 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 988 return name; 989 } 990 991 // trim trailing portion, skipping over ommitted sections 992 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 993 while (len > 0 && localeStr[len-1] == 0x005F) --len; 994 } 995 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 996 return name; 997 } 998 UnicodeString bogus; 999 bogus.setToBogus(); 1000 return bogus; 1001 } 1002 1003 UnicodeString 1004 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1005 if (localizations) { 1006 UnicodeString rsn(ruleSetName); 1007 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1008 return getRuleSetDisplayName(ix, localeParam); 1009 } 1010 UnicodeString bogus; 1011 bogus.setToBogus(); 1012 return bogus; 1013 } 1014 1015 NFRuleSet* 1016 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1017 { 1018 if (U_SUCCESS(status) && ruleSets) { 1019 for (NFRuleSet** p = ruleSets; *p; ++p) { 1020 NFRuleSet* rs = *p; 1021 if (rs->isNamed(name)) { 1022 return rs; 1023 } 1024 } 1025 status = U_ILLEGAL_ARGUMENT_ERROR; 1026 } 1027 return NULL; 1028 } 1029 1030 UnicodeString& 1031 RuleBasedNumberFormat::format(int32_t number, 1032 UnicodeString& toAppendTo, 1033 FieldPosition& /* pos */) const 1034 { 1035 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); 1036 return toAppendTo; 1037 } 1038 1039 1040 UnicodeString& 1041 RuleBasedNumberFormat::format(int64_t number, 1042 UnicodeString& toAppendTo, 1043 FieldPosition& /* pos */) const 1044 { 1045 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1046 return toAppendTo; 1047 } 1048 1049 1050 UnicodeString& 1051 RuleBasedNumberFormat::format(double number, 1052 UnicodeString& toAppendTo, 1053 FieldPosition& /* pos */) const 1054 { 1055 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. 1056 if (uprv_isNaN(number)) { 1057 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal 1058 if (decFmtSyms) { 1059 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); 1060 } 1061 } else if (defaultRuleSet) { 1062 defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1063 } 1064 return toAppendTo; 1065 } 1066 1067 1068 UnicodeString& 1069 RuleBasedNumberFormat::format(int32_t number, 1070 const UnicodeString& ruleSetName, 1071 UnicodeString& toAppendTo, 1072 FieldPosition& /* pos */, 1073 UErrorCode& status) const 1074 { 1075 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1076 if (U_SUCCESS(status)) { 1077 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1078 // throw new IllegalArgumentException("Can't use internal rule set"); 1079 status = U_ILLEGAL_ARGUMENT_ERROR; 1080 } else { 1081 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1082 if (rs) { 1083 rs->format((int64_t)number, toAppendTo, toAppendTo.length()); 1084 } 1085 } 1086 } 1087 return toAppendTo; 1088 } 1089 1090 1091 UnicodeString& 1092 RuleBasedNumberFormat::format(int64_t number, 1093 const UnicodeString& ruleSetName, 1094 UnicodeString& toAppendTo, 1095 FieldPosition& /* pos */, 1096 UErrorCode& status) const 1097 { 1098 if (U_SUCCESS(status)) { 1099 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1100 // throw new IllegalArgumentException("Can't use internal rule set"); 1101 status = U_ILLEGAL_ARGUMENT_ERROR; 1102 } else { 1103 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1104 if (rs) { 1105 rs->format(number, toAppendTo, toAppendTo.length()); 1106 } 1107 } 1108 } 1109 return toAppendTo; 1110 } 1111 1112 1113 // make linker happy 1114 UnicodeString& 1115 RuleBasedNumberFormat::format(const Formattable& obj, 1116 UnicodeString& toAppendTo, 1117 FieldPosition& pos, 1118 UErrorCode& status) const 1119 { 1120 return NumberFormat::format(obj, toAppendTo, pos, status); 1121 } 1122 1123 UnicodeString& 1124 RuleBasedNumberFormat::format(double number, 1125 const UnicodeString& ruleSetName, 1126 UnicodeString& toAppendTo, 1127 FieldPosition& /* pos */, 1128 UErrorCode& status) const 1129 { 1130 if (U_SUCCESS(status)) { 1131 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1132 // throw new IllegalArgumentException("Can't use internal rule set"); 1133 status = U_ILLEGAL_ARGUMENT_ERROR; 1134 } else { 1135 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1136 if (rs) { 1137 rs->format(number, toAppendTo, toAppendTo.length()); 1138 } 1139 } 1140 } 1141 return toAppendTo; 1142 } 1143 1144 void 1145 RuleBasedNumberFormat::parse(const UnicodeString& text, 1146 Formattable& result, 1147 ParsePosition& parsePosition) const 1148 { 1149 if (!ruleSets) { 1150 parsePosition.setErrorIndex(0); 1151 return; 1152 } 1153 1154 UnicodeString workingText(text, parsePosition.getIndex()); 1155 ParsePosition workingPos(0); 1156 1157 ParsePosition high_pp(0); 1158 Formattable high_result; 1159 1160 for (NFRuleSet** p = ruleSets; *p; ++p) { 1161 NFRuleSet *rp = *p; 1162 if (rp->isPublic() && rp->isParseable()) { 1163 ParsePosition working_pp(0); 1164 Formattable working_result; 1165 1166 rp->parse(workingText, working_pp, kMaxDouble, working_result); 1167 if (working_pp.getIndex() > high_pp.getIndex()) { 1168 high_pp = working_pp; 1169 high_result = working_result; 1170 1171 if (high_pp.getIndex() == workingText.length()) { 1172 break; 1173 } 1174 } 1175 } 1176 } 1177 1178 int32_t startIndex = parsePosition.getIndex(); 1179 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1180 if (high_pp.getIndex() > 0) { 1181 parsePosition.setErrorIndex(-1); 1182 } else { 1183 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1184 parsePosition.setErrorIndex(startIndex + errorIndex); 1185 } 1186 result = high_result; 1187 if (result.getType() == Formattable::kDouble) { 1188 int32_t r = (int32_t)result.getDouble(); 1189 if ((double)r == result.getDouble()) { 1190 result.setLong(r); 1191 } 1192 } 1193 } 1194 1195 #if !UCONFIG_NO_COLLATION 1196 1197 void 1198 RuleBasedNumberFormat::setLenient(UBool enabled) 1199 { 1200 lenient = enabled; 1201 if (!enabled && collator) { 1202 delete collator; 1203 collator = NULL; 1204 } 1205 } 1206 1207 #endif 1208 1209 void 1210 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1211 if (U_SUCCESS(status)) { 1212 if (ruleSetName.isEmpty()) { 1213 if (localizations) { 1214 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1215 defaultRuleSet = findRuleSet(name, status); 1216 } else { 1217 initDefaultRuleSet(); 1218 } 1219 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1220 status = U_ILLEGAL_ARGUMENT_ERROR; 1221 } else { 1222 NFRuleSet* result = findRuleSet(ruleSetName, status); 1223 if (result != NULL) { 1224 defaultRuleSet = result; 1225 } 1226 } 1227 } 1228 } 1229 1230 UnicodeString 1231 RuleBasedNumberFormat::getDefaultRuleSetName() const { 1232 UnicodeString result; 1233 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1234 defaultRuleSet->getName(result); 1235 } else { 1236 result.setToBogus(); 1237 } 1238 return result; 1239 } 1240 1241 void 1242 RuleBasedNumberFormat::initDefaultRuleSet() 1243 { 1244 defaultRuleSet = NULL; 1245 if (!ruleSets) { 1246 return; 1247 } 1248 1249 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); 1250 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); 1251 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); 1252 1253 NFRuleSet**p = &ruleSets[0]; 1254 while (*p) { 1255 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1256 defaultRuleSet = *p; 1257 return; 1258 } else { 1259 ++p; 1260 } 1261 } 1262 1263 defaultRuleSet = *--p; 1264 if (!defaultRuleSet->isPublic()) { 1265 while (p != ruleSets) { 1266 if ((*--p)->isPublic()) { 1267 defaultRuleSet = *p; 1268 break; 1269 } 1270 } 1271 } 1272 } 1273 1274 1275 void 1276 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1277 UParseError& pErr, UErrorCode& status) 1278 { 1279 // TODO: implement UParseError 1280 uprv_memset(&pErr, 0, sizeof(UParseError)); 1281 // Note: this can leave ruleSets == NULL, so remaining code should check 1282 if (U_FAILURE(status)) { 1283 return; 1284 } 1285 1286 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1287 1288 UnicodeString description(rules); 1289 if (!description.length()) { 1290 status = U_MEMORY_ALLOCATION_ERROR; 1291 return; 1292 } 1293 1294 // start by stripping the trailing whitespace from all the rules 1295 // (this is all the whitespace follwing each semicolon in the 1296 // description). This allows us to look for rule-set boundaries 1297 // by searching for ";%" without having to worry about whitespace 1298 // between the ; and the % 1299 stripWhitespace(description); 1300 1301 // check to see if there's a set of lenient-parse rules. If there 1302 // is, pull them out into our temporary holding place for them, 1303 // and delete them from the description before the real desciption- 1304 // parsing code sees them 1305 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1306 if (lp != -1) { 1307 // we've got to make sure we're not in the middle of a rule 1308 // (where "%%lenient-parse" would actually get treated as 1309 // rule text) 1310 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1311 // locate the beginning and end of the actual collation 1312 // rules (there may be whitespace between the name and 1313 // the first token in the description) 1314 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1315 1316 if (lpEnd == -1) { 1317 lpEnd = description.length() - 1; 1318 } 1319 int lpStart = lp + u_strlen(gLenientParse); 1320 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1321 ++lpStart; 1322 } 1323 1324 // copy out the lenient-parse rules and delete them 1325 // from the description 1326 lenientParseRules = new UnicodeString(); 1327 /* test for NULL */ 1328 if (lenientParseRules == 0) { 1329 status = U_MEMORY_ALLOCATION_ERROR; 1330 return; 1331 } 1332 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1333 1334 description.remove(lp, lpEnd + 1 - lp); 1335 } 1336 } 1337 1338 // pre-flight parsing the description and count the number of 1339 // rule sets (";%" marks the end of one rule set and the beginning 1340 // of the next) 1341 numRuleSets = 0; 1342 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1343 ++numRuleSets; 1344 ++p; 1345 } 1346 ++numRuleSets; 1347 1348 // our rule list is an array of the appropriate size 1349 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1350 /* test for NULL */ 1351 if (ruleSets == 0) { 1352 status = U_MEMORY_ALLOCATION_ERROR; 1353 return; 1354 } 1355 1356 for (int i = 0; i <= numRuleSets; ++i) { 1357 ruleSets[i] = NULL; 1358 } 1359 1360 // divide up the descriptions into individual rule-set descriptions 1361 // and store them in a temporary array. At each step, we also 1362 // new up a rule set, but all this does is initialize its name 1363 // and remove it from its description. We can't actually parse 1364 // the rest of the descriptions and finish initializing everything 1365 // because we have to know the names and locations of all the rule 1366 // sets before we can actually set everything up 1367 if(!numRuleSets) { 1368 status = U_ILLEGAL_ARGUMENT_ERROR; 1369 return; 1370 } 1371 1372 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1373 if (ruleSetDescriptions == 0) { 1374 status = U_MEMORY_ALLOCATION_ERROR; 1375 return; 1376 } 1377 1378 { 1379 int curRuleSet = 0; 1380 int32_t start = 0; 1381 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1382 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1383 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1384 if (ruleSets[curRuleSet] == 0) { 1385 status = U_MEMORY_ALLOCATION_ERROR; 1386 return; 1387 } 1388 ++curRuleSet; 1389 start = p + 1; 1390 } 1391 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1392 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1393 if (ruleSets[curRuleSet] == 0) { 1394 status = U_MEMORY_ALLOCATION_ERROR; 1395 return; 1396 } 1397 } 1398 1399 // now we can take note of the formatter's default rule set, which 1400 // is the last public rule set in the description (it's the last 1401 // rather than the first so that a user can create a new formatter 1402 // from an existing formatter and change its default behavior just 1403 // by appending more rule sets to the end) 1404 1405 // {dlf} Initialization of a fraction rule set requires the default rule 1406 // set to be known. For purposes of initialization, this is always the 1407 // last public rule set, no matter what the localization data says. 1408 initDefaultRuleSet(); 1409 1410 // finally, we can go back through the temporary descriptions 1411 // list and finish seting up the substructure (and we throw 1412 // away the temporary descriptions as we go) 1413 { 1414 for (int i = 0; i < numRuleSets; i++) { 1415 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1416 } 1417 } 1418 1419 // Now that the rules are initialized, the 'real' default rule 1420 // set can be adjusted by the localization data. 1421 1422 // The C code keeps the localization array as is, rather than building 1423 // a separate array of the public rule set names, so we have less work 1424 // to do here-- but we still need to check the names. 1425 1426 if (localizationInfos) { 1427 // confirm the names, if any aren't in the rules, that's an error 1428 // it is ok if the rules contain public rule sets that are not in this list 1429 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1430 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1431 NFRuleSet* rs = findRuleSet(name, status); 1432 if (rs == NULL) { 1433 break; // error 1434 } 1435 if (i == 0) { 1436 defaultRuleSet = rs; 1437 } 1438 } 1439 } else { 1440 defaultRuleSet = getDefaultRuleSet(); 1441 } 1442 } 1443 1444 void 1445 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1446 { 1447 // iterate through the characters... 1448 UnicodeString result; 1449 1450 int start = 0; 1451 while (start != -1 && start < description.length()) { 1452 // seek to the first non-whitespace character... 1453 while (start < description.length() 1454 && PatternProps::isWhiteSpace(description.charAt(start))) { 1455 ++start; 1456 } 1457 1458 // locate the next semicolon in the text and copy the text from 1459 // our current position up to that semicolon into the result 1460 int32_t p = description.indexOf(gSemiColon, start); 1461 if (p == -1) { 1462 // or if we don't find a semicolon, just copy the rest of 1463 // the string into the result 1464 result.append(description, start, description.length() - start); 1465 start = -1; 1466 } 1467 else if (p < description.length()) { 1468 result.append(description, start, p + 1 - start); 1469 start = p + 1; 1470 } 1471 1472 // when we get here, we've seeked off the end of the sring, and 1473 // we terminate the loop (we continue until *start* is -1 rather 1474 // than until *p* is -1, because otherwise we'd miss the last 1475 // rule in the description) 1476 else { 1477 start = -1; 1478 } 1479 } 1480 1481 description.setTo(result); 1482 } 1483 1484 1485 void 1486 RuleBasedNumberFormat::dispose() 1487 { 1488 if (ruleSets) { 1489 for (NFRuleSet** p = ruleSets; *p; ++p) { 1490 delete *p; 1491 } 1492 uprv_free(ruleSets); 1493 ruleSets = NULL; 1494 } 1495 1496 if (ruleSetDescriptions) { 1497 delete [] ruleSetDescriptions; 1498 } 1499 1500 #if !UCONFIG_NO_COLLATION 1501 delete collator; 1502 #endif 1503 collator = NULL; 1504 1505 delete decimalFormatSymbols; 1506 decimalFormatSymbols = NULL; 1507 1508 delete lenientParseRules; 1509 lenientParseRules = NULL; 1510 1511 if (localizations) localizations = localizations->unref(); 1512 } 1513 1514 1515 //----------------------------------------------------------------------- 1516 // package-internal API 1517 //----------------------------------------------------------------------- 1518 1519 /** 1520 * Returns the collator to use for lenient parsing. The collator is lazily created: 1521 * this function creates it the first time it's called. 1522 * @return The collator to use for lenient parsing, or null if lenient parsing 1523 * is turned off. 1524 */ 1525 Collator* 1526 RuleBasedNumberFormat::getCollator() const 1527 { 1528 #if !UCONFIG_NO_COLLATION 1529 if (!ruleSets) { 1530 return NULL; 1531 } 1532 1533 // lazy-evaulate the collator 1534 if (collator == NULL && lenient) { 1535 // create a default collator based on the formatter's locale, 1536 // then pull out that collator's rules, append any additional 1537 // rules specified in the description, and create a _new_ 1538 // collator based on the combinaiton of those rules 1539 1540 UErrorCode status = U_ZERO_ERROR; 1541 1542 Collator* temp = Collator::createInstance(locale, status); 1543 RuleBasedCollator* newCollator; 1544 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1545 if (lenientParseRules) { 1546 UnicodeString rules(newCollator->getRules()); 1547 rules.append(*lenientParseRules); 1548 1549 newCollator = new RuleBasedCollator(rules, status); 1550 // Exit if newCollator could not be created. 1551 if (newCollator == NULL) { 1552 return NULL; 1553 } 1554 } else { 1555 temp = NULL; 1556 } 1557 if (U_SUCCESS(status)) { 1558 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1559 // cast away const 1560 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1561 } else { 1562 delete newCollator; 1563 } 1564 } 1565 delete temp; 1566 } 1567 #endif 1568 1569 // if lenient-parse mode is off, this will be null 1570 // (see setLenientParseMode()) 1571 return collator; 1572 } 1573 1574 1575 /** 1576 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1577 * instances owned by this formatter. This object is lazily created: this function 1578 * creates it the first time it's called. 1579 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat 1580 * instances owned by this formatter. 1581 */ 1582 DecimalFormatSymbols* 1583 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1584 { 1585 // lazy-evaluate the DecimalFormatSymbols object. This object 1586 // is shared by all DecimalFormat instances belonging to this 1587 // formatter 1588 if (decimalFormatSymbols == NULL) { 1589 UErrorCode status = U_ZERO_ERROR; 1590 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1591 if (U_SUCCESS(status)) { 1592 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; 1593 } else { 1594 delete temp; 1595 } 1596 } 1597 return decimalFormatSymbols; 1598 } 1599 1600 // De-owning the current localized symbols and adopt the new symbols. 1601 void 1602 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1603 { 1604 if (symbolsToAdopt == NULL) { 1605 return; // do not allow caller to set decimalFormatSymbols to NULL 1606 } 1607 1608 if (decimalFormatSymbols != NULL) { 1609 delete decimalFormatSymbols; 1610 } 1611 1612 decimalFormatSymbols = symbolsToAdopt; 1613 1614 { 1615 // Apply the new decimalFormatSymbols by reparsing the rulesets 1616 UErrorCode status = U_ZERO_ERROR; 1617 1618 for (int32_t i = 0; i < numRuleSets; i++) { 1619 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1620 } 1621 } 1622 } 1623 1624 // Setting the symbols is equlivalent to adopting a newly created localized symbols. 1625 void 1626 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1627 { 1628 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1629 } 1630 1631 U_NAMESPACE_END 1632 1633 /* U_HAVE_RBNF */ 1634 #endif 1635