1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2015, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 #include "unicode/utypes.h" 9 #include "utypeinfo.h" // for 'typeid' to work 10 11 #include "unicode/rbnf.h" 12 13 #if U_HAVE_RBNF 14 15 #include "unicode/normlzr.h" 16 #include "unicode/plurfmt.h" 17 #include "unicode/tblcoll.h" 18 #include "unicode/uchar.h" 19 #include "unicode/ucol.h" 20 #include "unicode/uloc.h" 21 #include "unicode/unum.h" 22 #include "unicode/ures.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "unicode/udata.h" 26 #include "unicode/udisplaycontext.h" 27 #include "unicode/brkiter.h" 28 #include "nfrs.h" 29 30 #include "cmemory.h" 31 #include "cstring.h" 32 #include "patternprops.h" 33 #include "uresimp.h" 34 35 // debugging 36 // #define RBNF_DEBUG 37 38 #ifdef RBNF_DEBUG 39 #include <stdio.h> 40 #endif 41 42 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 43 44 static const UChar gPercentPercent[] = 45 { 46 0x25, 0x25, 0 47 }; /* "%%" */ 48 49 // All urbnf objects are created through openRules, so we init all of the 50 // Unicode string constants required by rbnf, nfrs, or nfr here. 51 static const UChar gLenientParse[] = 52 { 53 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 54 }; /* "%%lenient-parse:" */ 55 static const UChar gSemiColon = 0x003B; 56 static const UChar gSemiPercent[] = 57 { 58 0x3B, 0x25, 0 59 }; /* ";%" */ 60 61 #define kSomeNumberOfBitsDiv2 22 62 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 63 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 64 65 U_NAMESPACE_BEGIN 66 67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 68 69 /* 70 This is a utility class. It does not use ICU's RTTI. 71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 72 Please make sure that intltest passes on Windows in Release mode, 73 since the string pooling per compilation unit will mess up how RTTI works. 74 The RTTI code was also removed due to lack of code coverage. 75 */ 76 class LocalizationInfo : public UMemory { 77 protected: 78 virtual ~LocalizationInfo(); 79 uint32_t refcount; 80 81 public: 82 LocalizationInfo() : refcount(0) {} 83 84 LocalizationInfo* ref(void) { 85 ++refcount; 86 return this; 87 } 88 89 LocalizationInfo* unref(void) { 90 if (refcount && --refcount == 0) { 91 delete this; 92 } 93 return NULL; 94 } 95 96 virtual UBool operator==(const LocalizationInfo* rhs) const; 97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 98 99 virtual int32_t getNumberOfRuleSets(void) const = 0; 100 virtual const UChar* getRuleSetName(int32_t index) const = 0; 101 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 102 virtual const UChar* getLocaleName(int32_t index) const = 0; 103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 104 105 virtual int32_t indexForLocale(const UChar* locale) const; 106 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 107 108 // virtual UClassID getDynamicClassID() const = 0; 109 // static UClassID getStaticClassID(void); 110 }; 111 112 LocalizationInfo::~LocalizationInfo() {} 113 114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 115 116 // if both strings are NULL, this returns TRUE 117 static UBool 118 streq(const UChar* lhs, const UChar* rhs) { 119 if (rhs == lhs) { 120 return TRUE; 121 } 122 if (lhs && rhs) { 123 return u_strcmp(lhs, rhs) == 0; 124 } 125 return FALSE; 126 } 127 128 UBool 129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 130 if (rhs) { 131 if (this == rhs) { 132 return TRUE; 133 } 134 135 int32_t rsc = getNumberOfRuleSets(); 136 if (rsc == rhs->getNumberOfRuleSets()) { 137 for (int i = 0; i < rsc; ++i) { 138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 139 return FALSE; 140 } 141 } 142 int32_t dlc = getNumberOfDisplayLocales(); 143 if (dlc == rhs->getNumberOfDisplayLocales()) { 144 for (int i = 0; i < dlc; ++i) { 145 const UChar* locale = getLocaleName(i); 146 int32_t ix = rhs->indexForLocale(locale); 147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 148 if (!streq(locale, rhs->getLocaleName(ix))) { 149 return FALSE; 150 } 151 for (int j = 0; j < rsc; ++j) { 152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 153 return FALSE; 154 } 155 } 156 } 157 return TRUE; 158 } 159 } 160 } 161 return FALSE; 162 } 163 164 int32_t 165 LocalizationInfo::indexForLocale(const UChar* locale) const { 166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 167 if (streq(locale, getLocaleName(i))) { 168 return i; 169 } 170 } 171 return -1; 172 } 173 174 int32_t 175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 176 if (ruleset) { 177 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 178 if (streq(ruleset, getRuleSetName(i))) { 179 return i; 180 } 181 } 182 } 183 return -1; 184 } 185 186 187 typedef void (*Fn_Deleter)(void*); 188 189 class VArray { 190 void** buf; 191 int32_t cap; 192 int32_t size; 193 Fn_Deleter deleter; 194 public: 195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 196 197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 198 199 ~VArray() { 200 if (deleter) { 201 for (int i = 0; i < size; ++i) { 202 (*deleter)(buf[i]); 203 } 204 } 205 uprv_free(buf); 206 } 207 208 int32_t length() { 209 return size; 210 } 211 212 void add(void* elem, UErrorCode& status) { 213 if (U_SUCCESS(status)) { 214 if (size == cap) { 215 if (cap == 0) { 216 cap = 1; 217 } else if (cap < 256) { 218 cap *= 2; 219 } else { 220 cap += 256; 221 } 222 if (buf == NULL) { 223 buf = (void**)uprv_malloc(cap * sizeof(void*)); 224 } else { 225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 226 } 227 if (buf == NULL) { 228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 229 status = U_MEMORY_ALLOCATION_ERROR; 230 return; 231 } 232 void* start = &buf[size]; 233 size_t count = (cap - size) * sizeof(void*); 234 uprv_memset(start, 0, count); // fill with nulls, just because 235 } 236 buf[size++] = elem; 237 } 238 } 239 240 void** release(void) { 241 void** result = buf; 242 buf = NULL; 243 cap = 0; 244 size = 0; 245 return result; 246 } 247 }; 248 249 class LocDataParser; 250 251 class StringLocalizationInfo : public LocalizationInfo { 252 UChar* info; 253 UChar*** data; 254 int32_t numRuleSets; 255 int32_t numLocales; 256 257 friend class LocDataParser; 258 259 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 260 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 261 { 262 } 263 264 public: 265 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 266 267 virtual ~StringLocalizationInfo(); 268 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 269 virtual const UChar* getRuleSetName(int32_t index) const; 270 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 271 virtual const UChar* getLocaleName(int32_t index) const; 272 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 273 274 // virtual UClassID getDynamicClassID() const; 275 // static UClassID getStaticClassID(void); 276 277 private: 278 void init(UErrorCode& status) const; 279 }; 280 281 282 enum { 283 OPEN_ANGLE = 0x003c, /* '<' */ 284 CLOSE_ANGLE = 0x003e, /* '>' */ 285 COMMA = 0x002c, 286 TICK = 0x0027, 287 QUOTE = 0x0022, 288 SPACE = 0x0020 289 }; 290 291 /** 292 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 293 */ 294 class LocDataParser { 295 UChar* data; 296 const UChar* e; 297 UChar* p; 298 UChar ch; 299 UParseError& pe; 300 UErrorCode& ec; 301 302 public: 303 LocDataParser(UParseError& parseError, UErrorCode& status) 304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 305 ~LocDataParser() {} 306 307 /* 308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 310 */ 311 StringLocalizationInfo* parse(UChar* data, int32_t len); 312 313 private: 314 315 void inc(void) { ++p; ch = 0xffff; } 316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 317 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} 319 UBool inList(UChar c, const UChar* list) const { 320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; 321 while (*list && *list != c) ++list; return *list == c; 322 } 323 void parseError(const char* msg); 324 325 StringLocalizationInfo* doParse(void); 326 327 UChar** nextArray(int32_t& requiredLength); 328 UChar* nextString(void); 329 }; 330 331 #ifdef RBNF_DEBUG 332 #define ERROR(msg) parseError(msg); return NULL; 333 #define EXPLANATION_ARG explanationArg 334 #else 335 #define ERROR(msg) parseError(NULL); return NULL; 336 #define EXPLANATION_ARG 337 #endif 338 339 340 static const UChar DQUOTE_STOPLIST[] = { 341 QUOTE, 0 342 }; 343 344 static const UChar SQUOTE_STOPLIST[] = { 345 TICK, 0 346 }; 347 348 static const UChar NOQUOTE_STOPLIST[] = { 349 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 350 }; 351 352 static void 353 DeleteFn(void* p) { 354 uprv_free(p); 355 } 356 357 StringLocalizationInfo* 358 LocDataParser::parse(UChar* _data, int32_t len) { 359 if (U_FAILURE(ec)) { 360 if (_data) uprv_free(_data); 361 return NULL; 362 } 363 364 pe.line = 0; 365 pe.offset = -1; 366 pe.postContext[0] = 0; 367 pe.preContext[0] = 0; 368 369 if (_data == NULL) { 370 ec = U_ILLEGAL_ARGUMENT_ERROR; 371 return NULL; 372 } 373 374 if (len <= 0) { 375 ec = U_ILLEGAL_ARGUMENT_ERROR; 376 uprv_free(_data); 377 return NULL; 378 } 379 380 data = _data; 381 e = data + len; 382 p = _data; 383 ch = 0xffff; 384 385 return doParse(); 386 } 387 388 389 StringLocalizationInfo* 390 LocDataParser::doParse(void) { 391 skipWhitespace(); 392 if (!checkInc(OPEN_ANGLE)) { 393 ERROR("Missing open angle"); 394 } else { 395 VArray array(DeleteFn); 396 UBool mightHaveNext = TRUE; 397 int32_t requiredLength = -1; 398 while (mightHaveNext) { 399 mightHaveNext = FALSE; 400 UChar** elem = nextArray(requiredLength); 401 skipWhitespace(); 402 UBool haveComma = check(COMMA); 403 if (elem) { 404 array.add(elem, ec); 405 if (haveComma) { 406 inc(); 407 mightHaveNext = TRUE; 408 } 409 } else if (haveComma) { 410 ERROR("Unexpected character"); 411 } 412 } 413 414 skipWhitespace(); 415 if (!checkInc(CLOSE_ANGLE)) { 416 if (check(OPEN_ANGLE)) { 417 ERROR("Missing comma in outer array"); 418 } else { 419 ERROR("Missing close angle bracket in outer array"); 420 } 421 } 422 423 skipWhitespace(); 424 if (p != e) { 425 ERROR("Extra text after close of localization data"); 426 } 427 428 array.add(NULL, ec); 429 if (U_SUCCESS(ec)) { 430 int32_t numLocs = array.length() - 2; // subtract first, NULL 431 UChar*** result = (UChar***)array.release(); 432 433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 434 } 435 } 436 437 ERROR("Unknown error"); 438 } 439 440 UChar** 441 LocDataParser::nextArray(int32_t& requiredLength) { 442 if (U_FAILURE(ec)) { 443 return NULL; 444 } 445 446 skipWhitespace(); 447 if (!checkInc(OPEN_ANGLE)) { 448 ERROR("Missing open angle"); 449 } 450 451 VArray array; 452 UBool mightHaveNext = TRUE; 453 while (mightHaveNext) { 454 mightHaveNext = FALSE; 455 UChar* elem = nextString(); 456 skipWhitespace(); 457 UBool haveComma = check(COMMA); 458 if (elem) { 459 array.add(elem, ec); 460 if (haveComma) { 461 inc(); 462 mightHaveNext = TRUE; 463 } 464 } else if (haveComma) { 465 ERROR("Unexpected comma"); 466 } 467 } 468 skipWhitespace(); 469 if (!checkInc(CLOSE_ANGLE)) { 470 if (check(OPEN_ANGLE)) { 471 ERROR("Missing close angle bracket in inner array"); 472 } else { 473 ERROR("Missing comma in inner array"); 474 } 475 } 476 477 array.add(NULL, ec); 478 if (U_SUCCESS(ec)) { 479 if (requiredLength == -1) { 480 requiredLength = array.length() + 1; 481 } else if (array.length() != requiredLength) { 482 ec = U_ILLEGAL_ARGUMENT_ERROR; 483 ERROR("Array not of required length"); 484 } 485 486 return (UChar**)array.release(); 487 } 488 ERROR("Unknown Error"); 489 } 490 491 UChar* 492 LocDataParser::nextString() { 493 UChar* result = NULL; 494 495 skipWhitespace(); 496 if (p < e) { 497 const UChar* terminators; 498 UChar c = *p; 499 UBool haveQuote = c == QUOTE || c == TICK; 500 if (haveQuote) { 501 inc(); 502 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 503 } else { 504 terminators = NOQUOTE_STOPLIST; 505 } 506 UChar* start = p; 507 while (p < e && !inList(*p, terminators)) ++p; 508 if (p == e) { 509 ERROR("Unexpected end of data"); 510 } 511 512 UChar x = *p; 513 if (p > start) { 514 ch = x; 515 *p = 0x0; // terminate by writing to data 516 result = start; // just point into data 517 } 518 if (haveQuote) { 519 if (x != c) { 520 ERROR("Missing matching quote"); 521 } else if (p == start) { 522 ERROR("Empty string"); 523 } 524 inc(); 525 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 526 ERROR("Unexpected character in string"); 527 } 528 } 529 530 // ok for there to be no next string 531 return result; 532 } 533 534 void LocDataParser::parseError(const char* EXPLANATION_ARG) 535 { 536 if (!data) { 537 return; 538 } 539 540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 541 if (start < data) { 542 start = data; 543 } 544 for (UChar* x = p; --x >= start;) { 545 if (!*x) { 546 start = x+1; 547 break; 548 } 549 } 550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 551 if (limit > e) { 552 limit = e; 553 } 554 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 555 pe.preContext[p-start] = 0; 556 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 557 pe.postContext[limit-p] = 0; 558 pe.offset = (int32_t)(p - data); 559 560 #ifdef RBNF_DEBUG 561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 562 563 UnicodeString msg; 564 msg.append(start, p - start); 565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 566 msg.append(p, limit-p); 567 msg.append(UNICODE_STRING_SIMPLE("'")); 568 569 char buf[128]; 570 int32_t len = msg.extract(0, msg.length(), buf, 128); 571 if (len >= 128) { 572 buf[127] = 0; 573 } else { 574 buf[len] = 0; 575 } 576 fprintf(stderr, "%s\n", buf); 577 fflush(stderr); 578 #endif 579 580 uprv_free(data); 581 data = NULL; 582 p = NULL; 583 e = NULL; 584 585 if (U_SUCCESS(ec)) { 586 ec = U_PARSE_ERROR; 587 } 588 } 589 590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 591 592 StringLocalizationInfo* 593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 594 if (U_FAILURE(status)) { 595 return NULL; 596 } 597 598 int32_t len = info.length(); 599 if (len == 0) { 600 return NULL; // no error; 601 } 602 603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 604 if (!p) { 605 status = U_MEMORY_ALLOCATION_ERROR; 606 return NULL; 607 } 608 info.extract(p, len, status); 609 if (!U_FAILURE(status)) { 610 status = U_ZERO_ERROR; // clear warning about non-termination 611 } 612 613 LocDataParser parser(perror, status); 614 return parser.parse(p, len); 615 } 616 617 StringLocalizationInfo::~StringLocalizationInfo() { 618 for (UChar*** p = (UChar***)data; *p; ++p) { 619 // remaining data is simply pointer into our unicode string data. 620 if (*p) uprv_free(*p); 621 } 622 if (data) uprv_free(data); 623 if (info) uprv_free(info); 624 } 625 626 627 const UChar* 628 StringLocalizationInfo::getRuleSetName(int32_t index) const { 629 if (index >= 0 && index < getNumberOfRuleSets()) { 630 return data[0][index]; 631 } 632 return NULL; 633 } 634 635 const UChar* 636 StringLocalizationInfo::getLocaleName(int32_t index) const { 637 if (index >= 0 && index < getNumberOfDisplayLocales()) { 638 return data[index+1][0]; 639 } 640 return NULL; 641 } 642 643 const UChar* 644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 647 return data[localeIndex+1][ruleIndex+1]; 648 } 649 return NULL; 650 } 651 652 // ---------- 653 654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 655 const UnicodeString& locs, 656 const Locale& alocale, UParseError& perror, UErrorCode& status) 657 : ruleSets(NULL) 658 , ruleSetDescriptions(NULL) 659 , numRuleSets(0) 660 , defaultRuleSet(NULL) 661 , locale(alocale) 662 , collator(NULL) 663 , decimalFormatSymbols(NULL) 664 , defaultInfinityRule(NULL) 665 , defaultNaNRule(NULL) 666 , lenient(FALSE) 667 , lenientParseRules(NULL) 668 , localizations(NULL) 669 , capitalizationInfoSet(FALSE) 670 , capitalizationForUIListMenu(FALSE) 671 , capitalizationForStandAlone(FALSE) 672 , capitalizationBrkIter(NULL) 673 { 674 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 675 init(description, locinfo, perror, status); 676 } 677 678 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 679 const UnicodeString& locs, 680 UParseError& perror, UErrorCode& status) 681 : ruleSets(NULL) 682 , ruleSetDescriptions(NULL) 683 , numRuleSets(0) 684 , defaultRuleSet(NULL) 685 , locale(Locale::getDefault()) 686 , collator(NULL) 687 , decimalFormatSymbols(NULL) 688 , defaultInfinityRule(NULL) 689 , defaultNaNRule(NULL) 690 , lenient(FALSE) 691 , lenientParseRules(NULL) 692 , localizations(NULL) 693 , capitalizationInfoSet(FALSE) 694 , capitalizationForUIListMenu(FALSE) 695 , capitalizationForStandAlone(FALSE) 696 , capitalizationBrkIter(NULL) 697 { 698 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 699 init(description, locinfo, perror, status); 700 } 701 702 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 703 LocalizationInfo* info, 704 const Locale& alocale, UParseError& perror, UErrorCode& status) 705 : ruleSets(NULL) 706 , ruleSetDescriptions(NULL) 707 , numRuleSets(0) 708 , defaultRuleSet(NULL) 709 , locale(alocale) 710 , collator(NULL) 711 , decimalFormatSymbols(NULL) 712 , defaultInfinityRule(NULL) 713 , defaultNaNRule(NULL) 714 , lenient(FALSE) 715 , lenientParseRules(NULL) 716 , localizations(NULL) 717 , capitalizationInfoSet(FALSE) 718 , capitalizationForUIListMenu(FALSE) 719 , capitalizationForStandAlone(FALSE) 720 , capitalizationBrkIter(NULL) 721 { 722 init(description, info, perror, status); 723 } 724 725 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 726 UParseError& perror, 727 UErrorCode& status) 728 : ruleSets(NULL) 729 , ruleSetDescriptions(NULL) 730 , numRuleSets(0) 731 , defaultRuleSet(NULL) 732 , locale(Locale::getDefault()) 733 , collator(NULL) 734 , decimalFormatSymbols(NULL) 735 , defaultInfinityRule(NULL) 736 , defaultNaNRule(NULL) 737 , lenient(FALSE) 738 , lenientParseRules(NULL) 739 , localizations(NULL) 740 , capitalizationInfoSet(FALSE) 741 , capitalizationForUIListMenu(FALSE) 742 , capitalizationForStandAlone(FALSE) 743 , capitalizationBrkIter(NULL) 744 { 745 init(description, NULL, perror, status); 746 } 747 748 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 749 const Locale& aLocale, 750 UParseError& perror, 751 UErrorCode& status) 752 : ruleSets(NULL) 753 , ruleSetDescriptions(NULL) 754 , numRuleSets(0) 755 , defaultRuleSet(NULL) 756 , locale(aLocale) 757 , collator(NULL) 758 , decimalFormatSymbols(NULL) 759 , defaultInfinityRule(NULL) 760 , defaultNaNRule(NULL) 761 , lenient(FALSE) 762 , lenientParseRules(NULL) 763 , localizations(NULL) 764 , capitalizationInfoSet(FALSE) 765 , capitalizationForUIListMenu(FALSE) 766 , capitalizationForStandAlone(FALSE) 767 , capitalizationBrkIter(NULL) 768 { 769 init(description, NULL, perror, status); 770 } 771 772 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 773 : ruleSets(NULL) 774 , ruleSetDescriptions(NULL) 775 , numRuleSets(0) 776 , defaultRuleSet(NULL) 777 , locale(alocale) 778 , collator(NULL) 779 , decimalFormatSymbols(NULL) 780 , defaultInfinityRule(NULL) 781 , defaultNaNRule(NULL) 782 , lenient(FALSE) 783 , lenientParseRules(NULL) 784 , localizations(NULL) 785 , capitalizationInfoSet(FALSE) 786 , capitalizationForUIListMenu(FALSE) 787 , capitalizationForStandAlone(FALSE) 788 , capitalizationBrkIter(NULL) 789 { 790 if (U_FAILURE(status)) { 791 return; 792 } 793 794 const char* rules_tag = "RBNFRules"; 795 const char* fmt_tag = ""; 796 switch (tag) { 797 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 798 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 799 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 800 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 801 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 802 } 803 804 // TODO: read localization info from resource 805 LocalizationInfo* locinfo = NULL; 806 807 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 808 if (U_SUCCESS(status)) { 809 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 810 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 811 812 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 813 if (U_FAILURE(status)) { 814 ures_close(nfrb); 815 } 816 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 817 if (U_FAILURE(status)) { 818 ures_close(rbnfRules); 819 ures_close(nfrb); 820 return; 821 } 822 823 UnicodeString desc; 824 while (ures_hasNext(ruleSets)) { 825 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 826 } 827 UParseError perror; 828 829 init(desc, locinfo, perror, status); 830 831 ures_close(ruleSets); 832 ures_close(rbnfRules); 833 } 834 ures_close(nfrb); 835 } 836 837 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 838 : NumberFormat(rhs) 839 , ruleSets(NULL) 840 , ruleSetDescriptions(NULL) 841 , numRuleSets(0) 842 , defaultRuleSet(NULL) 843 , locale(rhs.locale) 844 , collator(NULL) 845 , decimalFormatSymbols(NULL) 846 , defaultInfinityRule(NULL) 847 , defaultNaNRule(NULL) 848 , lenient(FALSE) 849 , lenientParseRules(NULL) 850 , localizations(NULL) 851 , capitalizationInfoSet(FALSE) 852 , capitalizationForUIListMenu(FALSE) 853 , capitalizationForStandAlone(FALSE) 854 , capitalizationBrkIter(NULL) 855 { 856 this->operator=(rhs); 857 } 858 859 // -------- 860 861 RuleBasedNumberFormat& 862 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 863 { 864 if (this == &rhs) { 865 return *this; 866 } 867 NumberFormat::operator=(rhs); 868 UErrorCode status = U_ZERO_ERROR; 869 dispose(); 870 locale = rhs.locale; 871 lenient = rhs.lenient; 872 873 UParseError perror; 874 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 875 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 876 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 877 878 capitalizationInfoSet = rhs.capitalizationInfoSet; 879 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 880 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 881 #if !UCONFIG_NO_BREAK_ITERATION 882 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL; 883 #endif 884 885 return *this; 886 } 887 888 RuleBasedNumberFormat::~RuleBasedNumberFormat() 889 { 890 dispose(); 891 } 892 893 Format* 894 RuleBasedNumberFormat::clone(void) const 895 { 896 return new RuleBasedNumberFormat(*this); 897 } 898 899 UBool 900 RuleBasedNumberFormat::operator==(const Format& other) const 901 { 902 if (this == &other) { 903 return TRUE; 904 } 905 906 if (typeid(*this) == typeid(other)) { 907 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 908 // test for capitalization info equality is adequately handled 909 // by the NumberFormat test for fCapitalizationContext equality; 910 // the info here is just derived from that. 911 if (locale == rhs.locale && 912 lenient == rhs.lenient && 913 (localizations == NULL 914 ? rhs.localizations == NULL 915 : (rhs.localizations == NULL 916 ? FALSE 917 : *localizations == rhs.localizations))) { 918 919 NFRuleSet** p = ruleSets; 920 NFRuleSet** q = rhs.ruleSets; 921 if (p == NULL) { 922 return q == NULL; 923 } else if (q == NULL) { 924 return FALSE; 925 } 926 while (*p && *q && (**p == **q)) { 927 ++p; 928 ++q; 929 } 930 return *q == NULL && *p == NULL; 931 } 932 } 933 934 return FALSE; 935 } 936 937 UnicodeString 938 RuleBasedNumberFormat::getRules() const 939 { 940 UnicodeString result; 941 if (ruleSets != NULL) { 942 for (NFRuleSet** p = ruleSets; *p; ++p) { 943 (*p)->appendRules(result); 944 } 945 } 946 return result; 947 } 948 949 UnicodeString 950 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 951 { 952 if (localizations) { 953 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 954 return string; 955 } 956 else if (ruleSets) { 957 UnicodeString result; 958 for (NFRuleSet** p = ruleSets; *p; ++p) { 959 NFRuleSet* rs = *p; 960 if (rs->isPublic()) { 961 if (--index == -1) { 962 rs->getName(result); 963 return result; 964 } 965 } 966 } 967 } 968 UnicodeString empty; 969 return empty; 970 } 971 972 int32_t 973 RuleBasedNumberFormat::getNumberOfRuleSetNames() const 974 { 975 int32_t result = 0; 976 if (localizations) { 977 result = localizations->getNumberOfRuleSets(); 978 } 979 else if (ruleSets) { 980 for (NFRuleSet** p = ruleSets; *p; ++p) { 981 if ((**p).isPublic()) { 982 ++result; 983 } 984 } 985 } 986 return result; 987 } 988 989 int32_t 990 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 991 if (localizations) { 992 return localizations->getNumberOfDisplayLocales(); 993 } 994 return 0; 995 } 996 997 Locale 998 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 999 if (U_FAILURE(status)) { 1000 return Locale(""); 1001 } 1002 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 1003 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 1004 char buffer[64]; 1005 int32_t cap = name.length() + 1; 1006 char* bp = buffer; 1007 if (cap > 64) { 1008 bp = (char *)uprv_malloc(cap); 1009 if (bp == NULL) { 1010 status = U_MEMORY_ALLOCATION_ERROR; 1011 return Locale(""); 1012 } 1013 } 1014 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 1015 Locale retLocale(bp); 1016 if (bp != buffer) { 1017 uprv_free(bp); 1018 } 1019 return retLocale; 1020 } 1021 status = U_ILLEGAL_ARGUMENT_ERROR; 1022 Locale retLocale; 1023 return retLocale; 1024 } 1025 1026 UnicodeString 1027 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1028 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1029 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1030 int32_t len = localeName.length(); 1031 UChar* localeStr = localeName.getBuffer(len + 1); 1032 while (len >= 0) { 1033 localeStr[len] = 0; 1034 int32_t ix = localizations->indexForLocale(localeStr); 1035 if (ix >= 0) { 1036 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1037 return name; 1038 } 1039 1040 // trim trailing portion, skipping over ommitted sections 1041 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1042 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1043 } 1044 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1045 return name; 1046 } 1047 UnicodeString bogus; 1048 bogus.setToBogus(); 1049 return bogus; 1050 } 1051 1052 UnicodeString 1053 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1054 if (localizations) { 1055 UnicodeString rsn(ruleSetName); 1056 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1057 return getRuleSetDisplayName(ix, localeParam); 1058 } 1059 UnicodeString bogus; 1060 bogus.setToBogus(); 1061 return bogus; 1062 } 1063 1064 NFRuleSet* 1065 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1066 { 1067 if (U_SUCCESS(status) && ruleSets) { 1068 for (NFRuleSet** p = ruleSets; *p; ++p) { 1069 NFRuleSet* rs = *p; 1070 if (rs->isNamed(name)) { 1071 return rs; 1072 } 1073 } 1074 status = U_ILLEGAL_ARGUMENT_ERROR; 1075 } 1076 return NULL; 1077 } 1078 1079 UnicodeString& 1080 RuleBasedNumberFormat::format(int32_t number, 1081 UnicodeString& toAppendTo, 1082 FieldPosition& /* pos */) const 1083 { 1084 if (defaultRuleSet) { 1085 UErrorCode status = U_ZERO_ERROR; 1086 int32_t startPos = toAppendTo.length(); 1087 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); 1088 adjustForCapitalizationContext(startPos, toAppendTo); 1089 } 1090 return toAppendTo; 1091 } 1092 1093 1094 UnicodeString& 1095 RuleBasedNumberFormat::format(int64_t number, 1096 UnicodeString& toAppendTo, 1097 FieldPosition& /* pos */) const 1098 { 1099 if (defaultRuleSet) { 1100 UErrorCode status = U_ZERO_ERROR; 1101 int32_t startPos = toAppendTo.length(); 1102 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); 1103 adjustForCapitalizationContext(startPos, toAppendTo); 1104 } 1105 return toAppendTo; 1106 } 1107 1108 1109 UnicodeString& 1110 RuleBasedNumberFormat::format(double number, 1111 UnicodeString& toAppendTo, 1112 FieldPosition& /* pos */) const 1113 { 1114 int32_t startPos = toAppendTo.length(); 1115 if (defaultRuleSet) { 1116 UErrorCode status = U_ZERO_ERROR; 1117 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); 1118 } 1119 return adjustForCapitalizationContext(startPos, toAppendTo); 1120 } 1121 1122 1123 UnicodeString& 1124 RuleBasedNumberFormat::format(int32_t number, 1125 const UnicodeString& ruleSetName, 1126 UnicodeString& toAppendTo, 1127 FieldPosition& /* pos */, 1128 UErrorCode& status) const 1129 { 1130 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1131 if (U_SUCCESS(status)) { 1132 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1133 // throw new IllegalArgumentException("Can't use internal rule set"); 1134 status = U_ILLEGAL_ARGUMENT_ERROR; 1135 } else { 1136 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1137 if (rs) { 1138 int32_t startPos = toAppendTo.length(); 1139 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); 1140 adjustForCapitalizationContext(startPos, toAppendTo); 1141 } 1142 } 1143 } 1144 return toAppendTo; 1145 } 1146 1147 1148 UnicodeString& 1149 RuleBasedNumberFormat::format(int64_t number, 1150 const UnicodeString& ruleSetName, 1151 UnicodeString& toAppendTo, 1152 FieldPosition& /* pos */, 1153 UErrorCode& status) const 1154 { 1155 if (U_SUCCESS(status)) { 1156 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1157 // throw new IllegalArgumentException("Can't use internal rule set"); 1158 status = U_ILLEGAL_ARGUMENT_ERROR; 1159 } else { 1160 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1161 if (rs) { 1162 int32_t startPos = toAppendTo.length(); 1163 rs->format(number, toAppendTo, toAppendTo.length(), 0, status); 1164 adjustForCapitalizationContext(startPos, toAppendTo); 1165 } 1166 } 1167 } 1168 return toAppendTo; 1169 } 1170 1171 1172 UnicodeString& 1173 RuleBasedNumberFormat::format(double number, 1174 const UnicodeString& ruleSetName, 1175 UnicodeString& toAppendTo, 1176 FieldPosition& /* pos */, 1177 UErrorCode& status) const 1178 { 1179 if (U_SUCCESS(status)) { 1180 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1181 // throw new IllegalArgumentException("Can't use internal rule set"); 1182 status = U_ILLEGAL_ARGUMENT_ERROR; 1183 } else { 1184 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1185 if (rs) { 1186 int32_t startPos = toAppendTo.length(); 1187 rs->format(number, toAppendTo, toAppendTo.length(), 0, status); 1188 adjustForCapitalizationContext(startPos, toAppendTo); 1189 } 1190 } 1191 } 1192 return toAppendTo; 1193 } 1194 1195 UnicodeString& 1196 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1197 UnicodeString& currentResult) const 1198 { 1199 #if !UCONFIG_NO_BREAK_ITERATION 1200 if (startPos==0 && currentResult.length() > 0) { 1201 // capitalize currentResult according to context 1202 UChar32 ch = currentResult.char32At(0); 1203 UErrorCode status = U_ZERO_ERROR; 1204 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1205 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && 1206 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1207 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1208 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1209 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1210 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1211 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1212 } 1213 } 1214 #endif 1215 return currentResult; 1216 } 1217 1218 1219 void 1220 RuleBasedNumberFormat::parse(const UnicodeString& text, 1221 Formattable& result, 1222 ParsePosition& parsePosition) const 1223 { 1224 if (!ruleSets) { 1225 parsePosition.setErrorIndex(0); 1226 return; 1227 } 1228 1229 UnicodeString workingText(text, parsePosition.getIndex()); 1230 ParsePosition workingPos(0); 1231 1232 ParsePosition high_pp(0); 1233 Formattable high_result; 1234 1235 for (NFRuleSet** p = ruleSets; *p; ++p) { 1236 NFRuleSet *rp = *p; 1237 if (rp->isPublic() && rp->isParseable()) { 1238 ParsePosition working_pp(0); 1239 Formattable working_result; 1240 1241 rp->parse(workingText, working_pp, kMaxDouble, working_result); 1242 if (working_pp.getIndex() > high_pp.getIndex()) { 1243 high_pp = working_pp; 1244 high_result = working_result; 1245 1246 if (high_pp.getIndex() == workingText.length()) { 1247 break; 1248 } 1249 } 1250 } 1251 } 1252 1253 int32_t startIndex = parsePosition.getIndex(); 1254 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1255 if (high_pp.getIndex() > 0) { 1256 parsePosition.setErrorIndex(-1); 1257 } else { 1258 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1259 parsePosition.setErrorIndex(startIndex + errorIndex); 1260 } 1261 result = high_result; 1262 if (result.getType() == Formattable::kDouble) { 1263 int32_t r = (int32_t)result.getDouble(); 1264 if ((double)r == result.getDouble()) { 1265 result.setLong(r); 1266 } 1267 } 1268 } 1269 1270 #if !UCONFIG_NO_COLLATION 1271 1272 void 1273 RuleBasedNumberFormat::setLenient(UBool enabled) 1274 { 1275 lenient = enabled; 1276 if (!enabled && collator) { 1277 delete collator; 1278 collator = NULL; 1279 } 1280 } 1281 1282 #endif 1283 1284 void 1285 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1286 if (U_SUCCESS(status)) { 1287 if (ruleSetName.isEmpty()) { 1288 if (localizations) { 1289 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1290 defaultRuleSet = findRuleSet(name, status); 1291 } else { 1292 initDefaultRuleSet(); 1293 } 1294 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1295 status = U_ILLEGAL_ARGUMENT_ERROR; 1296 } else { 1297 NFRuleSet* result = findRuleSet(ruleSetName, status); 1298 if (result != NULL) { 1299 defaultRuleSet = result; 1300 } 1301 } 1302 } 1303 } 1304 1305 UnicodeString 1306 RuleBasedNumberFormat::getDefaultRuleSetName() const { 1307 UnicodeString result; 1308 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1309 defaultRuleSet->getName(result); 1310 } else { 1311 result.setToBogus(); 1312 } 1313 return result; 1314 } 1315 1316 void 1317 RuleBasedNumberFormat::initDefaultRuleSet() 1318 { 1319 defaultRuleSet = NULL; 1320 if (!ruleSets) { 1321 return; 1322 } 1323 1324 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering")); 1325 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal")); 1326 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration")); 1327 1328 NFRuleSet**p = &ruleSets[0]; 1329 while (*p) { 1330 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1331 defaultRuleSet = *p; 1332 return; 1333 } else { 1334 ++p; 1335 } 1336 } 1337 1338 defaultRuleSet = *--p; 1339 if (!defaultRuleSet->isPublic()) { 1340 while (p != ruleSets) { 1341 if ((*--p)->isPublic()) { 1342 defaultRuleSet = *p; 1343 break; 1344 } 1345 } 1346 } 1347 } 1348 1349 1350 void 1351 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1352 UParseError& pErr, UErrorCode& status) 1353 { 1354 // TODO: implement UParseError 1355 uprv_memset(&pErr, 0, sizeof(UParseError)); 1356 // Note: this can leave ruleSets == NULL, so remaining code should check 1357 if (U_FAILURE(status)) { 1358 return; 1359 } 1360 1361 initializeDecimalFormatSymbols(status); 1362 initializeDefaultInfinityRule(status); 1363 initializeDefaultNaNRule(status); 1364 if (U_FAILURE(status)) { 1365 return; 1366 } 1367 1368 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1369 1370 UnicodeString description(rules); 1371 if (!description.length()) { 1372 status = U_MEMORY_ALLOCATION_ERROR; 1373 return; 1374 } 1375 1376 // start by stripping the trailing whitespace from all the rules 1377 // (this is all the whitespace follwing each semicolon in the 1378 // description). This allows us to look for rule-set boundaries 1379 // by searching for ";%" without having to worry about whitespace 1380 // between the ; and the % 1381 stripWhitespace(description); 1382 1383 // check to see if there's a set of lenient-parse rules. If there 1384 // is, pull them out into our temporary holding place for them, 1385 // and delete them from the description before the real desciption- 1386 // parsing code sees them 1387 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1388 if (lp != -1) { 1389 // we've got to make sure we're not in the middle of a rule 1390 // (where "%%lenient-parse" would actually get treated as 1391 // rule text) 1392 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1393 // locate the beginning and end of the actual collation 1394 // rules (there may be whitespace between the name and 1395 // the first token in the description) 1396 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1397 1398 if (lpEnd == -1) { 1399 lpEnd = description.length() - 1; 1400 } 1401 int lpStart = lp + u_strlen(gLenientParse); 1402 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1403 ++lpStart; 1404 } 1405 1406 // copy out the lenient-parse rules and delete them 1407 // from the description 1408 lenientParseRules = new UnicodeString(); 1409 /* test for NULL */ 1410 if (lenientParseRules == 0) { 1411 status = U_MEMORY_ALLOCATION_ERROR; 1412 return; 1413 } 1414 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1415 1416 description.remove(lp, lpEnd + 1 - lp); 1417 } 1418 } 1419 1420 // pre-flight parsing the description and count the number of 1421 // rule sets (";%" marks the end of one rule set and the beginning 1422 // of the next) 1423 numRuleSets = 0; 1424 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1425 ++numRuleSets; 1426 ++p; 1427 } 1428 ++numRuleSets; 1429 1430 // our rule list is an array of the appropriate size 1431 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1432 /* test for NULL */ 1433 if (ruleSets == 0) { 1434 status = U_MEMORY_ALLOCATION_ERROR; 1435 return; 1436 } 1437 1438 for (int i = 0; i <= numRuleSets; ++i) { 1439 ruleSets[i] = NULL; 1440 } 1441 1442 // divide up the descriptions into individual rule-set descriptions 1443 // and store them in a temporary array. At each step, we also 1444 // new up a rule set, but all this does is initialize its name 1445 // and remove it from its description. We can't actually parse 1446 // the rest of the descriptions and finish initializing everything 1447 // because we have to know the names and locations of all the rule 1448 // sets before we can actually set everything up 1449 if(!numRuleSets) { 1450 status = U_ILLEGAL_ARGUMENT_ERROR; 1451 return; 1452 } 1453 1454 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1455 if (ruleSetDescriptions == 0) { 1456 status = U_MEMORY_ALLOCATION_ERROR; 1457 return; 1458 } 1459 1460 { 1461 int curRuleSet = 0; 1462 int32_t start = 0; 1463 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1464 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1465 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1466 if (ruleSets[curRuleSet] == 0) { 1467 status = U_MEMORY_ALLOCATION_ERROR; 1468 return; 1469 } 1470 ++curRuleSet; 1471 start = p + 1; 1472 } 1473 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1474 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1475 if (ruleSets[curRuleSet] == 0) { 1476 status = U_MEMORY_ALLOCATION_ERROR; 1477 return; 1478 } 1479 } 1480 1481 // now we can take note of the formatter's default rule set, which 1482 // is the last public rule set in the description (it's the last 1483 // rather than the first so that a user can create a new formatter 1484 // from an existing formatter and change its default behavior just 1485 // by appending more rule sets to the end) 1486 1487 // {dlf} Initialization of a fraction rule set requires the default rule 1488 // set to be known. For purposes of initialization, this is always the 1489 // last public rule set, no matter what the localization data says. 1490 initDefaultRuleSet(); 1491 1492 // finally, we can go back through the temporary descriptions 1493 // list and finish seting up the substructure (and we throw 1494 // away the temporary descriptions as we go) 1495 { 1496 for (int i = 0; i < numRuleSets; i++) { 1497 ruleSets[i]->parseRules(ruleSetDescriptions[i], status); 1498 } 1499 } 1500 1501 // Now that the rules are initialized, the 'real' default rule 1502 // set can be adjusted by the localization data. 1503 1504 // The C code keeps the localization array as is, rather than building 1505 // a separate array of the public rule set names, so we have less work 1506 // to do here-- but we still need to check the names. 1507 1508 if (localizationInfos) { 1509 // confirm the names, if any aren't in the rules, that's an error 1510 // it is ok if the rules contain public rule sets that are not in this list 1511 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1512 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1513 NFRuleSet* rs = findRuleSet(name, status); 1514 if (rs == NULL) { 1515 break; // error 1516 } 1517 if (i == 0) { 1518 defaultRuleSet = rs; 1519 } 1520 } 1521 } else { 1522 defaultRuleSet = getDefaultRuleSet(); 1523 } 1524 originalDescription = rules; 1525 } 1526 1527 // override the NumberFormat implementation in order to 1528 // lazily initialize relevant items 1529 void 1530 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1531 { 1532 NumberFormat::setContext(value, status); 1533 if (U_SUCCESS(status)) { 1534 if (!capitalizationInfoSet && 1535 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1536 initCapitalizationContextInfo(locale); 1537 capitalizationInfoSet = TRUE; 1538 } 1539 #if !UCONFIG_NO_BREAK_ITERATION 1540 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1541 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1542 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1543 UErrorCode status = U_ZERO_ERROR; 1544 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1545 if (U_FAILURE(status)) { 1546 delete capitalizationBrkIter; 1547 capitalizationBrkIter = NULL; 1548 } 1549 } 1550 #endif 1551 } 1552 } 1553 1554 void 1555 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1556 { 1557 #if !UCONFIG_NO_BREAK_ITERATION 1558 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1559 UErrorCode status = U_ZERO_ERROR; 1560 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1561 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1562 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1563 if (U_SUCCESS(status) && rb != NULL) { 1564 int32_t len = 0; 1565 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1566 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1567 capitalizationForUIListMenu = intVector[0]; 1568 capitalizationForStandAlone = intVector[1]; 1569 } 1570 } 1571 ures_close(rb); 1572 #endif 1573 } 1574 1575 void 1576 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1577 { 1578 // iterate through the characters... 1579 UnicodeString result; 1580 1581 int start = 0; 1582 while (start != -1 && start < description.length()) { 1583 // seek to the first non-whitespace character... 1584 while (start < description.length() 1585 && PatternProps::isWhiteSpace(description.charAt(start))) { 1586 ++start; 1587 } 1588 1589 // locate the next semicolon in the text and copy the text from 1590 // our current position up to that semicolon into the result 1591 int32_t p = description.indexOf(gSemiColon, start); 1592 if (p == -1) { 1593 // or if we don't find a semicolon, just copy the rest of 1594 // the string into the result 1595 result.append(description, start, description.length() - start); 1596 start = -1; 1597 } 1598 else if (p < description.length()) { 1599 result.append(description, start, p + 1 - start); 1600 start = p + 1; 1601 } 1602 1603 // when we get here, we've seeked off the end of the sring, and 1604 // we terminate the loop (we continue until *start* is -1 rather 1605 // than until *p* is -1, because otherwise we'd miss the last 1606 // rule in the description) 1607 else { 1608 start = -1; 1609 } 1610 } 1611 1612 description.setTo(result); 1613 } 1614 1615 1616 void 1617 RuleBasedNumberFormat::dispose() 1618 { 1619 if (ruleSets) { 1620 for (NFRuleSet** p = ruleSets; *p; ++p) { 1621 delete *p; 1622 } 1623 uprv_free(ruleSets); 1624 ruleSets = NULL; 1625 } 1626 1627 if (ruleSetDescriptions) { 1628 delete [] ruleSetDescriptions; 1629 ruleSetDescriptions = NULL; 1630 } 1631 1632 #if !UCONFIG_NO_COLLATION 1633 delete collator; 1634 #endif 1635 collator = NULL; 1636 1637 delete decimalFormatSymbols; 1638 decimalFormatSymbols = NULL; 1639 1640 delete defaultInfinityRule; 1641 defaultInfinityRule = NULL; 1642 1643 delete defaultNaNRule; 1644 defaultNaNRule = NULL; 1645 1646 delete lenientParseRules; 1647 lenientParseRules = NULL; 1648 1649 #if !UCONFIG_NO_BREAK_ITERATION 1650 delete capitalizationBrkIter; 1651 capitalizationBrkIter = NULL; 1652 #endif 1653 1654 if (localizations) { 1655 localizations = localizations->unref(); 1656 } 1657 } 1658 1659 1660 //----------------------------------------------------------------------- 1661 // package-internal API 1662 //----------------------------------------------------------------------- 1663 1664 /** 1665 * Returns the collator to use for lenient parsing. The collator is lazily created: 1666 * this function creates it the first time it's called. 1667 * @return The collator to use for lenient parsing, or null if lenient parsing 1668 * is turned off. 1669 */ 1670 const RuleBasedCollator* 1671 RuleBasedNumberFormat::getCollator() const 1672 { 1673 #if !UCONFIG_NO_COLLATION 1674 if (!ruleSets) { 1675 return NULL; 1676 } 1677 1678 // lazy-evaluate the collator 1679 if (collator == NULL && lenient) { 1680 // create a default collator based on the formatter's locale, 1681 // then pull out that collator's rules, append any additional 1682 // rules specified in the description, and create a _new_ 1683 // collator based on the combinaiton of those rules 1684 1685 UErrorCode status = U_ZERO_ERROR; 1686 1687 Collator* temp = Collator::createInstance(locale, status); 1688 RuleBasedCollator* newCollator; 1689 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1690 if (lenientParseRules) { 1691 UnicodeString rules(newCollator->getRules()); 1692 rules.append(*lenientParseRules); 1693 1694 newCollator = new RuleBasedCollator(rules, status); 1695 // Exit if newCollator could not be created. 1696 if (newCollator == NULL) { 1697 return NULL; 1698 } 1699 } else { 1700 temp = NULL; 1701 } 1702 if (U_SUCCESS(status)) { 1703 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1704 // cast away const 1705 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1706 } else { 1707 delete newCollator; 1708 } 1709 } 1710 delete temp; 1711 } 1712 #endif 1713 1714 // if lenient-parse mode is off, this will be null 1715 // (see setLenientParseMode()) 1716 return collator; 1717 } 1718 1719 1720 DecimalFormatSymbols* 1721 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) 1722 { 1723 // lazy-evaluate the DecimalFormatSymbols object. This object 1724 // is shared by all DecimalFormat instances belonging to this 1725 // formatter 1726 if (decimalFormatSymbols == NULL) { 1727 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1728 if (U_SUCCESS(status)) { 1729 decimalFormatSymbols = temp; 1730 } 1731 else { 1732 delete temp; 1733 } 1734 } 1735 return decimalFormatSymbols; 1736 } 1737 1738 /** 1739 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1740 * instances owned by this formatter. 1741 */ 1742 const DecimalFormatSymbols* 1743 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1744 { 1745 return decimalFormatSymbols; 1746 } 1747 1748 NFRule* 1749 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) 1750 { 1751 if (U_FAILURE(status)) { 1752 return NULL; 1753 } 1754 if (defaultInfinityRule == NULL) { 1755 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); 1756 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); 1757 NFRule* temp = new NFRule(this, rule, status); 1758 if (U_SUCCESS(status)) { 1759 defaultInfinityRule = temp; 1760 } 1761 else { 1762 delete temp; 1763 } 1764 } 1765 return defaultInfinityRule; 1766 } 1767 1768 const NFRule* 1769 RuleBasedNumberFormat::getDefaultInfinityRule() const 1770 { 1771 return defaultInfinityRule; 1772 } 1773 1774 NFRule* 1775 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) 1776 { 1777 if (U_FAILURE(status)) { 1778 return NULL; 1779 } 1780 if (defaultNaNRule == NULL) { 1781 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); 1782 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); 1783 NFRule* temp = new NFRule(this, rule, status); 1784 if (U_SUCCESS(status)) { 1785 defaultNaNRule = temp; 1786 } 1787 else { 1788 delete temp; 1789 } 1790 } 1791 return defaultNaNRule; 1792 } 1793 1794 const NFRule* 1795 RuleBasedNumberFormat::getDefaultNaNRule() const 1796 { 1797 return defaultNaNRule; 1798 } 1799 1800 // De-owning the current localized symbols and adopt the new symbols. 1801 void 1802 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1803 { 1804 if (symbolsToAdopt == NULL) { 1805 return; // do not allow caller to set decimalFormatSymbols to NULL 1806 } 1807 1808 if (decimalFormatSymbols != NULL) { 1809 delete decimalFormatSymbols; 1810 } 1811 1812 decimalFormatSymbols = symbolsToAdopt; 1813 1814 { 1815 // Apply the new decimalFormatSymbols by reparsing the rulesets 1816 UErrorCode status = U_ZERO_ERROR; 1817 1818 delete defaultInfinityRule; 1819 defaultInfinityRule = NULL; 1820 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols 1821 1822 delete defaultNaNRule; 1823 defaultNaNRule = NULL; 1824 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols 1825 1826 if (ruleSets) { 1827 for (int32_t i = 0; i < numRuleSets; i++) { 1828 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); 1829 } 1830 } 1831 } 1832 } 1833 1834 // Setting the symbols is equlivalent to adopting a newly created localized symbols. 1835 void 1836 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1837 { 1838 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1839 } 1840 1841 PluralFormat * 1842 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1843 const UnicodeString &pattern, 1844 UErrorCode& status) const 1845 { 1846 return new PluralFormat(locale, pluralType, pattern, status); 1847 } 1848 1849 U_NAMESPACE_END 1850 1851 /* U_HAVE_RBNF */ 1852 #endif 1853