// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation
* and others. All Rights Reserved.
*******************************************************************************
*/

#include "unicode/utypes.h"
#include "utypeinfo.h"  // for 'typeid' to work

#include "unicode/rbnf.h"

#if U_HAVE_RBNF

#include "unicode/normlzr.h"
#include "unicode/plurfmt.h"
#include "unicode/tblcoll.h"
#include "unicode/uchar.h"
#include "unicode/ucol.h"
#include "unicode/uloc.h"
#include "unicode/unum.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "unicode/udata.h"
#include "unicode/udisplaycontext.h"
#include "unicode/brkiter.h"
#include "nfrs.h"

#include "cmemory.h"
#include "cstring.h"
#include "patternprops.h"
#include "uresimp.h"

// debugging
// #define RBNF_DEBUG

#ifdef RBNF_DEBUG
#include <stdio.h>
#endif

// Package name handed to ures_open() when the tag-based constructor loads
// the predefined rule sets.
#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

// Name prefix that the format(number, ruleSetName, ...) overloads test for
// in order to reject rule-set names starting with "%%".
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
53 static const UChar gLenientParse[] = 54 { 55 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 56 }; /* "%%lenient-parse:" */ 57 static const UChar gSemiColon = 0x003B; 58 static const UChar gSemiPercent[] = 59 { 60 0x3B, 0x25, 0 61 }; /* ";%" */ 62 63 #define kSomeNumberOfBitsDiv2 22 64 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 65 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 66 67 U_NAMESPACE_BEGIN 68 69 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 70 71 /* 72 This is a utility class. It does not use ICU's RTTI. 73 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 74 Please make sure that intltest passes on Windows in Release mode, 75 since the string pooling per compilation unit will mess up how RTTI works. 76 The RTTI code was also removed due to lack of code coverage. 77 */ 78 class LocalizationInfo : public UMemory { 79 protected: 80 virtual ~LocalizationInfo(); 81 uint32_t refcount; 82 83 public: 84 LocalizationInfo() : refcount(0) {} 85 86 LocalizationInfo* ref(void) { 87 ++refcount; 88 return this; 89 } 90 91 LocalizationInfo* unref(void) { 92 if (refcount && --refcount == 0) { 93 delete this; 94 } 95 return NULL; 96 } 97 98 virtual UBool operator==(const LocalizationInfo* rhs) const; 99 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 100 101 virtual int32_t getNumberOfRuleSets(void) const = 0; 102 virtual const UChar* getRuleSetName(int32_t index) const = 0; 103 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 104 virtual const UChar* getLocaleName(int32_t index) const = 0; 105 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 106 107 virtual int32_t indexForLocale(const UChar* locale) const; 108 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 109 110 // virtual UClassID getDynamicClassID() const = 0; 111 // static UClassID 
getStaticClassID(void); 112 }; 113 114 LocalizationInfo::~LocalizationInfo() {} 115 116 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 117 118 // if both strings are NULL, this returns TRUE 119 static UBool 120 streq(const UChar* lhs, const UChar* rhs) { 121 if (rhs == lhs) { 122 return TRUE; 123 } 124 if (lhs && rhs) { 125 return u_strcmp(lhs, rhs) == 0; 126 } 127 return FALSE; 128 } 129 130 UBool 131 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 132 if (rhs) { 133 if (this == rhs) { 134 return TRUE; 135 } 136 137 int32_t rsc = getNumberOfRuleSets(); 138 if (rsc == rhs->getNumberOfRuleSets()) { 139 for (int i = 0; i < rsc; ++i) { 140 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 141 return FALSE; 142 } 143 } 144 int32_t dlc = getNumberOfDisplayLocales(); 145 if (dlc == rhs->getNumberOfDisplayLocales()) { 146 for (int i = 0; i < dlc; ++i) { 147 const UChar* locale = getLocaleName(i); 148 int32_t ix = rhs->indexForLocale(locale); 149 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 150 if (!streq(locale, rhs->getLocaleName(ix))) { 151 return FALSE; 152 } 153 for (int j = 0; j < rsc; ++j) { 154 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 155 return FALSE; 156 } 157 } 158 } 159 return TRUE; 160 } 161 } 162 } 163 return FALSE; 164 } 165 166 int32_t 167 LocalizationInfo::indexForLocale(const UChar* locale) const { 168 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 169 if (streq(locale, getLocaleName(i))) { 170 return i; 171 } 172 } 173 return -1; 174 } 175 176 int32_t 177 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 178 if (ruleset) { 179 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 180 if (streq(ruleset, getRuleSetName(i))) { 181 return i; 182 } 183 } 184 } 185 return -1; 186 } 187 188 189 typedef void (*Fn_Deleter)(void*); 190 191 class VArray { 192 void** buf; 193 int32_t cap; 194 int32_t size; 195 Fn_Deleter deleter; 196 public: 197 
VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 198 199 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 200 201 ~VArray() { 202 if (deleter) { 203 for (int i = 0; i < size; ++i) { 204 (*deleter)(buf[i]); 205 } 206 } 207 uprv_free(buf); 208 } 209 210 int32_t length() { 211 return size; 212 } 213 214 void add(void* elem, UErrorCode& status) { 215 if (U_SUCCESS(status)) { 216 if (size == cap) { 217 if (cap == 0) { 218 cap = 1; 219 } else if (cap < 256) { 220 cap *= 2; 221 } else { 222 cap += 256; 223 } 224 if (buf == NULL) { 225 buf = (void**)uprv_malloc(cap * sizeof(void*)); 226 } else { 227 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 228 } 229 if (buf == NULL) { 230 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 231 status = U_MEMORY_ALLOCATION_ERROR; 232 return; 233 } 234 void* start = &buf[size]; 235 size_t count = (cap - size) * sizeof(void*); 236 uprv_memset(start, 0, count); // fill with nulls, just because 237 } 238 buf[size++] = elem; 239 } 240 } 241 242 void** release(void) { 243 void** result = buf; 244 buf = NULL; 245 cap = 0; 246 size = 0; 247 return result; 248 } 249 }; 250 251 class LocDataParser; 252 253 class StringLocalizationInfo : public LocalizationInfo { 254 UChar* info; 255 UChar*** data; 256 int32_t numRuleSets; 257 int32_t numLocales; 258 259 friend class LocDataParser; 260 261 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 262 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 263 { 264 } 265 266 public: 267 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 268 269 virtual ~StringLocalizationInfo(); 270 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 271 virtual const UChar* getRuleSetName(int32_t index) const; 272 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 273 virtual const UChar* 
getLocaleName(int32_t index) const; 274 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 275 276 // virtual UClassID getDynamicClassID() const; 277 // static UClassID getStaticClassID(void); 278 279 private: 280 void init(UErrorCode& status) const; 281 }; 282 283 284 enum { 285 OPEN_ANGLE = 0x003c, /* '<' */ 286 CLOSE_ANGLE = 0x003e, /* '>' */ 287 COMMA = 0x002c, 288 TICK = 0x0027, 289 QUOTE = 0x0022, 290 SPACE = 0x0020 291 }; 292 293 /** 294 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 295 */ 296 class LocDataParser { 297 UChar* data; 298 const UChar* e; 299 UChar* p; 300 UChar ch; 301 UParseError& pe; 302 UErrorCode& ec; 303 304 public: 305 LocDataParser(UParseError& parseError, UErrorCode& status) 306 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 307 ~LocDataParser() {} 308 309 /* 310 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 311 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 312 */ 313 StringLocalizationInfo* parse(UChar* data, int32_t len); 314 315 private: 316 317 void inc(void) { ++p; ch = 0xffff; } 318 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 319 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 320 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? 
ch : *p)) inc();} 321 UBool inList(UChar c, const UChar* list) const { 322 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; 323 while (*list && *list != c) ++list; return *list == c; 324 } 325 void parseError(const char* msg); 326 327 StringLocalizationInfo* doParse(void); 328 329 UChar** nextArray(int32_t& requiredLength); 330 UChar* nextString(void); 331 }; 332 333 #ifdef RBNF_DEBUG 334 #define ERROR(msg) parseError(msg); return NULL; 335 #define EXPLANATION_ARG explanationArg 336 #else 337 #define ERROR(msg) parseError(NULL); return NULL; 338 #define EXPLANATION_ARG 339 #endif 340 341 342 static const UChar DQUOTE_STOPLIST[] = { 343 QUOTE, 0 344 }; 345 346 static const UChar SQUOTE_STOPLIST[] = { 347 TICK, 0 348 }; 349 350 static const UChar NOQUOTE_STOPLIST[] = { 351 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 352 }; 353 354 static void 355 DeleteFn(void* p) { 356 uprv_free(p); 357 } 358 359 StringLocalizationInfo* 360 LocDataParser::parse(UChar* _data, int32_t len) { 361 if (U_FAILURE(ec)) { 362 if (_data) uprv_free(_data); 363 return NULL; 364 } 365 366 pe.line = 0; 367 pe.offset = -1; 368 pe.postContext[0] = 0; 369 pe.preContext[0] = 0; 370 371 if (_data == NULL) { 372 ec = U_ILLEGAL_ARGUMENT_ERROR; 373 return NULL; 374 } 375 376 if (len <= 0) { 377 ec = U_ILLEGAL_ARGUMENT_ERROR; 378 uprv_free(_data); 379 return NULL; 380 } 381 382 data = _data; 383 e = data + len; 384 p = _data; 385 ch = 0xffff; 386 387 return doParse(); 388 } 389 390 391 StringLocalizationInfo* 392 LocDataParser::doParse(void) { 393 skipWhitespace(); 394 if (!checkInc(OPEN_ANGLE)) { 395 ERROR("Missing open angle"); 396 } else { 397 VArray array(DeleteFn); 398 UBool mightHaveNext = TRUE; 399 int32_t requiredLength = -1; 400 while (mightHaveNext) { 401 mightHaveNext = FALSE; 402 UChar** elem = nextArray(requiredLength); 403 skipWhitespace(); 404 UBool haveComma = check(COMMA); 405 if (elem) { 406 array.add(elem, ec); 407 if (haveComma) { 408 inc(); 409 
mightHaveNext = TRUE; 410 } 411 } else if (haveComma) { 412 ERROR("Unexpected character"); 413 } 414 } 415 416 skipWhitespace(); 417 if (!checkInc(CLOSE_ANGLE)) { 418 if (check(OPEN_ANGLE)) { 419 ERROR("Missing comma in outer array"); 420 } else { 421 ERROR("Missing close angle bracket in outer array"); 422 } 423 } 424 425 skipWhitespace(); 426 if (p != e) { 427 ERROR("Extra text after close of localization data"); 428 } 429 430 array.add(NULL, ec); 431 if (U_SUCCESS(ec)) { 432 int32_t numLocs = array.length() - 2; // subtract first, NULL 433 UChar*** result = (UChar***)array.release(); 434 435 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 436 } 437 } 438 439 ERROR("Unknown error"); 440 } 441 442 UChar** 443 LocDataParser::nextArray(int32_t& requiredLength) { 444 if (U_FAILURE(ec)) { 445 return NULL; 446 } 447 448 skipWhitespace(); 449 if (!checkInc(OPEN_ANGLE)) { 450 ERROR("Missing open angle"); 451 } 452 453 VArray array; 454 UBool mightHaveNext = TRUE; 455 while (mightHaveNext) { 456 mightHaveNext = FALSE; 457 UChar* elem = nextString(); 458 skipWhitespace(); 459 UBool haveComma = check(COMMA); 460 if (elem) { 461 array.add(elem, ec); 462 if (haveComma) { 463 inc(); 464 mightHaveNext = TRUE; 465 } 466 } else if (haveComma) { 467 ERROR("Unexpected comma"); 468 } 469 } 470 skipWhitespace(); 471 if (!checkInc(CLOSE_ANGLE)) { 472 if (check(OPEN_ANGLE)) { 473 ERROR("Missing close angle bracket in inner array"); 474 } else { 475 ERROR("Missing comma in inner array"); 476 } 477 } 478 479 array.add(NULL, ec); 480 if (U_SUCCESS(ec)) { 481 if (requiredLength == -1) { 482 requiredLength = array.length() + 1; 483 } else if (array.length() != requiredLength) { 484 ec = U_ILLEGAL_ARGUMENT_ERROR; 485 ERROR("Array not of required length"); 486 } 487 488 return (UChar**)array.release(); 489 } 490 ERROR("Unknown Error"); 491 } 492 493 UChar* 494 LocDataParser::nextString() { 495 UChar* result = NULL; 496 497 skipWhitespace(); 
498 if (p < e) { 499 const UChar* terminators; 500 UChar c = *p; 501 UBool haveQuote = c == QUOTE || c == TICK; 502 if (haveQuote) { 503 inc(); 504 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 505 } else { 506 terminators = NOQUOTE_STOPLIST; 507 } 508 UChar* start = p; 509 while (p < e && !inList(*p, terminators)) ++p; 510 if (p == e) { 511 ERROR("Unexpected end of data"); 512 } 513 514 UChar x = *p; 515 if (p > start) { 516 ch = x; 517 *p = 0x0; // terminate by writing to data 518 result = start; // just point into data 519 } 520 if (haveQuote) { 521 if (x != c) { 522 ERROR("Missing matching quote"); 523 } else if (p == start) { 524 ERROR("Empty string"); 525 } 526 inc(); 527 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 528 ERROR("Unexpected character in string"); 529 } 530 } 531 532 // ok for there to be no next string 533 return result; 534 } 535 536 void LocDataParser::parseError(const char* EXPLANATION_ARG) 537 { 538 if (!data) { 539 return; 540 } 541 542 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 543 if (start < data) { 544 start = data; 545 } 546 for (UChar* x = p; --x >= start;) { 547 if (!*x) { 548 start = x+1; 549 break; 550 } 551 } 552 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 553 if (limit > e) { 554 limit = e; 555 } 556 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 557 pe.preContext[p-start] = 0; 558 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 559 pe.postContext[limit-p] = 0; 560 pe.offset = (int32_t)(p - data); 561 562 #ifdef RBNF_DEBUG 563 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 564 565 UnicodeString msg; 566 msg.append(start, p - start); 567 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 568 msg.append(p, limit-p); 569 msg.append(UNICODE_STRING_SIMPLE("'")); 570 571 char buf[128]; 572 int32_t len = msg.extract(0, msg.length(), buf, 128); 573 if (len >= 128) { 574 buf[127] = 0; 575 } else { 576 buf[len] = 0; 577 } 578 fprintf(stderr, "%s\n", buf); 579 
fflush(stderr); 580 #endif 581 582 uprv_free(data); 583 data = NULL; 584 p = NULL; 585 e = NULL; 586 587 if (U_SUCCESS(ec)) { 588 ec = U_PARSE_ERROR; 589 } 590 } 591 592 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 593 594 StringLocalizationInfo* 595 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 596 if (U_FAILURE(status)) { 597 return NULL; 598 } 599 600 int32_t len = info.length(); 601 if (len == 0) { 602 return NULL; // no error; 603 } 604 605 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 606 if (!p) { 607 status = U_MEMORY_ALLOCATION_ERROR; 608 return NULL; 609 } 610 info.extract(p, len, status); 611 if (!U_FAILURE(status)) { 612 status = U_ZERO_ERROR; // clear warning about non-termination 613 } 614 615 LocDataParser parser(perror, status); 616 return parser.parse(p, len); 617 } 618 619 StringLocalizationInfo::~StringLocalizationInfo() { 620 for (UChar*** p = (UChar***)data; *p; ++p) { 621 // remaining data is simply pointer into our unicode string data. 
622 if (*p) uprv_free(*p); 623 } 624 if (data) uprv_free(data); 625 if (info) uprv_free(info); 626 } 627 628 629 const UChar* 630 StringLocalizationInfo::getRuleSetName(int32_t index) const { 631 if (index >= 0 && index < getNumberOfRuleSets()) { 632 return data[0][index]; 633 } 634 return NULL; 635 } 636 637 const UChar* 638 StringLocalizationInfo::getLocaleName(int32_t index) const { 639 if (index >= 0 && index < getNumberOfDisplayLocales()) { 640 return data[index+1][0]; 641 } 642 return NULL; 643 } 644 645 const UChar* 646 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 647 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 648 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 649 return data[localeIndex+1][ruleIndex+1]; 650 } 651 return NULL; 652 } 653 654 // ---------- 655 656 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 657 const UnicodeString& locs, 658 const Locale& alocale, UParseError& perror, UErrorCode& status) 659 : ruleSets(NULL) 660 , ruleSetDescriptions(NULL) 661 , numRuleSets(0) 662 , defaultRuleSet(NULL) 663 , locale(alocale) 664 , collator(NULL) 665 , decimalFormatSymbols(NULL) 666 , defaultInfinityRule(NULL) 667 , defaultNaNRule(NULL) 668 , lenient(FALSE) 669 , lenientParseRules(NULL) 670 , localizations(NULL) 671 , capitalizationInfoSet(FALSE) 672 , capitalizationForUIListMenu(FALSE) 673 , capitalizationForStandAlone(FALSE) 674 , capitalizationBrkIter(NULL) 675 { 676 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 677 init(description, locinfo, perror, status); 678 } 679 680 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 681 const UnicodeString& locs, 682 UParseError& perror, UErrorCode& status) 683 : ruleSets(NULL) 684 , ruleSetDescriptions(NULL) 685 , numRuleSets(0) 686 , defaultRuleSet(NULL) 687 , locale(Locale::getDefault()) 688 , collator(NULL) 689 , decimalFormatSymbols(NULL) 
690 , defaultInfinityRule(NULL) 691 , defaultNaNRule(NULL) 692 , lenient(FALSE) 693 , lenientParseRules(NULL) 694 , localizations(NULL) 695 , capitalizationInfoSet(FALSE) 696 , capitalizationForUIListMenu(FALSE) 697 , capitalizationForStandAlone(FALSE) 698 , capitalizationBrkIter(NULL) 699 { 700 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 701 init(description, locinfo, perror, status); 702 } 703 704 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 705 LocalizationInfo* info, 706 const Locale& alocale, UParseError& perror, UErrorCode& status) 707 : ruleSets(NULL) 708 , ruleSetDescriptions(NULL) 709 , numRuleSets(0) 710 , defaultRuleSet(NULL) 711 , locale(alocale) 712 , collator(NULL) 713 , decimalFormatSymbols(NULL) 714 , defaultInfinityRule(NULL) 715 , defaultNaNRule(NULL) 716 , lenient(FALSE) 717 , lenientParseRules(NULL) 718 , localizations(NULL) 719 , capitalizationInfoSet(FALSE) 720 , capitalizationForUIListMenu(FALSE) 721 , capitalizationForStandAlone(FALSE) 722 , capitalizationBrkIter(NULL) 723 { 724 init(description, info, perror, status); 725 } 726 727 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 728 UParseError& perror, 729 UErrorCode& status) 730 : ruleSets(NULL) 731 , ruleSetDescriptions(NULL) 732 , numRuleSets(0) 733 , defaultRuleSet(NULL) 734 , locale(Locale::getDefault()) 735 , collator(NULL) 736 , decimalFormatSymbols(NULL) 737 , defaultInfinityRule(NULL) 738 , defaultNaNRule(NULL) 739 , lenient(FALSE) 740 , lenientParseRules(NULL) 741 , localizations(NULL) 742 , capitalizationInfoSet(FALSE) 743 , capitalizationForUIListMenu(FALSE) 744 , capitalizationForStandAlone(FALSE) 745 , capitalizationBrkIter(NULL) 746 { 747 init(description, NULL, perror, status); 748 } 749 750 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 751 const Locale& aLocale, 752 UParseError& perror, 753 UErrorCode& status) 754 : ruleSets(NULL) 755 , 
ruleSetDescriptions(NULL) 756 , numRuleSets(0) 757 , defaultRuleSet(NULL) 758 , locale(aLocale) 759 , collator(NULL) 760 , decimalFormatSymbols(NULL) 761 , defaultInfinityRule(NULL) 762 , defaultNaNRule(NULL) 763 , lenient(FALSE) 764 , lenientParseRules(NULL) 765 , localizations(NULL) 766 , capitalizationInfoSet(FALSE) 767 , capitalizationForUIListMenu(FALSE) 768 , capitalizationForStandAlone(FALSE) 769 , capitalizationBrkIter(NULL) 770 { 771 init(description, NULL, perror, status); 772 } 773 774 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 775 : ruleSets(NULL) 776 , ruleSetDescriptions(NULL) 777 , numRuleSets(0) 778 , defaultRuleSet(NULL) 779 , locale(alocale) 780 , collator(NULL) 781 , decimalFormatSymbols(NULL) 782 , defaultInfinityRule(NULL) 783 , defaultNaNRule(NULL) 784 , lenient(FALSE) 785 , lenientParseRules(NULL) 786 , localizations(NULL) 787 , capitalizationInfoSet(FALSE) 788 , capitalizationForUIListMenu(FALSE) 789 , capitalizationForStandAlone(FALSE) 790 , capitalizationBrkIter(NULL) 791 { 792 if (U_FAILURE(status)) { 793 return; 794 } 795 796 const char* rules_tag = "RBNFRules"; 797 const char* fmt_tag = ""; 798 switch (tag) { 799 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 800 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 801 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 802 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 803 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 804 } 805 806 // TODO: read localization info from resource 807 LocalizationInfo* locinfo = NULL; 808 809 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 810 if (U_SUCCESS(status)) { 811 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 812 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 813 814 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 815 if (U_FAILURE(status)) { 
816 ures_close(nfrb); 817 } 818 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 819 if (U_FAILURE(status)) { 820 ures_close(rbnfRules); 821 ures_close(nfrb); 822 return; 823 } 824 825 UnicodeString desc; 826 while (ures_hasNext(ruleSets)) { 827 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 828 } 829 UParseError perror; 830 831 init(desc, locinfo, perror, status); 832 833 ures_close(ruleSets); 834 ures_close(rbnfRules); 835 } 836 ures_close(nfrb); 837 } 838 839 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 840 : NumberFormat(rhs) 841 , ruleSets(NULL) 842 , ruleSetDescriptions(NULL) 843 , numRuleSets(0) 844 , defaultRuleSet(NULL) 845 , locale(rhs.locale) 846 , collator(NULL) 847 , decimalFormatSymbols(NULL) 848 , defaultInfinityRule(NULL) 849 , defaultNaNRule(NULL) 850 , lenient(FALSE) 851 , lenientParseRules(NULL) 852 , localizations(NULL) 853 , capitalizationInfoSet(FALSE) 854 , capitalizationForUIListMenu(FALSE) 855 , capitalizationForStandAlone(FALSE) 856 , capitalizationBrkIter(NULL) 857 { 858 this->operator=(rhs); 859 } 860 861 // -------- 862 863 RuleBasedNumberFormat& 864 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 865 { 866 if (this == &rhs) { 867 return *this; 868 } 869 NumberFormat::operator=(rhs); 870 UErrorCode status = U_ZERO_ERROR; 871 dispose(); 872 locale = rhs.locale; 873 lenient = rhs.lenient; 874 875 UParseError perror; 876 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 877 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 878 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 879 880 capitalizationInfoSet = rhs.capitalizationInfoSet; 881 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 882 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 883 #if !UCONFIG_NO_BREAK_ITERATION 884 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? 
rhs.capitalizationBrkIter->clone(): NULL; 885 #endif 886 887 return *this; 888 } 889 890 RuleBasedNumberFormat::~RuleBasedNumberFormat() 891 { 892 dispose(); 893 } 894 895 Format* 896 RuleBasedNumberFormat::clone(void) const 897 { 898 return new RuleBasedNumberFormat(*this); 899 } 900 901 UBool 902 RuleBasedNumberFormat::operator==(const Format& other) const 903 { 904 if (this == &other) { 905 return TRUE; 906 } 907 908 if (typeid(*this) == typeid(other)) { 909 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 910 // test for capitalization info equality is adequately handled 911 // by the NumberFormat test for fCapitalizationContext equality; 912 // the info here is just derived from that. 913 if (locale == rhs.locale && 914 lenient == rhs.lenient && 915 (localizations == NULL 916 ? rhs.localizations == NULL 917 : (rhs.localizations == NULL 918 ? FALSE 919 : *localizations == rhs.localizations))) { 920 921 NFRuleSet** p = ruleSets; 922 NFRuleSet** q = rhs.ruleSets; 923 if (p == NULL) { 924 return q == NULL; 925 } else if (q == NULL) { 926 return FALSE; 927 } 928 while (*p && *q && (**p == **q)) { 929 ++p; 930 ++q; 931 } 932 return *q == NULL && *p == NULL; 933 } 934 } 935 936 return FALSE; 937 } 938 939 UnicodeString 940 RuleBasedNumberFormat::getRules() const 941 { 942 UnicodeString result; 943 if (ruleSets != NULL) { 944 for (NFRuleSet** p = ruleSets; *p; ++p) { 945 (*p)->appendRules(result); 946 } 947 } 948 return result; 949 } 950 951 UnicodeString 952 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 953 { 954 if (localizations) { 955 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 956 return string; 957 } 958 else if (ruleSets) { 959 UnicodeString result; 960 for (NFRuleSet** p = ruleSets; *p; ++p) { 961 NFRuleSet* rs = *p; 962 if (rs->isPublic()) { 963 if (--index == -1) { 964 rs->getName(result); 965 return result; 966 } 967 } 968 } 969 } 970 UnicodeString empty; 971 return empty; 972 } 973 974 
// Number of rule-set names: the size of the localization table when there is
// one, otherwise the number of public rule sets.
int32_t
RuleBasedNumberFormat::getNumberOfRuleSetNames() const
{
    int32_t result = 0;
    if (localizations) {
        result = localizations->getNumberOfRuleSets();
    }
    else if (ruleSets) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            if ((**p).isPublic()) {
                ++result;
            }
        }
    }
    return result;
}

// Number of display locales in the localization table; 0 without a table.
int32_t
RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
    if (localizations) {
        return localizations->getNumberOfDisplayLocales();
    }
    return 0;
}

// Returns the index-th display locale of the localization table.  Sets
// U_ILLEGAL_ARGUMENT_ERROR when there is no table or index is out of range,
// and U_MEMORY_ALLOCATION_ERROR when a temporary name buffer cannot be
// allocated.
Locale
RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return Locale("");
    }
    if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
        UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
        // convert the name to char*, on the stack if it fits in 64 bytes
        char buffer[64];
        int32_t cap = name.length() + 1;
        char* bp = buffer;
        if (cap > 64) {
            bp = (char *)uprv_malloc(cap);
            if (bp == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return Locale("");
            }
        }
        name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
        Locale retLocale(bp);
        if (bp != buffer) {
            uprv_free(bp);
        }
        return retLocale;
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
    Locale retLocale;
    return retLocale;
}

// Returns the display name of rule set 'index' for localeParam.  The base
// name of the locale is looked up in the localization table and, while no
// entry matches, shortened at its last underscore and tried again (ending
// with the empty name).  If nothing matches, the plain rule-set name is
// returned.  Returns a bogus string when there is no localization table or
// index is out of range.
UnicodeString
RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
        int32_t len = localeName.length();
        UChar* localeStr = localeName.getBuffer(len + 1);
        while (len >= 0) {
            localeStr[len] = 0;
            int32_t ix = localizations->indexForLocale(localeStr);
            if (ix >= 0) {
                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
                return name;
            }

            // trim trailing portion, skipping over omitted sections
            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
            while (len > 0 && localeStr[len-1] == 0x005F) --len;
        }
        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
        return name;
    }
    UnicodeString bogus;
    bogus.setToBogus();
    return bogus;
}

// Same as above, with the rule set identified by name.  An unknown name maps
// to index -1, for which the overload above returns a bogus string.
UnicodeString
RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
    if (localizations) {
        UnicodeString rsn(ruleSetName);
        int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
        return getRuleSetDisplayName(ix, localeParam);
    }
    UnicodeString bogus;
    bogus.setToBogus();
    return bogus;
}

// Returns the rule set called 'name'.  If status is a success on entry and
// rule sets exist but none matches, sets U_ILLEGAL_ARGUMENT_ERROR and
// returns NULL.  Also returns NULL, leaving status unchanged, when status is
// already a failure or there are no rule sets.
NFRuleSet*
RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
{
    if (U_SUCCESS(status) && ruleSets) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            NFRuleSet* rs = *p;
            if (rs->isNamed(name)) {
                return rs;
            }
        }
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return NULL;
}

// Formats with the default rule set and appends to toAppendTo.  Nothing is
// appended when there is no default rule set; the status of the rule-set
// call is local and not reported.
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */) const
{
    if (defaultRuleSet) {
        UErrorCode status = U_ZERO_ERROR;
        int32_t startPos = toAppendTo.length();
        defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
        adjustForCapitalizationContext(startPos, toAppendTo);
    }
    return toAppendTo;
}


// int64_t variant of the overload above.
UnicodeString&
RuleBasedNumberFormat::format(int64_t number,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */) const
{
    if (defaultRuleSet) {
        UErrorCode status = U_ZERO_ERROR;
        int32_t startPos = toAppendTo.length();
        defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
        adjustForCapitalizationContext(startPos, toAppendTo);
    }
    return toAppendTo;
}


// double variant.  Unlike the integer overloads, the capitalization
// adjustment is applied even when there is no default rule set.
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */) const
{
    int32_t startPos = toAppendTo.length();
    if (defaultRuleSet) {
        UErrorCode status = U_ZERO_ERROR;
        defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
    }
    return adjustForCapitalizationContext(startPos, toAppendTo);
}


// Formats with the named rule set.  A name starting with "%%", or one that
// findRuleSet() does not find, sets U_ILLEGAL_ARGUMENT_ERROR and appends
// nothing.
UnicodeString&
RuleBasedNumberFormat::format(int32_t number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */,
                              UErrorCode& status) const
{
    // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
    if (U_SUCCESS(status)) {
        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
            // throw new IllegalArgumentException("Can't use internal rule set");
            status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            NFRuleSet *rs = findRuleSet(ruleSetName, status);
            if (rs) {
                int32_t startPos = toAppendTo.length();
                rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
                adjustForCapitalizationContext(startPos, toAppendTo);
            }
        }
    }
    return toAppendTo;
}


// int64_t variant of the named-rule-set overload above.
UnicodeString&
RuleBasedNumberFormat::format(int64_t number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */,
                              UErrorCode& status) const
{
    if (U_SUCCESS(status)) {
        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
            // throw new IllegalArgumentException("Can't use internal rule set");
            status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            NFRuleSet *rs = findRuleSet(ruleSetName, status);
            if (rs) {
                int32_t startPos = toAppendTo.length();
                rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
                adjustForCapitalizationContext(startPos, toAppendTo);
            }
        }
    }
    return toAppendTo;
}


// double variant of the named-rule-set overload above.
UnicodeString&
RuleBasedNumberFormat::format(double number,
                              const UnicodeString& ruleSetName,
                              UnicodeString& toAppendTo,
                              FieldPosition& /* pos */,
                              UErrorCode& status) const
{
    if (U_SUCCESS(status)) {
        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
            // throw new IllegalArgumentException("Can't use internal rule set");
            status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            NFRuleSet *rs = findRuleSet(ruleSetName, status);
            if (rs) {
                int32_t startPos = toAppendTo.length();
                rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
                adjustForCapitalizationContext(startPos, toAppendTo);
            }
        }
    }
    return toAppendTo;
}

// Title-cases currentResult when it was formatted into an empty string
// (startPos == 0), begins with a lowercase character, a break iterator is
// available, and the capitalization context asks for it.  Returns
// currentResult; a no-op when break iteration is compiled out.
UnicodeString&
RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
                                                      UnicodeString& currentResult) const
{
#if !UCONFIG_NO_BREAK_ITERATION
    if (startPos==0 && currentResult.length() > 0) {
        // capitalize currentResult according to context
        UChar32 ch = currentResult.char32At(0);
        UErrorCode status = U_ZERO_ERROR;
        UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
        if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
              ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
                (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
            // titlecase first word of currentResult, here use sentence iterator unlike current implementations
            // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
            currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
        }
    }
#endif
    return currentResult;
}


// Parses text starting at parsePosition with every public, parseable rule
// set and keeps the result of the one that consumed the most text.  On
// success parsePosition is advanced and its error index set to -1; otherwise
// the error index is set relative to the starting index.  A double result
// that is integral and fits in int32_t is converted to a long.
void
RuleBasedNumberFormat::parse(const UnicodeString& text,
                             Formattable& result,
                             ParsePosition& parsePosition) const
{
    if (!ruleSets) {
        parsePosition.setErrorIndex(0);
        return;
    }

    // work on the tail of text so that positions below are relative to it
    UnicodeString workingText(text, parsePosition.getIndex());
    ParsePosition workingPos(0);

    ParsePosition high_pp(0);
    Formattable high_result;

    for (NFRuleSet** p = ruleSets; *p; ++p) {
        NFRuleSet *rp = *p;
        if (rp->isPublic() && rp->isParseable()) {
            ParsePosition working_pp(0);
            Formattable working_result;

            rp->parse(workingText, working_pp, kMaxDouble, working_result);
            if (working_pp.getIndex() > high_pp.getIndex()) {
                high_pp = working_pp;
                high_result = working_result;

                // the whole text was consumed; no rule set can do better
                if (high_pp.getIndex() == workingText.length()) {
                    break;
                }
            }
        }
    }

    int32_t startIndex = parsePosition.getIndex();
    parsePosition.setIndex(startIndex + high_pp.getIndex());
    if (high_pp.getIndex() > 0) {
        parsePosition.setErrorIndex(-1);
    } else {
        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
        parsePosition.setErrorIndex(startIndex + errorIndex);
    }
    result = high_result;
    if (result.getType() == Formattable::kDouble) {
        double d = result.getDouble();
        if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
            // Note: casting a double to an int when the double is too large or small
            //       to fit the destination is undefined behavior. The explicit range checks,
            //       above, are required. Just casting and checking the result value is undefined.
            result.setLong(static_cast<int32_t>(d));
        }
    }
}

#if !UCONFIG_NO_COLLATION

// Sets the lenient flag; turning it off also deletes the collator.
void
RuleBasedNumberFormat::setLenient(UBool enabled)
{
    lenient = enabled;
    if (!enabled && collator) {
        delete collator;
        collator = NULL;
    }
}

#endif

// Selects the default rule set by name.  An empty name selects the first
// rule set of the localization table, or falls back to initDefaultRuleSet()
// when there is no table.  A name starting with "%%" sets
// U_ILLEGAL_ARGUMENT_ERROR.  For any other name the current default is kept
// if findRuleSet() returns NULL.
void
RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        if (ruleSetName.isEmpty()) {
          if (localizations) {
              UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
              defaultRuleSet = findRuleSet(name, status);
          } else {
            initDefaultRuleSet();
          }
        } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            NFRuleSet* result = findRuleSet(ruleSetName, status);
            if (result != NULL) {
                defaultRuleSet = result;
            }
        }
    }
}

// Returns the name of the default rule set, or a bogus string when there is
// none or it is not public.
UnicodeString
RuleBasedNumberFormat::getDefaultRuleSetName() const {
    UnicodeString result;
    if (defaultRuleSet && defaultRuleSet->isPublic()) {
        defaultRuleSet->getName(result);
    } else {
        result.setToBogus();
    }
    return result;
}

void
RuleBasedNumberFormat::initDefaultRuleSet()
{
    defaultRuleSet = NULL;
    if (!ruleSets) {
        return;
    }

    const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
    const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
    const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));

    NFRuleSet**p = &ruleSets[0];
    while (*p) {
        if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
            defaultRuleSet = *p;
            return;
        } else {
            ++p;
        }
    }

    defaultRuleSet = *--p;
    if (!defaultRuleSet->isPublic()) {
        while (p != ruleSets) {
            if ((*--p)->isPublic()) {
                defaultRuleSet = *p;
break; 1349 } 1350 } 1351 } 1352 } 1353 1354 1355 void 1356 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1357 UParseError& pErr, UErrorCode& status) 1358 { 1359 // TODO: implement UParseError 1360 uprv_memset(&pErr, 0, sizeof(UParseError)); 1361 // Note: this can leave ruleSets == NULL, so remaining code should check 1362 if (U_FAILURE(status)) { 1363 return; 1364 } 1365 1366 initializeDecimalFormatSymbols(status); 1367 initializeDefaultInfinityRule(status); 1368 initializeDefaultNaNRule(status); 1369 if (U_FAILURE(status)) { 1370 return; 1371 } 1372 1373 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1374 1375 UnicodeString description(rules); 1376 if (!description.length()) { 1377 status = U_MEMORY_ALLOCATION_ERROR; 1378 return; 1379 } 1380 1381 // start by stripping the trailing whitespace from all the rules 1382 // (this is all the whitespace follwing each semicolon in the 1383 // description). This allows us to look for rule-set boundaries 1384 // by searching for ";%" without having to worry about whitespace 1385 // between the ; and the % 1386 stripWhitespace(description); 1387 1388 // check to see if there's a set of lenient-parse rules. 
If there 1389 // is, pull them out into our temporary holding place for them, 1390 // and delete them from the description before the real desciption- 1391 // parsing code sees them 1392 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1393 if (lp != -1) { 1394 // we've got to make sure we're not in the middle of a rule 1395 // (where "%%lenient-parse" would actually get treated as 1396 // rule text) 1397 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1398 // locate the beginning and end of the actual collation 1399 // rules (there may be whitespace between the name and 1400 // the first token in the description) 1401 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1402 1403 if (lpEnd == -1) { 1404 lpEnd = description.length() - 1; 1405 } 1406 int lpStart = lp + u_strlen(gLenientParse); 1407 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1408 ++lpStart; 1409 } 1410 1411 // copy out the lenient-parse rules and delete them 1412 // from the description 1413 lenientParseRules = new UnicodeString(); 1414 /* test for NULL */ 1415 if (lenientParseRules == 0) { 1416 status = U_MEMORY_ALLOCATION_ERROR; 1417 return; 1418 } 1419 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1420 1421 description.remove(lp, lpEnd + 1 - lp); 1422 } 1423 } 1424 1425 // pre-flight parsing the description and count the number of 1426 // rule sets (";%" marks the end of one rule set and the beginning 1427 // of the next) 1428 numRuleSets = 0; 1429 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1430 ++numRuleSets; 1431 ++p; 1432 } 1433 ++numRuleSets; 1434 1435 // our rule list is an array of the appropriate size 1436 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1437 /* test for NULL */ 1438 if (ruleSets == 0) { 1439 status = U_MEMORY_ALLOCATION_ERROR; 1440 return; 1441 } 1442 1443 for (int i = 0; i <= numRuleSets; ++i) { 1444 ruleSets[i] 
= NULL; 1445 } 1446 1447 // divide up the descriptions into individual rule-set descriptions 1448 // and store them in a temporary array. At each step, we also 1449 // new up a rule set, but all this does is initialize its name 1450 // and remove it from its description. We can't actually parse 1451 // the rest of the descriptions and finish initializing everything 1452 // because we have to know the names and locations of all the rule 1453 // sets before we can actually set everything up 1454 if(!numRuleSets) { 1455 status = U_ILLEGAL_ARGUMENT_ERROR; 1456 return; 1457 } 1458 1459 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1460 if (ruleSetDescriptions == 0) { 1461 status = U_MEMORY_ALLOCATION_ERROR; 1462 return; 1463 } 1464 1465 { 1466 int curRuleSet = 0; 1467 int32_t start = 0; 1468 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1469 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1470 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1471 if (ruleSets[curRuleSet] == 0) { 1472 status = U_MEMORY_ALLOCATION_ERROR; 1473 return; 1474 } 1475 ++curRuleSet; 1476 start = p + 1; 1477 } 1478 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1479 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1480 if (ruleSets[curRuleSet] == 0) { 1481 status = U_MEMORY_ALLOCATION_ERROR; 1482 return; 1483 } 1484 } 1485 1486 // now we can take note of the formatter's default rule set, which 1487 // is the last public rule set in the description (it's the last 1488 // rather than the first so that a user can create a new formatter 1489 // from an existing formatter and change its default behavior just 1490 // by appending more rule sets to the end) 1491 1492 // {dlf} Initialization of a fraction rule set requires the default rule 1493 // set to be known. 
For purposes of initialization, this is always the 1494 // last public rule set, no matter what the localization data says. 1495 initDefaultRuleSet(); 1496 1497 // finally, we can go back through the temporary descriptions 1498 // list and finish seting up the substructure (and we throw 1499 // away the temporary descriptions as we go) 1500 { 1501 for (int i = 0; i < numRuleSets; i++) { 1502 ruleSets[i]->parseRules(ruleSetDescriptions[i], status); 1503 } 1504 } 1505 1506 // Now that the rules are initialized, the 'real' default rule 1507 // set can be adjusted by the localization data. 1508 1509 // The C code keeps the localization array as is, rather than building 1510 // a separate array of the public rule set names, so we have less work 1511 // to do here-- but we still need to check the names. 1512 1513 if (localizationInfos) { 1514 // confirm the names, if any aren't in the rules, that's an error 1515 // it is ok if the rules contain public rule sets that are not in this list 1516 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1517 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1518 NFRuleSet* rs = findRuleSet(name, status); 1519 if (rs == NULL) { 1520 break; // error 1521 } 1522 if (i == 0) { 1523 defaultRuleSet = rs; 1524 } 1525 } 1526 } else { 1527 defaultRuleSet = getDefaultRuleSet(); 1528 } 1529 originalDescription = rules; 1530 } 1531 1532 // override the NumberFormat implementation in order to 1533 // lazily initialize relevant items 1534 void 1535 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1536 { 1537 NumberFormat::setContext(value, status); 1538 if (U_SUCCESS(status)) { 1539 if (!capitalizationInfoSet && 1540 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1541 initCapitalizationContextInfo(locale); 1542 capitalizationInfoSet = TRUE; 1543 } 1544 #if !UCONFIG_NO_BREAK_ITERATION 1545 if ( capitalizationBrkIter == NULL 
&& (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1546 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1547 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1548 UErrorCode status = U_ZERO_ERROR; 1549 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1550 if (U_FAILURE(status)) { 1551 delete capitalizationBrkIter; 1552 capitalizationBrkIter = NULL; 1553 } 1554 } 1555 #endif 1556 } 1557 } 1558 1559 void 1560 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1561 { 1562 #if !UCONFIG_NO_BREAK_ITERATION 1563 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1564 UErrorCode status = U_ZERO_ERROR; 1565 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1566 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1567 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1568 if (U_SUCCESS(status) && rb != NULL) { 1569 int32_t len = 0; 1570 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1571 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1572 capitalizationForUIListMenu = intVector[0]; 1573 capitalizationForStandAlone = intVector[1]; 1574 } 1575 } 1576 ures_close(rb); 1577 #endif 1578 } 1579 1580 void 1581 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1582 { 1583 // iterate through the characters... 1584 UnicodeString result; 1585 1586 int start = 0; 1587 while (start != -1 && start < description.length()) { 1588 // seek to the first non-whitespace character... 
1589 while (start < description.length() 1590 && PatternProps::isWhiteSpace(description.charAt(start))) { 1591 ++start; 1592 } 1593 1594 // locate the next semicolon in the text and copy the text from 1595 // our current position up to that semicolon into the result 1596 int32_t p = description.indexOf(gSemiColon, start); 1597 if (p == -1) { 1598 // or if we don't find a semicolon, just copy the rest of 1599 // the string into the result 1600 result.append(description, start, description.length() - start); 1601 start = -1; 1602 } 1603 else if (p < description.length()) { 1604 result.append(description, start, p + 1 - start); 1605 start = p + 1; 1606 } 1607 1608 // when we get here, we've seeked off the end of the sring, and 1609 // we terminate the loop (we continue until *start* is -1 rather 1610 // than until *p* is -1, because otherwise we'd miss the last 1611 // rule in the description) 1612 else { 1613 start = -1; 1614 } 1615 } 1616 1617 description.setTo(result); 1618 } 1619 1620 1621 void 1622 RuleBasedNumberFormat::dispose() 1623 { 1624 if (ruleSets) { 1625 for (NFRuleSet** p = ruleSets; *p; ++p) { 1626 delete *p; 1627 } 1628 uprv_free(ruleSets); 1629 ruleSets = NULL; 1630 } 1631 1632 if (ruleSetDescriptions) { 1633 delete [] ruleSetDescriptions; 1634 ruleSetDescriptions = NULL; 1635 } 1636 1637 #if !UCONFIG_NO_COLLATION 1638 delete collator; 1639 #endif 1640 collator = NULL; 1641 1642 delete decimalFormatSymbols; 1643 decimalFormatSymbols = NULL; 1644 1645 delete defaultInfinityRule; 1646 defaultInfinityRule = NULL; 1647 1648 delete defaultNaNRule; 1649 defaultNaNRule = NULL; 1650 1651 delete lenientParseRules; 1652 lenientParseRules = NULL; 1653 1654 #if !UCONFIG_NO_BREAK_ITERATION 1655 delete capitalizationBrkIter; 1656 capitalizationBrkIter = NULL; 1657 #endif 1658 1659 if (localizations) { 1660 localizations = localizations->unref(); 1661 } 1662 } 1663 1664 1665 //----------------------------------------------------------------------- 1666 // 
package-internal API 1667 //----------------------------------------------------------------------- 1668 1669 /** 1670 * Returns the collator to use for lenient parsing. The collator is lazily created: 1671 * this function creates it the first time it's called. 1672 * @return The collator to use for lenient parsing, or null if lenient parsing 1673 * is turned off. 1674 */ 1675 const RuleBasedCollator* 1676 RuleBasedNumberFormat::getCollator() const 1677 { 1678 #if !UCONFIG_NO_COLLATION 1679 if (!ruleSets) { 1680 return NULL; 1681 } 1682 1683 // lazy-evaluate the collator 1684 if (collator == NULL && lenient) { 1685 // create a default collator based on the formatter's locale, 1686 // then pull out that collator's rules, append any additional 1687 // rules specified in the description, and create a _new_ 1688 // collator based on the combinaiton of those rules 1689 1690 UErrorCode status = U_ZERO_ERROR; 1691 1692 Collator* temp = Collator::createInstance(locale, status); 1693 RuleBasedCollator* newCollator; 1694 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1695 if (lenientParseRules) { 1696 UnicodeString rules(newCollator->getRules()); 1697 rules.append(*lenientParseRules); 1698 1699 newCollator = new RuleBasedCollator(rules, status); 1700 // Exit if newCollator could not be created. 
1701 if (newCollator == NULL) { 1702 return NULL; 1703 } 1704 } else { 1705 temp = NULL; 1706 } 1707 if (U_SUCCESS(status)) { 1708 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1709 // cast away const 1710 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1711 } else { 1712 delete newCollator; 1713 } 1714 } 1715 delete temp; 1716 } 1717 #endif 1718 1719 // if lenient-parse mode is off, this will be null 1720 // (see setLenientParseMode()) 1721 return collator; 1722 } 1723 1724 1725 DecimalFormatSymbols* 1726 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) 1727 { 1728 // lazy-evaluate the DecimalFormatSymbols object. This object 1729 // is shared by all DecimalFormat instances belonging to this 1730 // formatter 1731 if (decimalFormatSymbols == NULL) { 1732 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1733 if (U_SUCCESS(status)) { 1734 decimalFormatSymbols = temp; 1735 } 1736 else { 1737 delete temp; 1738 } 1739 } 1740 return decimalFormatSymbols; 1741 } 1742 1743 /** 1744 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1745 * instances owned by this formatter. 
1746 */ 1747 const DecimalFormatSymbols* 1748 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1749 { 1750 return decimalFormatSymbols; 1751 } 1752 1753 NFRule* 1754 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) 1755 { 1756 if (U_FAILURE(status)) { 1757 return NULL; 1758 } 1759 if (defaultInfinityRule == NULL) { 1760 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); 1761 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); 1762 NFRule* temp = new NFRule(this, rule, status); 1763 if (U_SUCCESS(status)) { 1764 defaultInfinityRule = temp; 1765 } 1766 else { 1767 delete temp; 1768 } 1769 } 1770 return defaultInfinityRule; 1771 } 1772 1773 const NFRule* 1774 RuleBasedNumberFormat::getDefaultInfinityRule() const 1775 { 1776 return defaultInfinityRule; 1777 } 1778 1779 NFRule* 1780 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) 1781 { 1782 if (U_FAILURE(status)) { 1783 return NULL; 1784 } 1785 if (defaultNaNRule == NULL) { 1786 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); 1787 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); 1788 NFRule* temp = new NFRule(this, rule, status); 1789 if (U_SUCCESS(status)) { 1790 defaultNaNRule = temp; 1791 } 1792 else { 1793 delete temp; 1794 } 1795 } 1796 return defaultNaNRule; 1797 } 1798 1799 const NFRule* 1800 RuleBasedNumberFormat::getDefaultNaNRule() const 1801 { 1802 return defaultNaNRule; 1803 } 1804 1805 // De-owning the current localized symbols and adopt the new symbols. 
1806 void 1807 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1808 { 1809 if (symbolsToAdopt == NULL) { 1810 return; // do not allow caller to set decimalFormatSymbols to NULL 1811 } 1812 1813 if (decimalFormatSymbols != NULL) { 1814 delete decimalFormatSymbols; 1815 } 1816 1817 decimalFormatSymbols = symbolsToAdopt; 1818 1819 { 1820 // Apply the new decimalFormatSymbols by reparsing the rulesets 1821 UErrorCode status = U_ZERO_ERROR; 1822 1823 delete defaultInfinityRule; 1824 defaultInfinityRule = NULL; 1825 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols 1826 1827 delete defaultNaNRule; 1828 defaultNaNRule = NULL; 1829 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols 1830 1831 if (ruleSets) { 1832 for (int32_t i = 0; i < numRuleSets; i++) { 1833 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); 1834 } 1835 } 1836 } 1837 } 1838 1839 // Setting the symbols is equlivalent to adopting a newly created localized symbols. 1840 void 1841 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1842 { 1843 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1844 } 1845 1846 PluralFormat * 1847 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1848 const UnicodeString &pattern, 1849 UErrorCode& status) const 1850 { 1851 return new PluralFormat(locale, pluralType, pattern, status); 1852 } 1853 1854 U_NAMESPACE_END 1855 1856 /* U_HAVE_RBNF */ 1857 #endif 1858