1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2015, International Business Machines Corporation 6 * and others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 #include "unicode/utypes.h" 11 #include "utypeinfo.h" // for 'typeid' to work 12 13 #include "unicode/rbnf.h" 14 15 #if U_HAVE_RBNF 16 17 #include "unicode/normlzr.h" 18 #include "unicode/plurfmt.h" 19 #include "unicode/tblcoll.h" 20 #include "unicode/uchar.h" 21 #include "unicode/ucol.h" 22 #include "unicode/uloc.h" 23 #include "unicode/unum.h" 24 #include "unicode/ures.h" 25 #include "unicode/ustring.h" 26 #include "unicode/utf16.h" 27 #include "unicode/udata.h" 28 #include "unicode/udisplaycontext.h" 29 #include "unicode/brkiter.h" 30 #include "unicode/ucasemap.h" 31 32 #include "cmemory.h" 33 #include "cstring.h" 34 #include "patternprops.h" 35 #include "uresimp.h" 36 #include "nfrs.h" 37 #include "number_decimalquantity.h" 38 39 // debugging 40 // #define RBNF_DEBUG 41 42 #ifdef RBNF_DEBUG 43 #include <stdio.h> 44 #endif 45 46 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 47 48 static const UChar gPercentPercent[] = 49 { 50 0x25, 0x25, 0 51 }; /* "%%" */ 52 53 // All urbnf objects are created through openRules, so we init all of the 54 // Unicode string constants required by rbnf, nfrs, or nfr here. 55 static const UChar gLenientParse[] = 56 { 57 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 58 }; /* "%%lenient-parse:" */ 59 static const UChar gSemiColon = 0x003B; 60 static const UChar gSemiPercent[] = 61 { 62 0x3B, 0x25, 0 63 }; /* ";%" */ 64 65 #define kSomeNumberOfBitsDiv2 22 66 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 67 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 68 69 U_NAMESPACE_BEGIN 70 71 using number::impl::DecimalQuantity; 72 73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 74 75 /* 76 This is a utility class. It does not use ICU's RTTI. 77 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 78 Please make sure that intltest passes on Windows in Release mode, 79 since the string pooling per compilation unit will mess up how RTTI works. 80 The RTTI code was also removed due to lack of code coverage. 81 */ 82 class LocalizationInfo : public UMemory { 83 protected: 84 virtual ~LocalizationInfo(); 85 uint32_t refcount; 86 87 public: 88 LocalizationInfo() : refcount(0) {} 89 90 LocalizationInfo* ref(void) { 91 ++refcount; 92 return this; 93 } 94 95 LocalizationInfo* unref(void) { 96 if (refcount && --refcount == 0) { 97 delete this; 98 } 99 return NULL; 100 } 101 102 virtual UBool operator==(const LocalizationInfo* rhs) const; 103 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 104 105 virtual int32_t getNumberOfRuleSets(void) const = 0; 106 virtual const UChar* getRuleSetName(int32_t index) const = 0; 107 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 108 virtual const UChar* getLocaleName(int32_t index) const = 0; 109 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 110 111 virtual int32_t indexForLocale(const UChar* locale) const; 112 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 113 114 // virtual UClassID getDynamicClassID() const = 0; 115 // static UClassID getStaticClassID(void); 116 }; 117 118 LocalizationInfo::~LocalizationInfo() {} 119 120 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 121 122 // if both strings are NULL, this returns TRUE 123 static UBool 124 streq(const UChar* lhs, const UChar* rhs) { 125 if (rhs == lhs) { 126 return TRUE; 127 } 128 if (lhs && rhs) { 129 return u_strcmp(lhs, rhs) == 0; 130 } 131 return FALSE; 132 } 133 134 UBool 135 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 136 if (rhs) { 137 if (this == rhs) { 138 return TRUE; 139 } 140 141 int32_t rsc = getNumberOfRuleSets(); 142 if (rsc == rhs->getNumberOfRuleSets()) { 143 for (int i = 0; i < rsc; ++i) { 144 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 145 return FALSE; 146 } 147 } 148 int32_t dlc = getNumberOfDisplayLocales(); 149 if (dlc == rhs->getNumberOfDisplayLocales()) { 150 for (int i = 0; i < dlc; ++i) { 151 const UChar* locale = getLocaleName(i); 152 int32_t ix = rhs->indexForLocale(locale); 153 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 154 if (!streq(locale, rhs->getLocaleName(ix))) { 155 return FALSE; 156 } 157 for (int j = 0; j < rsc; ++j) { 158 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 159 return FALSE; 160 } 161 } 162 } 163 return TRUE; 164 } 165 } 166 } 167 return FALSE; 168 } 169 170 int32_t 171 LocalizationInfo::indexForLocale(const UChar* locale) const { 172 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 173 if (streq(locale, getLocaleName(i))) { 174 return i; 175 } 176 } 177 return -1; 178 } 179 180 int32_t 181 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 182 if (ruleset) { 183 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 184 if (streq(ruleset, getRuleSetName(i))) { 185 return i; 186 } 187 } 188 } 189 return -1; 190 } 191 192 193 typedef void (*Fn_Deleter)(void*); 194 195 class VArray { 196 void** buf; 197 int32_t cap; 198 int32_t size; 199 Fn_Deleter deleter; 200 public: 201 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 202 203 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 204 205 ~VArray() { 206 if (deleter) { 207 for (int i = 0; i < size; ++i) { 208 (*deleter)(buf[i]); 209 } 210 } 211 uprv_free(buf); 212 } 213 214 int32_t length() { 215 return size; 216 } 217 218 void add(void* elem, UErrorCode& status) { 219 if (U_SUCCESS(status)) { 220 if (size == cap) { 221 if (cap == 0) { 222 cap = 1; 223 } else if (cap < 256) { 224 cap *= 2; 225 } else { 226 cap += 256; 227 } 228 if (buf == NULL) { 229 buf = (void**)uprv_malloc(cap * sizeof(void*)); 230 } else { 231 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 232 } 233 if (buf == NULL) { 234 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 235 status = U_MEMORY_ALLOCATION_ERROR; 236 return; 237 } 238 void* start = &buf[size]; 239 size_t count = (cap - size) * sizeof(void*); 240 uprv_memset(start, 0, count); // fill with nulls, just because 241 } 242 buf[size++] = elem; 243 } 244 } 245 246 void** release(void) { 247 void** result = buf; 248 buf = NULL; 249 cap = 0; 250 size = 0; 251 return result; 252 } 253 }; 254 255 class LocDataParser; 256 257 class StringLocalizationInfo : public LocalizationInfo { 258 UChar* info; 259 UChar*** data; 260 int32_t numRuleSets; 261 int32_t numLocales; 262 263 friend class LocDataParser; 264 265 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 266 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 267 { 268 } 269 270 public: 271 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 272 273 virtual ~StringLocalizationInfo(); 274 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 275 virtual const UChar* getRuleSetName(int32_t index) const; 276 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 277 virtual const UChar* getLocaleName(int32_t index) const; 278 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 279 280 // virtual UClassID getDynamicClassID() const; 281 // static UClassID getStaticClassID(void); 282 283 private: 284 void init(UErrorCode& status) const; 285 }; 286 287 288 enum { 289 OPEN_ANGLE = 0x003c, /* '<' */ 290 CLOSE_ANGLE = 0x003e, /* '>' */ 291 COMMA = 0x002c, 292 TICK = 0x0027, 293 QUOTE = 0x0022, 294 SPACE = 0x0020 295 }; 296 297 /** 298 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 299 */ 300 class LocDataParser { 301 UChar* data; 302 const UChar* e; 303 UChar* p; 304 UChar ch; 305 UParseError& pe; 306 UErrorCode& ec; 307 308 public: 309 LocDataParser(UParseError& parseError, UErrorCode& status) 310 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 311 ~LocDataParser() {} 312 313 /* 314 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 315 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 316 */ 317 StringLocalizationInfo* parse(UChar* data, int32_t len); 318 319 private: 320 321 inline void inc(void) { 322 ++p; 323 ch = 0xffff; 324 } 325 inline UBool checkInc(UChar c) { 326 if (p < e && (ch == c || *p == c)) { 327 inc(); 328 return TRUE; 329 } 330 return FALSE; 331 } 332 inline UBool check(UChar c) { 333 return p < e && (ch == c || *p == c); 334 } 335 inline void skipWhitespace(void) { 336 while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) { 337 inc(); 338 } 339 } 340 inline UBool inList(UChar c, const UChar* list) const { 341 if (*list == SPACE && PatternProps::isWhiteSpace(c)) { 342 return TRUE; 343 } 344 while (*list && *list != c) { 345 ++list; 346 } 347 return *list == c; 348 } 349 void parseError(const char* msg); 350 351 StringLocalizationInfo* doParse(void); 352 353 UChar** nextArray(int32_t& requiredLength); 354 UChar* nextString(void); 355 }; 356 357 #ifdef RBNF_DEBUG 358 #define ERROR(msg) parseError(msg); return NULL; 359 #define EXPLANATION_ARG explanationArg 360 #else 361 #define ERROR(msg) parseError(NULL); return NULL; 362 #define EXPLANATION_ARG 363 #endif 364 365 366 static const UChar DQUOTE_STOPLIST[] = { 367 QUOTE, 0 368 }; 369 370 static const UChar SQUOTE_STOPLIST[] = { 371 TICK, 0 372 }; 373 374 static const UChar NOQUOTE_STOPLIST[] = { 375 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 376 }; 377 378 static void 379 DeleteFn(void* p) { 380 uprv_free(p); 381 } 382 383 StringLocalizationInfo* 384 LocDataParser::parse(UChar* _data, int32_t len) { 385 if (U_FAILURE(ec)) { 386 if (_data) uprv_free(_data); 387 return NULL; 388 } 389 390 pe.line = 0; 391 pe.offset = -1; 392 pe.postContext[0] = 0; 393 pe.preContext[0] = 0; 394 395 if (_data == NULL) { 396 ec = U_ILLEGAL_ARGUMENT_ERROR; 397 return NULL; 398 } 399 400 if (len <= 0) { 401 ec = U_ILLEGAL_ARGUMENT_ERROR; 402 uprv_free(_data); 403 return NULL; 404 } 405 406 data = _data; 407 e = data + len; 408 p = _data; 409 ch = 0xffff; 410 411 return doParse(); 412 } 413 414 415 StringLocalizationInfo* 416 LocDataParser::doParse(void) { 417 skipWhitespace(); 418 if (!checkInc(OPEN_ANGLE)) { 419 ERROR("Missing open angle"); 420 } else { 421 VArray array(DeleteFn); 422 UBool mightHaveNext = TRUE; 423 int32_t requiredLength = -1; 424 while (mightHaveNext) { 425 mightHaveNext = FALSE; 426 UChar** elem = nextArray(requiredLength); 427 skipWhitespace(); 428 UBool haveComma = check(COMMA); 429 if (elem) { 430 array.add(elem, ec); 431 if (haveComma) { 432 inc(); 433 mightHaveNext = TRUE; 434 } 435 } else if (haveComma) { 436 ERROR("Unexpected character"); 437 } 438 } 439 440 skipWhitespace(); 441 if (!checkInc(CLOSE_ANGLE)) { 442 if (check(OPEN_ANGLE)) { 443 ERROR("Missing comma in outer array"); 444 } else { 445 ERROR("Missing close angle bracket in outer array"); 446 } 447 } 448 449 skipWhitespace(); 450 if (p != e) { 451 ERROR("Extra text after close of localization data"); 452 } 453 454 array.add(NULL, ec); 455 if (U_SUCCESS(ec)) { 456 int32_t numLocs = array.length() - 2; // subtract first, NULL 457 UChar*** result = (UChar***)array.release(); 458 459 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 460 } 461 } 462 463 ERROR("Unknown error"); 464 } 465 466 UChar** 467 LocDataParser::nextArray(int32_t& requiredLength) { 468 if (U_FAILURE(ec)) { 469 return NULL; 470 } 471 472 skipWhitespace(); 473 if (!checkInc(OPEN_ANGLE)) { 474 ERROR("Missing open angle"); 475 } 476 477 VArray array; 478 UBool mightHaveNext = TRUE; 479 while (mightHaveNext) { 480 mightHaveNext = FALSE; 481 UChar* elem = nextString(); 482 skipWhitespace(); 483 UBool haveComma = check(COMMA); 484 if (elem) { 485 array.add(elem, ec); 486 if (haveComma) { 487 inc(); 488 mightHaveNext = TRUE; 489 } 490 } else if (haveComma) { 491 ERROR("Unexpected comma"); 492 } 493 } 494 skipWhitespace(); 495 if (!checkInc(CLOSE_ANGLE)) { 496 if (check(OPEN_ANGLE)) { 497 ERROR("Missing close angle bracket in inner array"); 498 } else { 499 ERROR("Missing comma in inner array"); 500 } 501 } 502 503 array.add(NULL, ec); 504 if (U_SUCCESS(ec)) { 505 if (requiredLength == -1) { 506 requiredLength = array.length() + 1; 507 } else if (array.length() != requiredLength) { 508 ec = U_ILLEGAL_ARGUMENT_ERROR; 509 ERROR("Array not of required length"); 510 } 511 512 return (UChar**)array.release(); 513 } 514 ERROR("Unknown Error"); 515 } 516 517 UChar* 518 LocDataParser::nextString() { 519 UChar* result = NULL; 520 521 skipWhitespace(); 522 if (p < e) { 523 const UChar* terminators; 524 UChar c = *p; 525 UBool haveQuote = c == QUOTE || c == TICK; 526 if (haveQuote) { 527 inc(); 528 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 529 } else { 530 terminators = NOQUOTE_STOPLIST; 531 } 532 UChar* start = p; 533 while (p < e && !inList(*p, terminators)) ++p; 534 if (p == e) { 535 ERROR("Unexpected end of data"); 536 } 537 538 UChar x = *p; 539 if (p > start) { 540 ch = x; 541 *p = 0x0; // terminate by writing to data 542 result = start; // just point into data 543 } 544 if (haveQuote) { 545 if (x != c) { 546 ERROR("Missing matching quote"); 547 } else if (p == start) { 548 ERROR("Empty string"); 549 } 550 inc(); 551 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 552 ERROR("Unexpected character in string"); 553 } 554 } 555 556 // ok for there to be no next string 557 return result; 558 } 559 560 void LocDataParser::parseError(const char* EXPLANATION_ARG) 561 { 562 if (!data) { 563 return; 564 } 565 566 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 567 if (start < data) { 568 start = data; 569 } 570 for (UChar* x = p; --x >= start;) { 571 if (!*x) { 572 start = x+1; 573 break; 574 } 575 } 576 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 577 if (limit > e) { 578 limit = e; 579 } 580 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 581 pe.preContext[p-start] = 0; 582 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 583 pe.postContext[limit-p] = 0; 584 pe.offset = (int32_t)(p - data); 585 586 #ifdef RBNF_DEBUG 587 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 588 589 UnicodeString msg; 590 msg.append(start, p - start); 591 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 592 msg.append(p, limit-p); 593 msg.append(UNICODE_STRING_SIMPLE("'")); 594 595 char buf[128]; 596 int32_t len = msg.extract(0, msg.length(), buf, 128); 597 if (len >= 128) { 598 buf[127] = 0; 599 } else { 600 buf[len] = 0; 601 } 602 fprintf(stderr, "%s\n", buf); 603 fflush(stderr); 604 #endif 605 606 uprv_free(data); 607 data = NULL; 608 p = NULL; 609 e = NULL; 610 611 if (U_SUCCESS(ec)) { 612 ec = U_PARSE_ERROR; 613 } 614 } 615 616 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 617 618 StringLocalizationInfo* 619 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 620 if (U_FAILURE(status)) { 621 return NULL; 622 } 623 624 int32_t len = info.length(); 625 if (len == 0) { 626 return NULL; // no error; 627 } 628 629 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 630 if (!p) { 631 status = U_MEMORY_ALLOCATION_ERROR; 632 return NULL; 633 } 634 info.extract(p, len, status); 635 if (!U_FAILURE(status)) { 636 status = U_ZERO_ERROR; // clear warning about non-termination 637 } 638 639 LocDataParser parser(perror, status); 640 return parser.parse(p, len); 641 } 642 643 StringLocalizationInfo::~StringLocalizationInfo() { 644 for (UChar*** p = (UChar***)data; *p; ++p) { 645 // remaining data is simply pointer into our unicode string data. 646 if (*p) uprv_free(*p); 647 } 648 if (data) uprv_free(data); 649 if (info) uprv_free(info); 650 } 651 652 653 const UChar* 654 StringLocalizationInfo::getRuleSetName(int32_t index) const { 655 if (index >= 0 && index < getNumberOfRuleSets()) { 656 return data[0][index]; 657 } 658 return NULL; 659 } 660 661 const UChar* 662 StringLocalizationInfo::getLocaleName(int32_t index) const { 663 if (index >= 0 && index < getNumberOfDisplayLocales()) { 664 return data[index+1][0]; 665 } 666 return NULL; 667 } 668 669 const UChar* 670 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 671 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 672 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 673 return data[localeIndex+1][ruleIndex+1]; 674 } 675 return NULL; 676 } 677 678 // ---------- 679 680 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 681 const UnicodeString& locs, 682 const Locale& alocale, UParseError& perror, UErrorCode& status) 683 : fRuleSets(NULL) 684 , ruleSetDescriptions(NULL) 685 , numRuleSets(0) 686 , defaultRuleSet(NULL) 687 , locale(alocale) 688 , collator(NULL) 689 , decimalFormatSymbols(NULL) 690 , defaultInfinityRule(NULL) 691 , defaultNaNRule(NULL) 692 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 693 , lenient(FALSE) 694 , lenientParseRules(NULL) 695 , localizations(NULL) 696 , capitalizationInfoSet(FALSE) 697 , capitalizationForUIListMenu(FALSE) 698 , capitalizationForStandAlone(FALSE) 699 , capitalizationBrkIter(NULL) 700 { 701 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 702 init(description, locinfo, perror, status); 703 } 704 705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 706 const UnicodeString& locs, 707 UParseError& perror, UErrorCode& status) 708 : fRuleSets(NULL) 709 , ruleSetDescriptions(NULL) 710 , numRuleSets(0) 711 , defaultRuleSet(NULL) 712 , locale(Locale::getDefault()) 713 , collator(NULL) 714 , decimalFormatSymbols(NULL) 715 , defaultInfinityRule(NULL) 716 , defaultNaNRule(NULL) 717 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 718 , lenient(FALSE) 719 , lenientParseRules(NULL) 720 , localizations(NULL) 721 , capitalizationInfoSet(FALSE) 722 , capitalizationForUIListMenu(FALSE) 723 , capitalizationForStandAlone(FALSE) 724 , capitalizationBrkIter(NULL) 725 { 726 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 727 init(description, locinfo, perror, status); 728 } 729 730 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 731 LocalizationInfo* info, 732 const Locale& alocale, UParseError& perror, UErrorCode& status) 733 : fRuleSets(NULL) 734 , ruleSetDescriptions(NULL) 735 , numRuleSets(0) 736 , defaultRuleSet(NULL) 737 , locale(alocale) 738 , collator(NULL) 739 , decimalFormatSymbols(NULL) 740 , defaultInfinityRule(NULL) 741 , defaultNaNRule(NULL) 742 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 743 , lenient(FALSE) 744 , lenientParseRules(NULL) 745 , localizations(NULL) 746 , capitalizationInfoSet(FALSE) 747 , capitalizationForUIListMenu(FALSE) 748 , capitalizationForStandAlone(FALSE) 749 , capitalizationBrkIter(NULL) 750 { 751 init(description, info, perror, status); 752 } 753 754 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 755 UParseError& perror, 756 UErrorCode& status) 757 : fRuleSets(NULL) 758 , ruleSetDescriptions(NULL) 759 , numRuleSets(0) 760 , defaultRuleSet(NULL) 761 , locale(Locale::getDefault()) 762 , collator(NULL) 763 , decimalFormatSymbols(NULL) 764 , defaultInfinityRule(NULL) 765 , defaultNaNRule(NULL) 766 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 767 , lenient(FALSE) 768 , lenientParseRules(NULL) 769 , localizations(NULL) 770 , capitalizationInfoSet(FALSE) 771 , capitalizationForUIListMenu(FALSE) 772 , capitalizationForStandAlone(FALSE) 773 , capitalizationBrkIter(NULL) 774 { 775 init(description, NULL, perror, status); 776 } 777 778 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 779 const Locale& aLocale, 780 UParseError& perror, 781 UErrorCode& status) 782 : fRuleSets(NULL) 783 , ruleSetDescriptions(NULL) 784 , numRuleSets(0) 785 , defaultRuleSet(NULL) 786 , locale(aLocale) 787 , collator(NULL) 788 , decimalFormatSymbols(NULL) 789 , defaultInfinityRule(NULL) 790 , defaultNaNRule(NULL) 791 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 792 , lenient(FALSE) 793 , lenientParseRules(NULL) 794 , localizations(NULL) 795 , capitalizationInfoSet(FALSE) 796 , capitalizationForUIListMenu(FALSE) 797 , capitalizationForStandAlone(FALSE) 798 , capitalizationBrkIter(NULL) 799 { 800 init(description, NULL, perror, status); 801 } 802 803 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 804 : fRuleSets(NULL) 805 , ruleSetDescriptions(NULL) 806 , numRuleSets(0) 807 , defaultRuleSet(NULL) 808 , locale(alocale) 809 , collator(NULL) 810 , decimalFormatSymbols(NULL) 811 , defaultInfinityRule(NULL) 812 , defaultNaNRule(NULL) 813 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 814 , lenient(FALSE) 815 , lenientParseRules(NULL) 816 , localizations(NULL) 817 , capitalizationInfoSet(FALSE) 818 , capitalizationForUIListMenu(FALSE) 819 , capitalizationForStandAlone(FALSE) 820 , capitalizationBrkIter(NULL) 821 { 822 if (U_FAILURE(status)) { 823 return; 824 } 825 826 const char* rules_tag = "RBNFRules"; 827 const char* fmt_tag = ""; 828 switch (tag) { 829 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 830 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 831 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 832 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 833 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 834 } 835 836 // TODO: read localization info from resource 837 LocalizationInfo* locinfo = NULL; 838 839 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 840 if (U_SUCCESS(status)) { 841 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 842 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 843 844 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 845 if (U_FAILURE(status)) { 846 ures_close(nfrb); 847 } 848 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 849 if (U_FAILURE(status)) { 850 ures_close(rbnfRules); 851 ures_close(nfrb); 852 return; 853 } 854 855 UnicodeString desc; 856 while (ures_hasNext(ruleSets)) { 857 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 858 } 859 UParseError perror; 860 861 init(desc, locinfo, perror, status); 862 863 ures_close(ruleSets); 864 ures_close(rbnfRules); 865 } 866 ures_close(nfrb); 867 } 868 869 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 870 : NumberFormat(rhs) 871 , fRuleSets(NULL) 872 , ruleSetDescriptions(NULL) 873 , numRuleSets(0) 874 , defaultRuleSet(NULL) 875 , locale(rhs.locale) 876 , collator(NULL) 877 , decimalFormatSymbols(NULL) 878 , defaultInfinityRule(NULL) 879 , defaultNaNRule(NULL) 880 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 881 , lenient(FALSE) 882 , lenientParseRules(NULL) 883 , localizations(NULL) 884 , capitalizationInfoSet(FALSE) 885 , capitalizationForUIListMenu(FALSE) 886 , capitalizationForStandAlone(FALSE) 887 , capitalizationBrkIter(NULL) 888 { 889 this->operator=(rhs); 890 } 891 892 // -------- 893 894 RuleBasedNumberFormat& 895 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 896 { 897 if (this == &rhs) { 898 return *this; 899 } 900 NumberFormat::operator=(rhs); 901 UErrorCode status = U_ZERO_ERROR; 902 dispose(); 903 locale = rhs.locale; 904 lenient = rhs.lenient; 905 906 UParseError perror; 907 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 908 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 909 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 910 setRoundingMode(rhs.getRoundingMode()); 911 912 capitalizationInfoSet = rhs.capitalizationInfoSet; 913 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 914 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 915 #if !UCONFIG_NO_BREAK_ITERATION 916 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL; 917 #endif 918 919 return *this; 920 } 921 922 RuleBasedNumberFormat::~RuleBasedNumberFormat() 923 { 924 dispose(); 925 } 926 927 Format* 928 RuleBasedNumberFormat::clone(void) const 929 { 930 return new RuleBasedNumberFormat(*this); 931 } 932 933 UBool 934 RuleBasedNumberFormat::operator==(const Format& other) const 935 { 936 if (this == &other) { 937 return TRUE; 938 } 939 940 if (typeid(*this) == typeid(other)) { 941 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 942 // test for capitalization info equality is adequately handled 943 // by the NumberFormat test for fCapitalizationContext equality; 944 // the info here is just derived from that. 945 if (locale == rhs.locale && 946 lenient == rhs.lenient && 947 (localizations == NULL 948 ? rhs.localizations == NULL 949 : (rhs.localizations == NULL 950 ? FALSE 951 : *localizations == rhs.localizations))) { 952 953 NFRuleSet** p = fRuleSets; 954 NFRuleSet** q = rhs.fRuleSets; 955 if (p == NULL) { 956 return q == NULL; 957 } else if (q == NULL) { 958 return FALSE; 959 } 960 while (*p && *q && (**p == **q)) { 961 ++p; 962 ++q; 963 } 964 return *q == NULL && *p == NULL; 965 } 966 } 967 968 return FALSE; 969 } 970 971 UnicodeString 972 RuleBasedNumberFormat::getRules() const 973 { 974 UnicodeString result; 975 if (fRuleSets != NULL) { 976 for (NFRuleSet** p = fRuleSets; *p; ++p) { 977 (*p)->appendRules(result); 978 } 979 } 980 return result; 981 } 982 983 UnicodeString 984 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 985 { 986 if (localizations) { 987 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 988 return string; 989 } 990 else if (fRuleSets) { 991 UnicodeString result; 992 for (NFRuleSet** p = fRuleSets; *p; ++p) { 993 NFRuleSet* rs = *p; 994 if (rs->isPublic()) { 995 if (--index == -1) { 996 rs->getName(result); 997 return result; 998 } 999 } 1000 } 1001 } 1002 UnicodeString empty; 1003 return empty; 1004 } 1005 1006 int32_t 1007 RuleBasedNumberFormat::getNumberOfRuleSetNames() const 1008 { 1009 int32_t result = 0; 1010 if (localizations) { 1011 result = localizations->getNumberOfRuleSets(); 1012 } 1013 else if (fRuleSets) { 1014 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1015 if ((**p).isPublic()) { 1016 ++result; 1017 } 1018 } 1019 } 1020 return result; 1021 } 1022 1023 int32_t 1024 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 1025 if (localizations) { 1026 return localizations->getNumberOfDisplayLocales(); 1027 } 1028 return 0; 1029 } 1030 1031 Locale 1032 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 1033 if (U_FAILURE(status)) { 1034 return Locale(""); 1035 } 1036 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 1037 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 1038 char buffer[64]; 1039 int32_t cap = name.length() + 1; 1040 char* bp = buffer; 1041 if (cap > 64) { 1042 bp = (char *)uprv_malloc(cap); 1043 if (bp == NULL) { 1044 status = U_MEMORY_ALLOCATION_ERROR; 1045 return Locale(""); 1046 } 1047 } 1048 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 1049 Locale retLocale(bp); 1050 if (bp != buffer) { 1051 uprv_free(bp); 1052 } 1053 return retLocale; 1054 } 1055 status = U_ILLEGAL_ARGUMENT_ERROR; 1056 Locale retLocale; 1057 return retLocale; 1058 } 1059 1060 UnicodeString 1061 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1062 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1063 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1064 int32_t len = localeName.length(); 1065 UChar* localeStr = localeName.getBuffer(len + 1); 1066 while (len >= 0) { 1067 localeStr[len] = 0; 1068 int32_t ix = localizations->indexForLocale(localeStr); 1069 if (ix >= 0) { 1070 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1071 return name; 1072 } 1073 1074 // trim trailing portion, skipping over ommitted sections 1075 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1076 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1077 } 1078 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1079 return name; 1080 } 1081 UnicodeString bogus; 1082 bogus.setToBogus(); 1083 return bogus; 1084 } 1085 1086 UnicodeString 1087 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1088 if (localizations) { 1089 UnicodeString rsn(ruleSetName); 1090 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1091 return getRuleSetDisplayName(ix, localeParam); 1092 } 1093 UnicodeString bogus; 1094 bogus.setToBogus(); 1095 return bogus; 1096 } 1097 1098 NFRuleSet* 1099 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1100 { 1101 if (U_SUCCESS(status) && fRuleSets) { 1102 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1103 NFRuleSet* rs = *p; 1104 if (rs->isNamed(name)) { 1105 return rs; 1106 } 1107 } 1108 status = U_ILLEGAL_ARGUMENT_ERROR; 1109 } 1110 return NULL; 1111 } 1112 1113 UnicodeString& 1114 RuleBasedNumberFormat::format(const DecimalQuantity &number, 1115 UnicodeString &appendTo, 1116 FieldPositionIterator *posIter, 1117 UErrorCode &status) const { 1118 if (U_FAILURE(status)) { 1119 return appendTo; 1120 } 1121 DecimalQuantity copy(number); 1122 if (copy.fitsInLong()) { 1123 format(number.toLong(), appendTo, posIter, status); 1124 } 1125 else { 1126 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status); 1127 if (copy.fitsInLong()) { 1128 format(number.toDouble(), appendTo, posIter, status); 1129 } 1130 else { 1131 // We're outside of our normal range that this framework can handle. 1132 // The DecimalFormat will provide more accurate results. 1133 1134 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1135 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status); 1136 if (decimalFormat.isNull()) { 1137 return appendTo; 1138 } 1139 Formattable f; 1140 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status); 1141 if (decimalQuantity.isNull()) { 1142 return appendTo; 1143 } 1144 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity. 1145 decimalFormat->format(f, appendTo, posIter, status); 1146 } 1147 } 1148 return appendTo; 1149 } 1150 1151 1152 UnicodeString& 1153 RuleBasedNumberFormat::format(const DecimalQuantity &number, 1154 UnicodeString& appendTo, 1155 FieldPosition& pos, 1156 UErrorCode &status) const { 1157 if (U_FAILURE(status)) { 1158 return appendTo; 1159 } 1160 DecimalQuantity copy(number); 1161 if (copy.fitsInLong()) { 1162 format(number.toLong(), appendTo, pos, status); 1163 } 1164 else { 1165 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status); 1166 if (copy.fitsInLong()) { 1167 format(number.toDouble(), appendTo, pos, status); 1168 } 1169 else { 1170 // We're outside of our normal range that this framework can handle. 1171 // The DecimalFormat will provide more accurate results. 1172 1173 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1174 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status); 1175 if (decimalFormat.isNull()) { 1176 return appendTo; 1177 } 1178 Formattable f; 1179 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status); 1180 if (decimalQuantity.isNull()) { 1181 return appendTo; 1182 } 1183 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity. 1184 decimalFormat->format(f, appendTo, pos, status); 1185 } 1186 } 1187 return appendTo; 1188 } 1189 1190 UnicodeString& 1191 RuleBasedNumberFormat::format(int32_t number, 1192 UnicodeString& toAppendTo, 1193 FieldPosition& pos) const 1194 { 1195 return format((int64_t)number, toAppendTo, pos); 1196 } 1197 1198 1199 UnicodeString& 1200 RuleBasedNumberFormat::format(int64_t number, 1201 UnicodeString& toAppendTo, 1202 FieldPosition& /* pos */) const 1203 { 1204 if (defaultRuleSet) { 1205 UErrorCode status = U_ZERO_ERROR; 1206 format(number, defaultRuleSet, toAppendTo, status); 1207 } 1208 return toAppendTo; 1209 } 1210 1211 1212 UnicodeString& 1213 RuleBasedNumberFormat::format(double number, 1214 UnicodeString& toAppendTo, 1215 FieldPosition& /* pos */) const 1216 { 1217 UErrorCode status = U_ZERO_ERROR; 1218 if (defaultRuleSet) { 1219 format(number, *defaultRuleSet, toAppendTo, status); 1220 } 1221 return toAppendTo; 1222 } 1223 1224 1225 UnicodeString& 1226 RuleBasedNumberFormat::format(int32_t number, 1227 const UnicodeString& ruleSetName, 1228 UnicodeString& toAppendTo, 1229 FieldPosition& pos, 1230 UErrorCode& status) const 1231 { 1232 return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1233 } 1234 1235 1236 UnicodeString& 1237 RuleBasedNumberFormat::format(int64_t number, 1238 const UnicodeString& ruleSetName, 1239 UnicodeString& toAppendTo, 1240 FieldPosition& /* pos */, 1241 UErrorCode& status) const 1242 { 1243 if (U_SUCCESS(status)) { 1244 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1245 // throw new IllegalArgumentException("Can't use internal rule set"); 1246 status = U_ILLEGAL_ARGUMENT_ERROR; 1247 } else { 1248 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1249 if (rs) { 1250 format(number, rs, toAppendTo, status); 1251 } 1252 } 1253 } 1254 return toAppendTo; 1255 } 1256 1257 1258 UnicodeString& 1259 RuleBasedNumberFormat::format(double number, 1260 const UnicodeString& ruleSetName, 1261 UnicodeString& toAppendTo, 1262 FieldPosition& /* pos */, 1263 UErrorCode& status) const 1264 { 1265 if (U_SUCCESS(status)) { 1266 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1267 // throw new IllegalArgumentException("Can't use internal rule set"); 1268 status = U_ILLEGAL_ARGUMENT_ERROR; 1269 } else { 1270 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1271 if (rs) { 1272 format(number, *rs, toAppendTo, status); 1273 } 1274 } 1275 } 1276 return toAppendTo; 1277 } 1278 1279 void 1280 RuleBasedNumberFormat::format(double number, 1281 NFRuleSet& rs, 1282 UnicodeString& toAppendTo, 1283 UErrorCode& status) const 1284 { 1285 int32_t startPos = toAppendTo.length(); 1286 if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) { 1287 DecimalQuantity digitList; 1288 digitList.setToDouble(number); 1289 digitList.roundToMagnitude( 1290 -getMaximumFractionDigits(), 1291 static_cast<UNumberFormatRoundingMode>(getRoundingMode()), 1292 status); 1293 number = digitList.toDouble(); 1294 } 1295 rs.format(number, toAppendTo, toAppendTo.length(), 0, status); 1296 adjustForCapitalizationContext(startPos, toAppendTo, status); 1297 } 1298 1299 /** 1300 * Bottleneck through which all the public format() methods 1301 * that take a long pass. By the time we get here, we know 1302 * which rule set we're using to do the formatting. 1303 * @param number The number to format 1304 * @param ruleSet The rule set to use to format the number 1305 * @return The text that resulted from formatting the number 1306 */ 1307 UnicodeString& 1308 RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const 1309 { 1310 // all API format() routines that take a double vector through 1311 // here. We have these two identical functions-- one taking a 1312 // double and one taking a long-- the couple digits of precision 1313 // that long has but double doesn't (both types are 8 bytes long, 1314 // but double has to borrow some of the mantissa bits to hold 1315 // the exponent). 1316 // Create an empty string buffer where the result will 1317 // be built, and pass it to the rule set (along with an insertion 1318 // position of 0 and the number being formatted) to the rule set 1319 // for formatting 1320 1321 if (U_SUCCESS(status)) { 1322 if (number == U_INT64_MIN) { 1323 // We can't handle this value right now. Provide an accurate default value. 1324 1325 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1326 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); 1327 if (decimalFormat == nullptr) { 1328 return toAppendTo; 1329 } 1330 Formattable f; 1331 FieldPosition pos(FieldPosition::DONT_CARE); 1332 DecimalQuantity *decimalQuantity = new DecimalQuantity(); 1333 if (decimalQuantity == nullptr) { 1334 status = U_MEMORY_ALLOCATION_ERROR; 1335 delete decimalFormat; 1336 return toAppendTo; 1337 } 1338 decimalQuantity->setToLong(number); 1339 f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity. 1340 decimalFormat->format(f, toAppendTo, pos, status); 1341 delete decimalFormat; 1342 } 1343 else { 1344 int32_t startPos = toAppendTo.length(); 1345 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); 1346 adjustForCapitalizationContext(startPos, toAppendTo, status); 1347 } 1348 } 1349 return toAppendTo; 1350 } 1351 1352 UnicodeString& 1353 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1354 UnicodeString& currentResult, 1355 UErrorCode& status) const 1356 { 1357 #if !UCONFIG_NO_BREAK_ITERATION 1358 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1359 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) { 1360 // capitalize currentResult according to context 1361 UChar32 ch = currentResult.char32At(0); 1362 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL && 1363 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1364 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1365 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1366 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1367 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1368 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1369 } 1370 } 1371 #endif 1372 return currentResult; 1373 } 1374 1375 1376 void 1377 RuleBasedNumberFormat::parse(const UnicodeString& text, 1378 Formattable& result, 1379 ParsePosition& parsePosition) const 1380 { 1381 if (!fRuleSets) { 1382 parsePosition.setErrorIndex(0); 1383 return; 1384 } 1385 1386 UnicodeString workingText(text, parsePosition.getIndex()); 1387 ParsePosition workingPos(0); 1388 1389 ParsePosition high_pp(0); 1390 Formattable high_result; 1391 1392 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1393 NFRuleSet *rp = *p; 1394 if (rp->isPublic() && rp->isParseable()) { 1395 ParsePosition working_pp(0); 1396 Formattable working_result; 1397 1398 rp->parse(workingText, working_pp, kMaxDouble, 0, working_result); 1399 if (working_pp.getIndex() > high_pp.getIndex()) { 1400 high_pp = working_pp; 1401 high_result = working_result; 1402 1403 if (high_pp.getIndex() == workingText.length()) { 1404 break; 1405 } 1406 } 1407 } 1408 } 1409 1410 int32_t startIndex = parsePosition.getIndex(); 1411 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1412 if (high_pp.getIndex() > 0) { 1413 parsePosition.setErrorIndex(-1); 1414 } else { 1415 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1416 parsePosition.setErrorIndex(startIndex + errorIndex); 1417 } 1418 result = high_result; 1419 if (result.getType() == Formattable::kDouble) { 1420 double d = result.getDouble(); 1421 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) { 1422 // Note: casting a double to an int when the double is too large or small 1423 // to fit the destination is undefined behavior. The explicit range checks, 1424 // above, are required. Just casting and checking the result value is undefined. 1425 result.setLong(static_cast<int32_t>(d)); 1426 } 1427 } 1428 } 1429 1430 #if !UCONFIG_NO_COLLATION 1431 1432 void 1433 RuleBasedNumberFormat::setLenient(UBool enabled) 1434 { 1435 lenient = enabled; 1436 if (!enabled && collator) { 1437 delete collator; 1438 collator = NULL; 1439 } 1440 } 1441 1442 #endif 1443 1444 void 1445 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1446 if (U_SUCCESS(status)) { 1447 if (ruleSetName.isEmpty()) { 1448 if (localizations) { 1449 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1450 defaultRuleSet = findRuleSet(name, status); 1451 } else { 1452 initDefaultRuleSet(); 1453 } 1454 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1455 status = U_ILLEGAL_ARGUMENT_ERROR; 1456 } else { 1457 NFRuleSet* result = findRuleSet(ruleSetName, status); 1458 if (result != NULL) { 1459 defaultRuleSet = result; 1460 } 1461 } 1462 } 1463 } 1464 1465 UnicodeString 1466 RuleBasedNumberFormat::getDefaultRuleSetName() const { 1467 UnicodeString result; 1468 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1469 defaultRuleSet->getName(result); 1470 } else { 1471 result.setToBogus(); 1472 } 1473 return result; 1474 } 1475 1476 void 1477 RuleBasedNumberFormat::initDefaultRuleSet() 1478 { 1479 defaultRuleSet = NULL; 1480 if (!fRuleSets) { 1481 return; 1482 } 1483 1484 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering")); 1485 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal")); 1486 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration")); 1487 1488 NFRuleSet**p = &fRuleSets[0]; 1489 while (*p) { 1490 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1491 defaultRuleSet = *p; 1492 return; 1493 } else { 1494 ++p; 1495 } 1496 } 1497 1498 defaultRuleSet = *--p; 1499 if (!defaultRuleSet->isPublic()) { 1500 while (p != fRuleSets) { 1501 if ((*--p)->isPublic()) { 1502 defaultRuleSet = *p; 1503 break; 1504 } 1505 } 1506 } 1507 } 1508 1509 1510 void 1511 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1512 UParseError& pErr, UErrorCode& status) 1513 { 1514 // TODO: implement UParseError 1515 uprv_memset(&pErr, 0, sizeof(UParseError)); 1516 // Note: this can leave ruleSets == NULL, so remaining code should check 1517 if (U_FAILURE(status)) { 1518 return; 1519 } 1520 1521 initializeDecimalFormatSymbols(status); 1522 initializeDefaultInfinityRule(status); 1523 initializeDefaultNaNRule(status); 1524 if (U_FAILURE(status)) { 1525 return; 1526 } 1527 1528 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1529 1530 UnicodeString description(rules); 1531 if (!description.length()) { 1532 status = U_MEMORY_ALLOCATION_ERROR; 1533 return; 1534 } 1535 1536 // start by stripping the trailing whitespace from all the rules 1537 // (this is all the whitespace follwing each semicolon in the 1538 // description). This allows us to look for rule-set boundaries 1539 // by searching for ";%" without having to worry about whitespace 1540 // between the ; and the % 1541 stripWhitespace(description); 1542 1543 // check to see if there's a set of lenient-parse rules. If there 1544 // is, pull them out into our temporary holding place for them, 1545 // and delete them from the description before the real desciption- 1546 // parsing code sees them 1547 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1548 if (lp != -1) { 1549 // we've got to make sure we're not in the middle of a rule 1550 // (where "%%lenient-parse" would actually get treated as 1551 // rule text) 1552 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1553 // locate the beginning and end of the actual collation 1554 // rules (there may be whitespace between the name and 1555 // the first token in the description) 1556 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1557 1558 if (lpEnd == -1) { 1559 lpEnd = description.length() - 1; 1560 } 1561 int lpStart = lp + u_strlen(gLenientParse); 1562 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1563 ++lpStart; 1564 } 1565 1566 // copy out the lenient-parse rules and delete them 1567 // from the description 1568 lenientParseRules = new UnicodeString(); 1569 /* test for NULL */ 1570 if (lenientParseRules == nullptr) { 1571 status = U_MEMORY_ALLOCATION_ERROR; 1572 return; 1573 } 1574 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1575 1576 description.remove(lp, lpEnd + 1 - lp); 1577 } 1578 } 1579 1580 // pre-flight parsing the description and count the number of 1581 // rule sets (";%" marks the end of one rule set and the beginning 1582 // of the next) 1583 numRuleSets = 0; 1584 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1585 ++numRuleSets; 1586 ++p; 1587 } 1588 ++numRuleSets; 1589 1590 // our rule list is an array of the appropriate size 1591 fRuleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1592 /* test for NULL */ 1593 if (fRuleSets == 0) { 1594 status = U_MEMORY_ALLOCATION_ERROR; 1595 return; 1596 } 1597 1598 for (int i = 0; i <= numRuleSets; ++i) { 1599 fRuleSets[i] = NULL; 1600 } 1601 1602 // divide up the descriptions into individual rule-set descriptions 1603 // and store them in a temporary array. At each step, we also 1604 // new up a rule set, but all this does is initialize its name 1605 // and remove it from its description. We can't actually parse 1606 // the rest of the descriptions and finish initializing everything 1607 // because we have to know the names and locations of all the rule 1608 // sets before we can actually set everything up 1609 if(!numRuleSets) { 1610 status = U_ILLEGAL_ARGUMENT_ERROR; 1611 return; 1612 } 1613 1614 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1615 if (ruleSetDescriptions == nullptr) { 1616 status = U_MEMORY_ALLOCATION_ERROR; 1617 return; 1618 } 1619 1620 { 1621 int curRuleSet = 0; 1622 int32_t start = 0; 1623 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1624 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1625 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1626 if (fRuleSets[curRuleSet] == nullptr) { 1627 status = U_MEMORY_ALLOCATION_ERROR; 1628 return; 1629 } 1630 ++curRuleSet; 1631 start = p + 1; 1632 } 1633 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1634 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1635 if (fRuleSets[curRuleSet] == nullptr) { 1636 status = U_MEMORY_ALLOCATION_ERROR; 1637 return; 1638 } 1639 } 1640 1641 // now we can take note of the formatter's default rule set, which 1642 // is the last public rule set in the description (it's the last 1643 // rather than the first so that a user can create a new formatter 1644 // from an existing formatter and change its default behavior just 1645 // by appending more rule sets to the end) 1646 1647 // {dlf} Initialization of a fraction rule set requires the default rule 1648 // set to be known. For purposes of initialization, this is always the 1649 // last public rule set, no matter what the localization data says. 1650 initDefaultRuleSet(); 1651 1652 // finally, we can go back through the temporary descriptions 1653 // list and finish setting up the substructure (and we throw 1654 // away the temporary descriptions as we go) 1655 { 1656 for (int i = 0; i < numRuleSets; i++) { 1657 fRuleSets[i]->parseRules(ruleSetDescriptions[i], status); 1658 } 1659 } 1660 1661 // Now that the rules are initialized, the 'real' default rule 1662 // set can be adjusted by the localization data. 1663 1664 // The C code keeps the localization array as is, rather than building 1665 // a separate array of the public rule set names, so we have less work 1666 // to do here-- but we still need to check the names. 1667 1668 if (localizationInfos) { 1669 // confirm the names, if any aren't in the rules, that's an error 1670 // it is ok if the rules contain public rule sets that are not in this list 1671 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1672 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1673 NFRuleSet* rs = findRuleSet(name, status); 1674 if (rs == NULL) { 1675 break; // error 1676 } 1677 if (i == 0) { 1678 defaultRuleSet = rs; 1679 } 1680 } 1681 } else { 1682 defaultRuleSet = getDefaultRuleSet(); 1683 } 1684 originalDescription = rules; 1685 } 1686 1687 // override the NumberFormat implementation in order to 1688 // lazily initialize relevant items 1689 void 1690 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1691 { 1692 NumberFormat::setContext(value, status); 1693 if (U_SUCCESS(status)) { 1694 if (!capitalizationInfoSet && 1695 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1696 initCapitalizationContextInfo(locale); 1697 capitalizationInfoSet = TRUE; 1698 } 1699 #if !UCONFIG_NO_BREAK_ITERATION 1700 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1701 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1702 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1703 status = U_ZERO_ERROR; 1704 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1705 if (U_FAILURE(status)) { 1706 delete capitalizationBrkIter; 1707 capitalizationBrkIter = NULL; 1708 } 1709 } 1710 #endif 1711 } 1712 } 1713 1714 void 1715 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1716 { 1717 #if !UCONFIG_NO_BREAK_ITERATION 1718 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1719 UErrorCode status = U_ZERO_ERROR; 1720 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1721 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1722 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1723 if (U_SUCCESS(status) && rb != NULL) { 1724 int32_t len = 0; 1725 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1726 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1727 capitalizationForUIListMenu = static_cast<UBool>(intVector[0]); 1728 capitalizationForStandAlone = static_cast<UBool>(intVector[1]); 1729 } 1730 } 1731 ures_close(rb); 1732 #endif 1733 } 1734 1735 void 1736 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1737 { 1738 // iterate through the characters... 1739 UnicodeString result; 1740 1741 int start = 0; 1742 while (start != -1 && start < description.length()) { 1743 // seek to the first non-whitespace character... 1744 while (start < description.length() 1745 && PatternProps::isWhiteSpace(description.charAt(start))) { 1746 ++start; 1747 } 1748 1749 // locate the next semicolon in the text and copy the text from 1750 // our current position up to that semicolon into the result 1751 int32_t p = description.indexOf(gSemiColon, start); 1752 if (p == -1) { 1753 // or if we don't find a semicolon, just copy the rest of 1754 // the string into the result 1755 result.append(description, start, description.length() - start); 1756 start = -1; 1757 } 1758 else if (p < description.length()) { 1759 result.append(description, start, p + 1 - start); 1760 start = p + 1; 1761 } 1762 1763 // when we get here, we've seeked off the end of the string, and 1764 // we terminate the loop (we continue until *start* is -1 rather 1765 // than until *p* is -1, because otherwise we'd miss the last 1766 // rule in the description) 1767 else { 1768 start = -1; 1769 } 1770 } 1771 1772 description.setTo(result); 1773 } 1774 1775 1776 void 1777 RuleBasedNumberFormat::dispose() 1778 { 1779 if (fRuleSets) { 1780 for (NFRuleSet** p = fRuleSets; *p; ++p) { 1781 delete *p; 1782 } 1783 uprv_free(fRuleSets); 1784 fRuleSets = NULL; 1785 } 1786 1787 if (ruleSetDescriptions) { 1788 delete [] ruleSetDescriptions; 1789 ruleSetDescriptions = NULL; 1790 } 1791 1792 #if !UCONFIG_NO_COLLATION 1793 delete collator; 1794 #endif 1795 collator = NULL; 1796 1797 delete decimalFormatSymbols; 1798 decimalFormatSymbols = NULL; 1799 1800 delete defaultInfinityRule; 1801 defaultInfinityRule = NULL; 1802 1803 delete defaultNaNRule; 1804 defaultNaNRule = NULL; 1805 1806 delete lenientParseRules; 1807 lenientParseRules = NULL; 1808 1809 #if !UCONFIG_NO_BREAK_ITERATION 1810 delete capitalizationBrkIter; 1811 capitalizationBrkIter = NULL; 1812 #endif 1813 1814 if (localizations) { 1815 localizations = localizations->unref(); 1816 } 1817 } 1818 1819 1820 //----------------------------------------------------------------------- 1821 // package-internal API 1822 //----------------------------------------------------------------------- 1823 1824 /** 1825 * Returns the collator to use for lenient parsing. The collator is lazily created: 1826 * this function creates it the first time it's called. 1827 * @return The collator to use for lenient parsing, or null if lenient parsing 1828 * is turned off. 1829 */ 1830 const RuleBasedCollator* 1831 RuleBasedNumberFormat::getCollator() const 1832 { 1833 #if !UCONFIG_NO_COLLATION 1834 if (!fRuleSets) { 1835 return NULL; 1836 } 1837 1838 // lazy-evaluate the collator 1839 if (collator == NULL && lenient) { 1840 // create a default collator based on the formatter's locale, 1841 // then pull out that collator's rules, append any additional 1842 // rules specified in the description, and create a _new_ 1843 // collator based on the combination of those rules 1844 1845 UErrorCode status = U_ZERO_ERROR; 1846 1847 Collator* temp = Collator::createInstance(locale, status); 1848 RuleBasedCollator* newCollator; 1849 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1850 if (lenientParseRules) { 1851 UnicodeString rules(newCollator->getRules()); 1852 rules.append(*lenientParseRules); 1853 1854 newCollator = new RuleBasedCollator(rules, status); 1855 // Exit if newCollator could not be created. 1856 if (newCollator == NULL) { 1857 return NULL; 1858 } 1859 } else { 1860 temp = NULL; 1861 } 1862 if (U_SUCCESS(status)) { 1863 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1864 // cast away const 1865 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1866 } else { 1867 delete newCollator; 1868 } 1869 } 1870 delete temp; 1871 } 1872 #endif 1873 1874 // if lenient-parse mode is off, this will be null 1875 // (see setLenientParseMode()) 1876 return collator; 1877 } 1878 1879 1880 DecimalFormatSymbols* 1881 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) 1882 { 1883 // lazy-evaluate the DecimalFormatSymbols object. This object 1884 // is shared by all DecimalFormat instances belonging to this 1885 // formatter 1886 if (decimalFormatSymbols == nullptr) { 1887 LocalPointer<DecimalFormatSymbols> temp(new DecimalFormatSymbols(locale, status), status); 1888 if (U_SUCCESS(status)) { 1889 decimalFormatSymbols = temp.orphan(); 1890 } 1891 } 1892 return decimalFormatSymbols; 1893 } 1894 1895 /** 1896 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1897 * instances owned by this formatter. 1898 */ 1899 const DecimalFormatSymbols* 1900 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1901 { 1902 return decimalFormatSymbols; 1903 } 1904 1905 NFRule* 1906 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) 1907 { 1908 if (U_FAILURE(status)) { 1909 return nullptr; 1910 } 1911 if (defaultInfinityRule == NULL) { 1912 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); 1913 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); 1914 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status); 1915 if (U_SUCCESS(status)) { 1916 defaultInfinityRule = temp.orphan(); 1917 } 1918 } 1919 return defaultInfinityRule; 1920 } 1921 1922 const NFRule* 1923 RuleBasedNumberFormat::getDefaultInfinityRule() const 1924 { 1925 return defaultInfinityRule; 1926 } 1927 1928 NFRule* 1929 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) 1930 { 1931 if (U_FAILURE(status)) { 1932 return nullptr; 1933 } 1934 if (defaultNaNRule == nullptr) { 1935 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); 1936 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); 1937 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status); 1938 if (U_SUCCESS(status)) { 1939 defaultNaNRule = temp.orphan(); 1940 } 1941 } 1942 return defaultNaNRule; 1943 } 1944 1945 const NFRule* 1946 RuleBasedNumberFormat::getDefaultNaNRule() const 1947 { 1948 return defaultNaNRule; 1949 } 1950 1951 // De-owning the current localized symbols and adopt the new symbols. 1952 void 1953 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1954 { 1955 if (symbolsToAdopt == NULL) { 1956 return; // do not allow caller to set decimalFormatSymbols to NULL 1957 } 1958 1959 if (decimalFormatSymbols != NULL) { 1960 delete decimalFormatSymbols; 1961 } 1962 1963 decimalFormatSymbols = symbolsToAdopt; 1964 1965 { 1966 // Apply the new decimalFormatSymbols by reparsing the rulesets 1967 UErrorCode status = U_ZERO_ERROR; 1968 1969 delete defaultInfinityRule; 1970 defaultInfinityRule = NULL; 1971 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols 1972 1973 delete defaultNaNRule; 1974 defaultNaNRule = NULL; 1975 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols 1976 1977 if (fRuleSets) { 1978 for (int32_t i = 0; i < numRuleSets; i++) { 1979 fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); 1980 } 1981 } 1982 } 1983 } 1984 1985 // Setting the symbols is equivalent to adopting a newly created localized symbols. 1986 void 1987 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1988 { 1989 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1990 } 1991 1992 PluralFormat * 1993 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1994 const UnicodeString &pattern, 1995 UErrorCode& status) const 1996 { 1997 auto *pf = new PluralFormat(locale, pluralType, pattern, status); 1998 if (pf == nullptr) { 1999 status = U_MEMORY_ALLOCATION_ERROR; 2000 } 2001 return pf; 2002 } 2003 2004 /** 2005 * Get the rounding mode. 2006 * @return A rounding mode 2007 */ 2008 DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const { 2009 return fRoundingMode; 2010 } 2011 2012 /** 2013 * Set the rounding mode. This has no effect unless the rounding 2014 * increment is greater than zero. 2015 * @param roundingMode A rounding mode 2016 */ 2017 void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) { 2018 fRoundingMode = roundingMode; 2019 } 2020 2021 U_NAMESPACE_END 2022 2023 /* U_HAVE_RBNF */ 2024 #endif 2025