1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2014, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 #include "unicode/utypes.h" 9 #include "utypeinfo.h" // for 'typeid' to work 10 11 #include "unicode/rbnf.h" 12 13 #if U_HAVE_RBNF 14 15 #include "unicode/normlzr.h" 16 #include "unicode/plurfmt.h" 17 #include "unicode/tblcoll.h" 18 #include "unicode/uchar.h" 19 #include "unicode/ucol.h" 20 #include "unicode/uloc.h" 21 #include "unicode/unum.h" 22 #include "unicode/ures.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "unicode/udata.h" 26 #include "unicode/udisplaycontext.h" 27 #include "unicode/brkiter.h" 28 #include "nfrs.h" 29 30 #include "cmemory.h" 31 #include "cstring.h" 32 #include "patternprops.h" 33 #include "uresimp.h" 34 35 // debugging 36 // #define RBNF_DEBUG 37 38 #ifdef RBNF_DEBUG 39 #include "stdio.h" 40 #endif 41 42 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 43 44 static const UChar gPercentPercent[] = 45 { 46 0x25, 0x25, 0 47 }; /* "%%" */ 48 49 // All urbnf objects are created through openRules, so we init all of the 50 // Unicode string constants required by rbnf, nfrs, or nfr here. 51 static const UChar gLenientParse[] = 52 { 53 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 54 }; /* "%%lenient-parse:" */ 55 static const UChar gSemiColon = 0x003B; 56 static const UChar gSemiPercent[] = 57 { 58 0x3B, 0x25, 0 59 }; /* ";%" */ 60 61 #define kSomeNumberOfBitsDiv2 22 62 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 63 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 64 65 U_NAMESPACE_BEGIN 66 67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 68 69 /* 70 This is a utility class. It does not use ICU's RTTI. 71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 72 Please make sure that intltest passes on Windows in Release mode, 73 since the string pooling per compilation unit will mess up how RTTI works. 74 The RTTI code was also removed due to lack of code coverage. 75 */ 76 class LocalizationInfo : public UMemory { 77 protected: 78 virtual ~LocalizationInfo(); 79 uint32_t refcount; 80 81 public: 82 LocalizationInfo() : refcount(0) {} 83 84 LocalizationInfo* ref(void) { 85 ++refcount; 86 return this; 87 } 88 89 LocalizationInfo* unref(void) { 90 if (refcount && --refcount == 0) { 91 delete this; 92 } 93 return NULL; 94 } 95 96 virtual UBool operator==(const LocalizationInfo* rhs) const; 97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 98 99 virtual int32_t getNumberOfRuleSets(void) const = 0; 100 virtual const UChar* getRuleSetName(int32_t index) const = 0; 101 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 102 virtual const UChar* getLocaleName(int32_t index) const = 0; 103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 104 105 virtual int32_t indexForLocale(const UChar* locale) const; 106 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 107 108 // virtual UClassID getDynamicClassID() const = 0; 109 // static UClassID getStaticClassID(void); 110 }; 111 112 LocalizationInfo::~LocalizationInfo() {} 113 114 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 115 116 // if both strings are NULL, this returns TRUE 117 static UBool 118 streq(const UChar* lhs, const UChar* rhs) { 119 if (rhs == lhs) { 120 return TRUE; 121 } 122 if (lhs && rhs) { 123 return u_strcmp(lhs, rhs) == 0; 124 } 125 return FALSE; 126 } 127 128 UBool 129 LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 130 if (rhs) { 131 if (this == rhs) { 132 return TRUE; 133 } 134 135 int32_t rsc = getNumberOfRuleSets(); 136 if (rsc == rhs->getNumberOfRuleSets()) { 137 for (int i = 0; i < rsc; ++i) { 138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 139 return FALSE; 140 } 141 } 142 int32_t dlc = getNumberOfDisplayLocales(); 143 if (dlc == rhs->getNumberOfDisplayLocales()) { 144 for (int i = 0; i < dlc; ++i) { 145 const UChar* locale = getLocaleName(i); 146 int32_t ix = rhs->indexForLocale(locale); 147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 148 if (!streq(locale, rhs->getLocaleName(ix))) { 149 return FALSE; 150 } 151 for (int j = 0; j < rsc; ++j) { 152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 153 return FALSE; 154 } 155 } 156 } 157 return TRUE; 158 } 159 } 160 } 161 return FALSE; 162 } 163 164 int32_t 165 LocalizationInfo::indexForLocale(const UChar* locale) const { 166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 167 if (streq(locale, getLocaleName(i))) { 168 return i; 169 } 170 } 171 return -1; 172 } 173 174 int32_t 175 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 176 if (ruleset) { 177 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 178 if (streq(ruleset, getRuleSetName(i))) { 179 return i; 180 } 181 } 182 } 183 return -1; 184 } 185 186 187 typedef void (*Fn_Deleter)(void*); 188 189 class VArray { 190 void** buf; 191 int32_t cap; 192 int32_t size; 193 Fn_Deleter deleter; 194 public: 195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 196 197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 198 199 ~VArray() { 200 if (deleter) { 201 for (int i = 0; i < size; ++i) { 202 (*deleter)(buf[i]); 203 } 204 } 205 uprv_free(buf); 206 } 207 208 int32_t length() { 209 return size; 210 } 211 212 void add(void* elem, UErrorCode& status) { 213 if (U_SUCCESS(status)) { 214 if (size == cap) { 215 if (cap == 0) { 216 cap = 1; 217 } else if (cap < 256) { 218 cap *= 2; 219 } else { 220 cap += 256; 221 } 222 if (buf == NULL) { 223 buf = (void**)uprv_malloc(cap * sizeof(void*)); 224 } else { 225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 226 } 227 if (buf == NULL) { 228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 229 status = U_MEMORY_ALLOCATION_ERROR; 230 return; 231 } 232 void* start = &buf[size]; 233 size_t count = (cap - size) * sizeof(void*); 234 uprv_memset(start, 0, count); // fill with nulls, just because 235 } 236 buf[size++] = elem; 237 } 238 } 239 240 void** release(void) { 241 void** result = buf; 242 buf = NULL; 243 cap = 0; 244 size = 0; 245 return result; 246 } 247 }; 248 249 class LocDataParser; 250 251 class StringLocalizationInfo : public LocalizationInfo { 252 UChar* info; 253 UChar*** data; 254 int32_t numRuleSets; 255 int32_t numLocales; 256 257 friend class LocDataParser; 258 259 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 260 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 261 { 262 } 263 264 public: 265 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 266 267 virtual ~StringLocalizationInfo(); 268 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 269 virtual const UChar* getRuleSetName(int32_t index) const; 270 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 271 virtual const UChar* getLocaleName(int32_t index) const; 272 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 273 274 // virtual UClassID getDynamicClassID() const; 275 // static UClassID getStaticClassID(void); 276 277 private: 278 void init(UErrorCode& status) const; 279 }; 280 281 282 enum { 283 OPEN_ANGLE = 0x003c, /* '<' */ 284 CLOSE_ANGLE = 0x003e, /* '>' */ 285 COMMA = 0x002c, 286 TICK = 0x0027, 287 QUOTE = 0x0022, 288 SPACE = 0x0020 289 }; 290 291 /** 292 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 293 */ 294 class LocDataParser { 295 UChar* data; 296 const UChar* e; 297 UChar* p; 298 UChar ch; 299 UParseError& pe; 300 UErrorCode& ec; 301 302 public: 303 LocDataParser(UParseError& parseError, UErrorCode& status) 304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 305 ~LocDataParser() {} 306 307 /* 308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 310 */ 311 StringLocalizationInfo* parse(UChar* data, int32_t len); 312 313 private: 314 315 void inc(void) { ++p; ch = 0xffff; } 316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 317 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} 319 UBool inList(UChar c, const UChar* list) const { 320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; 321 while (*list && *list != c) ++list; return *list == c; 322 } 323 void parseError(const char* msg); 324 325 StringLocalizationInfo* doParse(void); 326 327 UChar** nextArray(int32_t& requiredLength); 328 UChar* nextString(void); 329 }; 330 331 #ifdef RBNF_DEBUG 332 #define ERROR(msg) parseError(msg); return NULL; 333 #define EXPLANATION_ARG explanationArg 334 #else 335 #define ERROR(msg) parseError(NULL); return NULL; 336 #define EXPLANATION_ARG 337 #endif 338 339 340 static const UChar DQUOTE_STOPLIST[] = { 341 QUOTE, 0 342 }; 343 344 static const UChar SQUOTE_STOPLIST[] = { 345 TICK, 0 346 }; 347 348 static const UChar NOQUOTE_STOPLIST[] = { 349 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 350 }; 351 352 static void 353 DeleteFn(void* p) { 354 uprv_free(p); 355 } 356 357 StringLocalizationInfo* 358 LocDataParser::parse(UChar* _data, int32_t len) { 359 if (U_FAILURE(ec)) { 360 if (_data) uprv_free(_data); 361 return NULL; 362 } 363 364 pe.line = 0; 365 pe.offset = -1; 366 pe.postContext[0] = 0; 367 pe.preContext[0] = 0; 368 369 if (_data == NULL) { 370 ec = U_ILLEGAL_ARGUMENT_ERROR; 371 return NULL; 372 } 373 374 if (len <= 0) { 375 ec = U_ILLEGAL_ARGUMENT_ERROR; 376 uprv_free(_data); 377 return NULL; 378 } 379 380 data = _data; 381 e = data + len; 382 p = _data; 383 ch = 0xffff; 384 385 return doParse(); 386 } 387 388 389 StringLocalizationInfo* 390 LocDataParser::doParse(void) { 391 skipWhitespace(); 392 if (!checkInc(OPEN_ANGLE)) { 393 ERROR("Missing open angle"); 394 } else { 395 VArray array(DeleteFn); 396 UBool mightHaveNext = TRUE; 397 int32_t requiredLength = -1; 398 while (mightHaveNext) { 399 mightHaveNext = FALSE; 400 UChar** elem = nextArray(requiredLength); 401 skipWhitespace(); 402 UBool haveComma = check(COMMA); 403 if (elem) { 404 array.add(elem, ec); 405 if (haveComma) { 406 inc(); 407 mightHaveNext = TRUE; 408 } 409 } else if (haveComma) { 410 ERROR("Unexpected character"); 411 } 412 } 413 414 skipWhitespace(); 415 if (!checkInc(CLOSE_ANGLE)) { 416 if (check(OPEN_ANGLE)) { 417 ERROR("Missing comma in outer array"); 418 } else { 419 ERROR("Missing close angle bracket in outer array"); 420 } 421 } 422 423 skipWhitespace(); 424 if (p != e) { 425 ERROR("Extra text after close of localization data"); 426 } 427 428 array.add(NULL, ec); 429 if (U_SUCCESS(ec)) { 430 int32_t numLocs = array.length() - 2; // subtract first, NULL 431 UChar*** result = (UChar***)array.release(); 432 433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 434 } 435 } 436 437 ERROR("Unknown error"); 438 } 439 440 UChar** 441 LocDataParser::nextArray(int32_t& requiredLength) { 442 if (U_FAILURE(ec)) { 443 return NULL; 444 } 445 446 skipWhitespace(); 447 if (!checkInc(OPEN_ANGLE)) { 448 ERROR("Missing open angle"); 449 } 450 451 VArray array; 452 UBool mightHaveNext = TRUE; 453 while (mightHaveNext) { 454 mightHaveNext = FALSE; 455 UChar* elem = nextString(); 456 skipWhitespace(); 457 UBool haveComma = check(COMMA); 458 if (elem) { 459 array.add(elem, ec); 460 if (haveComma) { 461 inc(); 462 mightHaveNext = TRUE; 463 } 464 } else if (haveComma) { 465 ERROR("Unexpected comma"); 466 } 467 } 468 skipWhitespace(); 469 if (!checkInc(CLOSE_ANGLE)) { 470 if (check(OPEN_ANGLE)) { 471 ERROR("Missing close angle bracket in inner array"); 472 } else { 473 ERROR("Missing comma in inner array"); 474 } 475 } 476 477 array.add(NULL, ec); 478 if (U_SUCCESS(ec)) { 479 if (requiredLength == -1) { 480 requiredLength = array.length() + 1; 481 } else if (array.length() != requiredLength) { 482 ec = U_ILLEGAL_ARGUMENT_ERROR; 483 ERROR("Array not of required length"); 484 } 485 486 return (UChar**)array.release(); 487 } 488 ERROR("Unknown Error"); 489 } 490 491 UChar* 492 LocDataParser::nextString() { 493 UChar* result = NULL; 494 495 skipWhitespace(); 496 if (p < e) { 497 const UChar* terminators; 498 UChar c = *p; 499 UBool haveQuote = c == QUOTE || c == TICK; 500 if (haveQuote) { 501 inc(); 502 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 503 } else { 504 terminators = NOQUOTE_STOPLIST; 505 } 506 UChar* start = p; 507 while (p < e && !inList(*p, terminators)) ++p; 508 if (p == e) { 509 ERROR("Unexpected end of data"); 510 } 511 512 UChar x = *p; 513 if (p > start) { 514 ch = x; 515 *p = 0x0; // terminate by writing to data 516 result = start; // just point into data 517 } 518 if (haveQuote) { 519 if (x != c) { 520 ERROR("Missing matching quote"); 521 } else if (p == start) { 522 ERROR("Empty string"); 523 } 524 inc(); 525 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 526 ERROR("Unexpected character in string"); 527 } 528 } 529 530 // ok for there to be no next string 531 return result; 532 } 533 534 void LocDataParser::parseError(const char* EXPLANATION_ARG) 535 { 536 if (!data) { 537 return; 538 } 539 540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 541 if (start < data) { 542 start = data; 543 } 544 for (UChar* x = p; --x >= start;) { 545 if (!*x) { 546 start = x+1; 547 break; 548 } 549 } 550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 551 if (limit > e) { 552 limit = e; 553 } 554 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 555 pe.preContext[p-start] = 0; 556 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 557 pe.postContext[limit-p] = 0; 558 pe.offset = (int32_t)(p - data); 559 560 #ifdef RBNF_DEBUG 561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 562 563 UnicodeString msg; 564 msg.append(start, p - start); 565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 566 msg.append(p, limit-p); 567 msg.append(UNICODE_STRING_SIMPLE("'")); 568 569 char buf[128]; 570 int32_t len = msg.extract(0, msg.length(), buf, 128); 571 if (len >= 128) { 572 buf[127] = 0; 573 } else { 574 buf[len] = 0; 575 } 576 fprintf(stderr, "%s\n", buf); 577 fflush(stderr); 578 #endif 579 580 uprv_free(data); 581 data = NULL; 582 p = NULL; 583 e = NULL; 584 585 if (U_SUCCESS(ec)) { 586 ec = U_PARSE_ERROR; 587 } 588 } 589 590 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 591 592 StringLocalizationInfo* 593 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 594 if (U_FAILURE(status)) { 595 return NULL; 596 } 597 598 int32_t len = info.length(); 599 if (len == 0) { 600 return NULL; // no error; 601 } 602 603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 604 if (!p) { 605 status = U_MEMORY_ALLOCATION_ERROR; 606 return NULL; 607 } 608 info.extract(p, len, status); 609 if (!U_FAILURE(status)) { 610 status = U_ZERO_ERROR; // clear warning about non-termination 611 } 612 613 LocDataParser parser(perror, status); 614 return parser.parse(p, len); 615 } 616 617 StringLocalizationInfo::~StringLocalizationInfo() { 618 for (UChar*** p = (UChar***)data; *p; ++p) { 619 // remaining data is simply pointer into our unicode string data. 620 if (*p) uprv_free(*p); 621 } 622 if (data) uprv_free(data); 623 if (info) uprv_free(info); 624 } 625 626 627 const UChar* 628 StringLocalizationInfo::getRuleSetName(int32_t index) const { 629 if (index >= 0 && index < getNumberOfRuleSets()) { 630 return data[0][index]; 631 } 632 return NULL; 633 } 634 635 const UChar* 636 StringLocalizationInfo::getLocaleName(int32_t index) const { 637 if (index >= 0 && index < getNumberOfDisplayLocales()) { 638 return data[index+1][0]; 639 } 640 return NULL; 641 } 642 643 const UChar* 644 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 647 return data[localeIndex+1][ruleIndex+1]; 648 } 649 return NULL; 650 } 651 652 // ---------- 653 654 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 655 const UnicodeString& locs, 656 const Locale& alocale, UParseError& perror, UErrorCode& status) 657 : ruleSets(NULL) 658 , ruleSetDescriptions(NULL) 659 , numRuleSets(0) 660 , defaultRuleSet(NULL) 661 , locale(alocale) 662 , collator(NULL) 663 , decimalFormatSymbols(NULL) 664 , lenient(FALSE) 665 , lenientParseRules(NULL) 666 , localizations(NULL) 667 , capitalizationInfoSet(FALSE) 668 , capitalizationForUIListMenu(FALSE) 669 , capitalizationForStandAlone(FALSE) 670 , capitalizationBrkIter(NULL) 671 { 672 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 673 init(description, locinfo, perror, status); 674 } 675 676 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 677 const UnicodeString& locs, 678 UParseError& perror, UErrorCode& status) 679 : ruleSets(NULL) 680 , ruleSetDescriptions(NULL) 681 , numRuleSets(0) 682 , defaultRuleSet(NULL) 683 , locale(Locale::getDefault()) 684 , collator(NULL) 685 , decimalFormatSymbols(NULL) 686 , lenient(FALSE) 687 , lenientParseRules(NULL) 688 , localizations(NULL) 689 , capitalizationInfoSet(FALSE) 690 , capitalizationForUIListMenu(FALSE) 691 , capitalizationForStandAlone(FALSE) 692 , capitalizationBrkIter(NULL) 693 { 694 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 695 init(description, locinfo, perror, status); 696 } 697 698 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 699 LocalizationInfo* info, 700 const Locale& alocale, UParseError& perror, UErrorCode& status) 701 : ruleSets(NULL) 702 , ruleSetDescriptions(NULL) 703 , numRuleSets(0) 704 , defaultRuleSet(NULL) 705 , locale(alocale) 706 , collator(NULL) 707 , decimalFormatSymbols(NULL) 708 , lenient(FALSE) 709 , lenientParseRules(NULL) 710 , localizations(NULL) 711 , capitalizationInfoSet(FALSE) 712 , capitalizationForUIListMenu(FALSE) 713 , capitalizationForStandAlone(FALSE) 714 , capitalizationBrkIter(NULL) 715 { 716 init(description, info, perror, status); 717 } 718 719 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 720 UParseError& perror, 721 UErrorCode& status) 722 : ruleSets(NULL) 723 , ruleSetDescriptions(NULL) 724 , numRuleSets(0) 725 , defaultRuleSet(NULL) 726 , locale(Locale::getDefault()) 727 , collator(NULL) 728 , decimalFormatSymbols(NULL) 729 , lenient(FALSE) 730 , lenientParseRules(NULL) 731 , localizations(NULL) 732 , capitalizationInfoSet(FALSE) 733 , capitalizationForUIListMenu(FALSE) 734 , capitalizationForStandAlone(FALSE) 735 , capitalizationBrkIter(NULL) 736 { 737 init(description, NULL, perror, status); 738 } 739 740 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 741 const Locale& aLocale, 742 UParseError& perror, 743 UErrorCode& status) 744 : ruleSets(NULL) 745 , ruleSetDescriptions(NULL) 746 , numRuleSets(0) 747 , defaultRuleSet(NULL) 748 , locale(aLocale) 749 , collator(NULL) 750 , decimalFormatSymbols(NULL) 751 , lenient(FALSE) 752 , lenientParseRules(NULL) 753 , localizations(NULL) 754 , capitalizationInfoSet(FALSE) 755 , capitalizationForUIListMenu(FALSE) 756 , capitalizationForStandAlone(FALSE) 757 , capitalizationBrkIter(NULL) 758 { 759 init(description, NULL, perror, status); 760 } 761 762 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 763 : ruleSets(NULL) 764 , ruleSetDescriptions(NULL) 765 , numRuleSets(0) 766 , defaultRuleSet(NULL) 767 , locale(alocale) 768 , collator(NULL) 769 , decimalFormatSymbols(NULL) 770 , lenient(FALSE) 771 , lenientParseRules(NULL) 772 , localizations(NULL) 773 , capitalizationInfoSet(FALSE) 774 , capitalizationForUIListMenu(FALSE) 775 , capitalizationForStandAlone(FALSE) 776 , capitalizationBrkIter(NULL) 777 { 778 if (U_FAILURE(status)) { 779 return; 780 } 781 782 const char* rules_tag = "RBNFRules"; 783 const char* fmt_tag = ""; 784 switch (tag) { 785 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 786 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 787 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 788 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 789 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 790 } 791 792 // TODO: read localization info from resource 793 LocalizationInfo* locinfo = NULL; 794 795 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 796 if (U_SUCCESS(status)) { 797 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 798 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 799 800 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 801 if (U_FAILURE(status)) { 802 ures_close(nfrb); 803 } 804 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 805 if (U_FAILURE(status)) { 806 ures_close(rbnfRules); 807 ures_close(nfrb); 808 return; 809 } 810 811 UnicodeString desc; 812 while (ures_hasNext(ruleSets)) { 813 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 814 } 815 UParseError perror; 816 817 init (desc, locinfo, perror, status); 818 819 ures_close(ruleSets); 820 ures_close(rbnfRules); 821 } 822 ures_close(nfrb); 823 } 824 825 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 826 : NumberFormat(rhs) 827 , ruleSets(NULL) 828 , ruleSetDescriptions(NULL) 829 , numRuleSets(0) 830 , defaultRuleSet(NULL) 831 , locale(rhs.locale) 832 , collator(NULL) 833 , decimalFormatSymbols(NULL) 834 , lenient(FALSE) 835 , lenientParseRules(NULL) 836 , localizations(NULL) 837 , capitalizationInfoSet(FALSE) 838 , capitalizationForUIListMenu(FALSE) 839 , capitalizationForStandAlone(FALSE) 840 , capitalizationBrkIter(NULL) 841 { 842 this->operator=(rhs); 843 } 844 845 // -------- 846 847 RuleBasedNumberFormat& 848 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 849 { 850 if (this == &rhs) { 851 return *this; 852 } 853 NumberFormat::operator=(rhs); 854 UErrorCode status = U_ZERO_ERROR; 855 dispose(); 856 locale = rhs.locale; 857 lenient = rhs.lenient; 858 859 UParseError perror; 860 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 861 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 862 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status); 863 864 capitalizationInfoSet = rhs.capitalizationInfoSet; 865 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu; 866 capitalizationForStandAlone = rhs.capitalizationForStandAlone; 867 #if !UCONFIG_NO_BREAK_ITERATION 868 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL; 869 #endif 870 871 return *this; 872 } 873 874 RuleBasedNumberFormat::~RuleBasedNumberFormat() 875 { 876 dispose(); 877 } 878 879 Format* 880 RuleBasedNumberFormat::clone(void) const 881 { 882 return new RuleBasedNumberFormat(*this); 883 } 884 885 UBool 886 RuleBasedNumberFormat::operator==(const Format& other) const 887 { 888 if (this == &other) { 889 return TRUE; 890 } 891 892 if (typeid(*this) == typeid(other)) { 893 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 894 // test for capitalization info equality is adequately handled 895 // by the NumberFormat test for fCapitalizationContext equality; 896 // the info here is just derived from that. 897 if (locale == rhs.locale && 898 lenient == rhs.lenient && 899 (localizations == NULL 900 ? rhs.localizations == NULL 901 : (rhs.localizations == NULL 902 ? FALSE 903 : *localizations == rhs.localizations))) { 904 905 NFRuleSet** p = ruleSets; 906 NFRuleSet** q = rhs.ruleSets; 907 if (p == NULL) { 908 return q == NULL; 909 } else if (q == NULL) { 910 return FALSE; 911 } 912 while (*p && *q && (**p == **q)) { 913 ++p; 914 ++q; 915 } 916 return *q == NULL && *p == NULL; 917 } 918 } 919 920 return FALSE; 921 } 922 923 UnicodeString 924 RuleBasedNumberFormat::getRules() const 925 { 926 UnicodeString result; 927 if (ruleSets != NULL) { 928 for (NFRuleSet** p = ruleSets; *p; ++p) { 929 (*p)->appendRules(result); 930 } 931 } 932 return result; 933 } 934 935 UnicodeString 936 RuleBasedNumberFormat::getRuleSetName(int32_t index) const 937 { 938 if (localizations) { 939 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 940 return string; 941 } else if (ruleSets) { 942 UnicodeString result; 943 for (NFRuleSet** p = ruleSets; *p; ++p) { 944 NFRuleSet* rs = *p; 945 if (rs->isPublic()) { 946 if (--index == -1) { 947 rs->getName(result); 948 return result; 949 } 950 } 951 } 952 } 953 UnicodeString empty; 954 return empty; 955 } 956 957 int32_t 958 RuleBasedNumberFormat::getNumberOfRuleSetNames() const 959 { 960 int32_t result = 0; 961 if (localizations) { 962 result = localizations->getNumberOfRuleSets(); 963 } else if (ruleSets) { 964 for (NFRuleSet** p = ruleSets; *p; ++p) { 965 if ((**p).isPublic()) { 966 ++result; 967 } 968 } 969 } 970 return result; 971 } 972 973 int32_t 974 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 975 if (localizations) { 976 return localizations->getNumberOfDisplayLocales(); 977 } 978 return 0; 979 } 980 981 Locale 982 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 983 if (U_FAILURE(status)) { 984 return Locale(""); 985 } 986 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 987 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 988 char buffer[64]; 989 int32_t cap = name.length() + 1; 990 char* bp = buffer; 991 if (cap > 64) { 992 bp = (char *)uprv_malloc(cap); 993 if (bp == NULL) { 994 status = U_MEMORY_ALLOCATION_ERROR; 995 return Locale(""); 996 } 997 } 998 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 999 Locale retLocale(bp); 1000 if (bp != buffer) { 1001 uprv_free(bp); 1002 } 1003 return retLocale; 1004 } 1005 status = U_ILLEGAL_ARGUMENT_ERROR; 1006 Locale retLocale; 1007 return retLocale; 1008 } 1009 1010 UnicodeString 1011 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1012 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1013 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1014 int32_t len = localeName.length(); 1015 UChar* localeStr = localeName.getBuffer(len + 1); 1016 while (len >= 0) { 1017 localeStr[len] = 0; 1018 int32_t ix = localizations->indexForLocale(localeStr); 1019 if (ix >= 0) { 1020 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1021 return name; 1022 } 1023 1024 // trim trailing portion, skipping over ommitted sections 1025 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1026 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1027 } 1028 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1029 return name; 1030 } 1031 UnicodeString bogus; 1032 bogus.setToBogus(); 1033 return bogus; 1034 } 1035 1036 UnicodeString 1037 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1038 if (localizations) { 1039 UnicodeString rsn(ruleSetName); 1040 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1041 return getRuleSetDisplayName(ix, localeParam); 1042 } 1043 UnicodeString bogus; 1044 bogus.setToBogus(); 1045 return bogus; 1046 } 1047 1048 NFRuleSet* 1049 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1050 { 1051 if (U_SUCCESS(status) && ruleSets) { 1052 for (NFRuleSet** p = ruleSets; *p; ++p) { 1053 NFRuleSet* rs = *p; 1054 if (rs->isNamed(name)) { 1055 return rs; 1056 } 1057 } 1058 status = U_ILLEGAL_ARGUMENT_ERROR; 1059 } 1060 return NULL; 1061 } 1062 1063 UnicodeString& 1064 RuleBasedNumberFormat::format(int32_t number, 1065 UnicodeString& toAppendTo, 1066 FieldPosition& /* pos */) const 1067 { 1068 if (defaultRuleSet) { 1069 UErrorCode status = U_ZERO_ERROR; 1070 int32_t startPos = toAppendTo.length(); 1071 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status); 1072 adjustForCapitalizationContext(startPos, toAppendTo); 1073 } 1074 return toAppendTo; 1075 } 1076 1077 1078 UnicodeString& 1079 RuleBasedNumberFormat::format(int64_t number, 1080 UnicodeString& toAppendTo, 1081 FieldPosition& /* pos */) const 1082 { 1083 if (defaultRuleSet) { 1084 UErrorCode status = U_ZERO_ERROR; 1085 int32_t startPos = toAppendTo.length(); 1086 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status); 1087 adjustForCapitalizationContext(startPos, toAppendTo); 1088 } 1089 return toAppendTo; 1090 } 1091 1092 1093 UnicodeString& 1094 RuleBasedNumberFormat::format(double number, 1095 UnicodeString& toAppendTo, 1096 FieldPosition& /* pos */) const 1097 { 1098 int32_t startPos = toAppendTo.length(); 1099 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. 1100 if (uprv_isNaN(number)) { 1101 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal 1102 if (decFmtSyms) { 1103 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); 1104 } 1105 } else if (defaultRuleSet) { 1106 UErrorCode status = U_ZERO_ERROR; 1107 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status); 1108 } 1109 return adjustForCapitalizationContext(startPos, toAppendTo); 1110 } 1111 1112 1113 UnicodeString& 1114 RuleBasedNumberFormat::format(int32_t number, 1115 const UnicodeString& ruleSetName, 1116 UnicodeString& toAppendTo, 1117 FieldPosition& /* pos */, 1118 UErrorCode& status) const 1119 { 1120 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1121 if (U_SUCCESS(status)) { 1122 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1123 // throw new IllegalArgumentException("Can't use internal rule set"); 1124 status = U_ILLEGAL_ARGUMENT_ERROR; 1125 } else { 1126 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1127 if (rs) { 1128 int32_t startPos = toAppendTo.length(); 1129 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status); 1130 adjustForCapitalizationContext(startPos, toAppendTo); 1131 } 1132 } 1133 } 1134 return toAppendTo; 1135 } 1136 1137 1138 UnicodeString& 1139 RuleBasedNumberFormat::format(int64_t number, 1140 const UnicodeString& ruleSetName, 1141 UnicodeString& toAppendTo, 1142 FieldPosition& /* pos */, 1143 UErrorCode& status) const 1144 { 1145 if (U_SUCCESS(status)) { 1146 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1147 // throw new IllegalArgumentException("Can't use internal rule set"); 1148 status = U_ILLEGAL_ARGUMENT_ERROR; 1149 } else { 1150 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1151 if (rs) { 1152 int32_t startPos = toAppendTo.length(); 1153 rs->format(number, toAppendTo, toAppendTo.length(), status); 1154 adjustForCapitalizationContext(startPos, toAppendTo); 1155 } 1156 } 1157 } 1158 return toAppendTo; 1159 } 1160 1161 1162 UnicodeString& 1163 RuleBasedNumberFormat::format(double number, 1164 const UnicodeString& ruleSetName, 1165 UnicodeString& toAppendTo, 1166 FieldPosition& /* pos */, 1167 UErrorCode& status) const 1168 { 1169 if (U_SUCCESS(status)) { 1170 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1171 // throw new IllegalArgumentException("Can't use internal rule set"); 1172 status = U_ILLEGAL_ARGUMENT_ERROR; 1173 } else { 1174 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1175 if (rs) { 1176 int32_t startPos = toAppendTo.length(); 1177 rs->format(number, toAppendTo, toAppendTo.length(), status); 1178 adjustForCapitalizationContext(startPos, toAppendTo); 1179 } 1180 } 1181 } 1182 return toAppendTo; 1183 } 1184 1185 UnicodeString& 1186 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1187 UnicodeString& currentResult) const 1188 { 1189 #if !UCONFIG_NO_BREAK_ITERATION 1190 if (startPos==0 && currentResult.length() > 0) { 1191 // capitalize currentResult according to context 1192 UChar32 ch = currentResult.char32At(0); 1193 UErrorCode status = U_ZERO_ERROR; 1194 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1195 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && 1196 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1197 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1198 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1199 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1200 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1201 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1202 } 1203 } 1204 #endif 1205 return currentResult; 1206 } 1207 1208 1209 void 1210 RuleBasedNumberFormat::parse(const UnicodeString& text, 1211 Formattable& result, 1212 ParsePosition& parsePosition) const 1213 { 1214 if (!ruleSets) { 1215 parsePosition.setErrorIndex(0); 1216 return; 1217 } 1218 1219 UnicodeString workingText(text, parsePosition.getIndex()); 1220 ParsePosition workingPos(0); 1221 1222 ParsePosition high_pp(0); 1223 Formattable high_result; 1224 1225 for (NFRuleSet** p = ruleSets; *p; ++p) { 1226 NFRuleSet *rp = *p; 1227 if (rp->isPublic() && rp->isParseable()) { 1228 ParsePosition working_pp(0); 1229 Formattable working_result; 1230 1231 rp->parse(workingText, working_pp, kMaxDouble, working_result); 1232 if (working_pp.getIndex() > high_pp.getIndex()) { 1233 high_pp = working_pp; 1234 high_result = working_result; 1235 1236 if (high_pp.getIndex() == workingText.length()) { 1237 break; 1238 } 1239 } 1240 } 1241 } 1242 1243 int32_t startIndex = parsePosition.getIndex(); 1244 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1245 if (high_pp.getIndex() > 0) { 1246 parsePosition.setErrorIndex(-1); 1247 } else { 1248 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1249 parsePosition.setErrorIndex(startIndex + errorIndex); 1250 } 1251 result = high_result; 1252 if (result.getType() == Formattable::kDouble) { 1253 int32_t r = (int32_t)result.getDouble(); 1254 if ((double)r == result.getDouble()) { 1255 result.setLong(r); 1256 } 1257 } 1258 } 1259 1260 #if !UCONFIG_NO_COLLATION 1261 1262 void 1263 RuleBasedNumberFormat::setLenient(UBool enabled) 1264 { 1265 lenient = enabled; 1266 if (!enabled && collator) { 1267 delete collator; 1268 collator = NULL; 1269 } 1270 } 1271 1272 #endif 1273 1274 void 1275 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1276 if (U_SUCCESS(status)) { 1277 if (ruleSetName.isEmpty()) { 1278 if (localizations) { 1279 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1280 defaultRuleSet = findRuleSet(name, status); 1281 } else { 1282 initDefaultRuleSet(); 1283 } 1284 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1285 status = U_ILLEGAL_ARGUMENT_ERROR; 1286 } else { 1287 NFRuleSet* result = findRuleSet(ruleSetName, status); 1288 if (result != NULL) { 1289 defaultRuleSet = result; 1290 } 1291 } 1292 } 1293 } 1294 1295 UnicodeString 1296 RuleBasedNumberFormat::getDefaultRuleSetName() const { 1297 UnicodeString result; 1298 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1299 defaultRuleSet->getName(result); 1300 } else { 1301 result.setToBogus(); 1302 } 1303 return result; 1304 } 1305 1306 void 1307 RuleBasedNumberFormat::initDefaultRuleSet() 1308 { 1309 defaultRuleSet = NULL; 1310 if (!ruleSets) { 1311 return; 1312 } 1313 1314 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); 1315 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); 1316 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); 1317 1318 NFRuleSet**p = &ruleSets[0]; 1319 while (*p) { 1320 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1321 defaultRuleSet = *p; 1322 return; 1323 } else { 1324 ++p; 1325 } 1326 } 1327 1328 defaultRuleSet = *--p; 1329 if (!defaultRuleSet->isPublic()) { 1330 while (p != ruleSets) { 1331 if ((*--p)->isPublic()) { 1332 defaultRuleSet = *p; 1333 break; 1334 } 1335 } 1336 } 1337 } 1338 1339 1340 void 1341 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1342 UParseError& pErr, UErrorCode& status) 1343 { 1344 // TODO: implement UParseError 1345 uprv_memset(&pErr, 0, sizeof(UParseError)); 1346 // Note: this can leave ruleSets == NULL, so remaining code should check 1347 if (U_FAILURE(status)) { 1348 return; 1349 } 1350 1351 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1352 1353 UnicodeString description(rules); 1354 if (!description.length()) { 1355 status = U_MEMORY_ALLOCATION_ERROR; 1356 return; 1357 } 1358 1359 // start by stripping the trailing whitespace from all the rules 1360 // (this is all the whitespace follwing each semicolon in the 1361 // description). This allows us to look for rule-set boundaries 1362 // by searching for ";%" without having to worry about whitespace 1363 // between the ; and the % 1364 stripWhitespace(description); 1365 1366 // check to see if there's a set of lenient-parse rules. If there 1367 // is, pull them out into our temporary holding place for them, 1368 // and delete them from the description before the real desciption- 1369 // parsing code sees them 1370 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1371 if (lp != -1) { 1372 // we've got to make sure we're not in the middle of a rule 1373 // (where "%%lenient-parse" would actually get treated as 1374 // rule text) 1375 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1376 // locate the beginning and end of the actual collation 1377 // rules (there may be whitespace between the name and 1378 // the first token in the description) 1379 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1380 1381 if (lpEnd == -1) { 1382 lpEnd = description.length() - 1; 1383 } 1384 int lpStart = lp + u_strlen(gLenientParse); 1385 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1386 ++lpStart; 1387 } 1388 1389 // copy out the lenient-parse rules and delete them 1390 // from the description 1391 lenientParseRules = new UnicodeString(); 1392 /* test for NULL */ 1393 if (lenientParseRules == 0) { 1394 status = U_MEMORY_ALLOCATION_ERROR; 1395 return; 1396 } 1397 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1398 1399 description.remove(lp, lpEnd + 1 - lp); 1400 } 1401 } 1402 1403 // pre-flight parsing the description and count the number of 1404 // rule sets (";%" marks the end of one rule set and the beginning 1405 // of the next) 1406 numRuleSets = 0; 1407 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1408 ++numRuleSets; 1409 ++p; 1410 } 1411 ++numRuleSets; 1412 1413 // our rule list is an array of the appropriate size 1414 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1415 /* test for NULL */ 1416 if (ruleSets == 0) { 1417 status = U_MEMORY_ALLOCATION_ERROR; 1418 return; 1419 } 1420 1421 for (int i = 0; i <= numRuleSets; ++i) { 1422 ruleSets[i] = NULL; 1423 } 1424 1425 // divide up the descriptions into individual rule-set descriptions 1426 // and store them in a temporary array. At each step, we also 1427 // new up a rule set, but all this does is initialize its name 1428 // and remove it from its description. We can't actually parse 1429 // the rest of the descriptions and finish initializing everything 1430 // because we have to know the names and locations of all the rule 1431 // sets before we can actually set everything up 1432 if(!numRuleSets) { 1433 status = U_ILLEGAL_ARGUMENT_ERROR; 1434 return; 1435 } 1436 1437 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1438 if (ruleSetDescriptions == 0) { 1439 status = U_MEMORY_ALLOCATION_ERROR; 1440 return; 1441 } 1442 1443 { 1444 int curRuleSet = 0; 1445 int32_t start = 0; 1446 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1447 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1448 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1449 if (ruleSets[curRuleSet] == 0) { 1450 status = U_MEMORY_ALLOCATION_ERROR; 1451 return; 1452 } 1453 ++curRuleSet; 1454 start = p + 1; 1455 } 1456 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1457 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1458 if (ruleSets[curRuleSet] == 0) { 1459 status = U_MEMORY_ALLOCATION_ERROR; 1460 return; 1461 } 1462 } 1463 1464 // now we can take note of the formatter's default rule set, which 1465 // is the last public rule set in the description (it's the last 1466 // rather than the first so that a user can create a new formatter 1467 // from an existing formatter and change its default behavior just 1468 // by appending more rule sets to the end) 1469 1470 // {dlf} Initialization of a fraction rule set requires the default rule 1471 // set to be known. For purposes of initialization, this is always the 1472 // last public rule set, no matter what the localization data says. 1473 initDefaultRuleSet(); 1474 1475 // finally, we can go back through the temporary descriptions 1476 // list and finish seting up the substructure (and we throw 1477 // away the temporary descriptions as we go) 1478 { 1479 for (int i = 0; i < numRuleSets; i++) { 1480 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1481 } 1482 } 1483 1484 // Now that the rules are initialized, the 'real' default rule 1485 // set can be adjusted by the localization data. 1486 1487 // The C code keeps the localization array as is, rather than building 1488 // a separate array of the public rule set names, so we have less work 1489 // to do here-- but we still need to check the names. 1490 1491 if (localizationInfos) { 1492 // confirm the names, if any aren't in the rules, that's an error 1493 // it is ok if the rules contain public rule sets that are not in this list 1494 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1495 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1496 NFRuleSet* rs = findRuleSet(name, status); 1497 if (rs == NULL) { 1498 break; // error 1499 } 1500 if (i == 0) { 1501 defaultRuleSet = rs; 1502 } 1503 } 1504 } else { 1505 defaultRuleSet = getDefaultRuleSet(); 1506 } 1507 originalDescription = rules; 1508 } 1509 1510 // override the NumberFormat implementation in order to 1511 // lazily initialize relevant items 1512 void 1513 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1514 { 1515 NumberFormat::setContext(value, status); 1516 if (U_SUCCESS(status)) { 1517 if (!capitalizationInfoSet && 1518 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1519 initCapitalizationContextInfo(locale); 1520 capitalizationInfoSet = TRUE; 1521 } 1522 #if !UCONFIG_NO_BREAK_ITERATION 1523 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1524 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1525 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1526 UErrorCode status = U_ZERO_ERROR; 1527 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1528 if (U_FAILURE(status)) { 1529 delete capitalizationBrkIter; 1530 capitalizationBrkIter = NULL; 1531 } 1532 } 1533 #endif 1534 } 1535 } 1536 1537 void 1538 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1539 { 1540 #if !UCONFIG_NO_BREAK_ITERATION 1541 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1542 UErrorCode status = U_ZERO_ERROR; 1543 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1544 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1545 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1546 if (U_SUCCESS(status) && rb != NULL) { 1547 int32_t len = 0; 1548 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1549 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1550 capitalizationForUIListMenu = intVector[0]; 1551 capitalizationForStandAlone = intVector[1]; 1552 } 1553 } 1554 ures_close(rb); 1555 #endif 1556 } 1557 1558 void 1559 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1560 { 1561 // iterate through the characters... 1562 UnicodeString result; 1563 1564 int start = 0; 1565 while (start != -1 && start < description.length()) { 1566 // seek to the first non-whitespace character... 1567 while (start < description.length() 1568 && PatternProps::isWhiteSpace(description.charAt(start))) { 1569 ++start; 1570 } 1571 1572 // locate the next semicolon in the text and copy the text from 1573 // our current position up to that semicolon into the result 1574 int32_t p = description.indexOf(gSemiColon, start); 1575 if (p == -1) { 1576 // or if we don't find a semicolon, just copy the rest of 1577 // the string into the result 1578 result.append(description, start, description.length() - start); 1579 start = -1; 1580 } 1581 else if (p < description.length()) { 1582 result.append(description, start, p + 1 - start); 1583 start = p + 1; 1584 } 1585 1586 // when we get here, we've seeked off the end of the sring, and 1587 // we terminate the loop (we continue until *start* is -1 rather 1588 // than until *p* is -1, because otherwise we'd miss the last 1589 // rule in the description) 1590 else { 1591 start = -1; 1592 } 1593 } 1594 1595 description.setTo(result); 1596 } 1597 1598 1599 void 1600 RuleBasedNumberFormat::dispose() 1601 { 1602 if (ruleSets) { 1603 for (NFRuleSet** p = ruleSets; *p; ++p) { 1604 delete *p; 1605 } 1606 uprv_free(ruleSets); 1607 ruleSets = NULL; 1608 } 1609 1610 if (ruleSetDescriptions) { 1611 delete [] ruleSetDescriptions; 1612 } 1613 1614 #if !UCONFIG_NO_COLLATION 1615 delete collator; 1616 #endif 1617 collator = NULL; 1618 1619 delete decimalFormatSymbols; 1620 decimalFormatSymbols = NULL; 1621 1622 delete lenientParseRules; 1623 lenientParseRules = NULL; 1624 1625 #if !UCONFIG_NO_BREAK_ITERATION 1626 delete capitalizationBrkIter; 1627 capitalizationBrkIter = NULL; 1628 #endif 1629 1630 if (localizations) localizations = localizations->unref(); 1631 } 1632 1633 1634 //----------------------------------------------------------------------- 1635 // package-internal API 1636 //----------------------------------------------------------------------- 1637 1638 /** 1639 * Returns the collator to use for lenient parsing. The collator is lazily created: 1640 * this function creates it the first time it's called. 1641 * @return The collator to use for lenient parsing, or null if lenient parsing 1642 * is turned off. 1643 */ 1644 const RuleBasedCollator* 1645 RuleBasedNumberFormat::getCollator() const 1646 { 1647 #if !UCONFIG_NO_COLLATION 1648 if (!ruleSets) { 1649 return NULL; 1650 } 1651 1652 // lazy-evaluate the collator 1653 if (collator == NULL && lenient) { 1654 // create a default collator based on the formatter's locale, 1655 // then pull out that collator's rules, append any additional 1656 // rules specified in the description, and create a _new_ 1657 // collator based on the combinaiton of those rules 1658 1659 UErrorCode status = U_ZERO_ERROR; 1660 1661 Collator* temp = Collator::createInstance(locale, status); 1662 RuleBasedCollator* newCollator; 1663 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1664 if (lenientParseRules) { 1665 UnicodeString rules(newCollator->getRules()); 1666 rules.append(*lenientParseRules); 1667 1668 newCollator = new RuleBasedCollator(rules, status); 1669 // Exit if newCollator could not be created. 1670 if (newCollator == NULL) { 1671 return NULL; 1672 } 1673 } else { 1674 temp = NULL; 1675 } 1676 if (U_SUCCESS(status)) { 1677 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1678 // cast away const 1679 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1680 } else { 1681 delete newCollator; 1682 } 1683 } 1684 delete temp; 1685 } 1686 #endif 1687 1688 // if lenient-parse mode is off, this will be null 1689 // (see setLenientParseMode()) 1690 return collator; 1691 } 1692 1693 1694 /** 1695 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1696 * instances owned by this formatter. This object is lazily created: this function 1697 * creates it the first time it's called. 1698 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat 1699 * instances owned by this formatter. 1700 */ 1701 DecimalFormatSymbols* 1702 RuleBasedNumberFormat::getDecimalFormatSymbols() const 1703 { 1704 // lazy-evaluate the DecimalFormatSymbols object. This object 1705 // is shared by all DecimalFormat instances belonging to this 1706 // formatter 1707 if (decimalFormatSymbols == NULL) { 1708 UErrorCode status = U_ZERO_ERROR; 1709 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1710 if (U_SUCCESS(status)) { 1711 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; 1712 } else { 1713 delete temp; 1714 } 1715 } 1716 return decimalFormatSymbols; 1717 } 1718 1719 // De-owning the current localized symbols and adopt the new symbols. 1720 void 1721 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1722 { 1723 if (symbolsToAdopt == NULL) { 1724 return; // do not allow caller to set decimalFormatSymbols to NULL 1725 } 1726 1727 if (decimalFormatSymbols != NULL) { 1728 delete decimalFormatSymbols; 1729 } 1730 1731 decimalFormatSymbols = symbolsToAdopt; 1732 1733 { 1734 // Apply the new decimalFormatSymbols by reparsing the rulesets 1735 UErrorCode status = U_ZERO_ERROR; 1736 1737 for (int32_t i = 0; i < numRuleSets; i++) { 1738 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1739 } 1740 } 1741 } 1742 1743 // Setting the symbols is equlivalent to adopting a newly created localized symbols. 1744 void 1745 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1746 { 1747 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1748 } 1749 1750 PluralFormat * 1751 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1752 const UnicodeString &pattern, 1753 UErrorCode& status) const 1754 { 1755 return new PluralFormat(locale, pluralType, pattern, status); 1756 } 1757 1758 U_NAMESPACE_END 1759 1760 /* U_HAVE_RBNF */ 1761 #endif 1762