1 /* 2 ****************************************************************************** 3 * Copyright (C) 1997-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ****************************************************************************** 6 * file name: nfrs.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * Modification history 12 * Date Name Comments 13 * 10/11/2001 Doug Ported from ICU4J 14 */ 15 16 #include "nfrs.h" 17 18 #if U_HAVE_RBNF 19 20 #include "unicode/uchar.h" 21 #include "nfrule.h" 22 #include "nfrlist.h" 23 #include "patternprops.h" 24 25 #ifdef RBNF_DEBUG 26 #include "cmemory.h" 27 #endif 28 29 enum { 30 /** -x */ 31 NEGATIVE_RULE_INDEX = 0, 32 /** x.x */ 33 IMPROPER_FRACTION_RULE_INDEX = 1, 34 /** 0.x */ 35 PROPER_FRACTION_RULE_INDEX = 2, 36 /** x.0 */ 37 MASTER_RULE_INDEX = 3, 38 /** Inf */ 39 INFINITY_RULE_INDEX = 4, 40 /** NaN */ 41 NAN_RULE_INDEX = 5, 42 NON_NUMERICAL_RULE_LENGTH = 6 43 }; 44 45 U_NAMESPACE_BEGIN 46 47 #if 0 48 // euclid's algorithm works with doubles 49 // note, doubles only get us up to one quadrillion or so, which 50 // isn't as much range as we get with longs. We probably still 51 // want either 64-bit math, or BigInteger. 52 53 static int64_t 54 util_lcm(int64_t x, int64_t y) 55 { 56 x.abs(); 57 y.abs(); 58 59 if (x == 0 || y == 0) { 60 return 0; 61 } else { 62 do { 63 if (x < y) { 64 int64_t t = x; x = y; y = t; 65 } 66 x -= y * (x/y); 67 } while (x != 0); 68 69 return y; 70 } 71 } 72 73 #else 74 /** 75 * Calculates the least common multiple of x and y. 76 */ 77 static int64_t 78 util_lcm(int64_t x, int64_t y) 79 { 80 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 81 // vol. 2, 1st ed., pp. 298-299 82 int64_t x1 = x; 83 int64_t y1 = y; 84 85 int p2 = 0; 86 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 87 ++p2; 88 x1 >>= 1; 89 y1 >>= 1; 90 } 91 92 int64_t t; 93 if ((x1 & 1) == 1) { 94 t = -y1; 95 } else { 96 t = x1; 97 } 98 99 while (t != 0) { 100 while ((t & 1) == 0) { 101 t = t >> 1; 102 } 103 if (t > 0) { 104 x1 = t; 105 } else { 106 y1 = -t; 107 } 108 t = x1 - y1; 109 } 110 111 int64_t gcd = x1 << p2; 112 113 // x * y == gcd(x, y) * lcm(x, y) 114 return x / gcd * y; 115 } 116 #endif 117 118 static const UChar gPercent = 0x0025; 119 static const UChar gColon = 0x003a; 120 static const UChar gSemicolon = 0x003b; 121 static const UChar gLineFeed = 0x000a; 122 123 static const UChar gPercentPercent[] = 124 { 125 0x25, 0x25, 0 126 }; /* "%%" */ 127 128 static const UChar gNoparse[] = 129 { 130 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 131 }; /* "@noparse" */ 132 133 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) 134 : name() 135 , rules(0) 136 , owner(_owner) 137 , fractionRules() 138 , fIsFractionRuleSet(FALSE) 139 , fIsPublic(FALSE) 140 , fIsParseable(TRUE) 141 { 142 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 143 nonNumericalRules[i] = NULL; 144 } 145 146 if (U_FAILURE(status)) { 147 return; 148 } 149 150 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 151 152 if (description.length() == 0) { 153 // throw new IllegalArgumentException("Empty rule set description"); 154 status = U_PARSE_ERROR; 155 return; 156 } 157 158 // if the description begins with a rule set name (the rule set 159 // name can be omitted in formatter descriptions that consist 160 // of only one rule set), copy it out into our "name" member 161 // and delete it from the description 162 if (description.charAt(0) == gPercent) { 163 int32_t pos = description.indexOf(gColon); 164 if (pos == -1) { 165 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 166 status = U_PARSE_ERROR; 167 } else { 168 name.setTo(description, 0, pos); 169 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 170 } 171 description.remove(0, pos); 172 } 173 } else { 174 name.setTo(UNICODE_STRING_SIMPLE("%default")); 175 } 176 177 if (description.length() == 0) { 178 // throw new IllegalArgumentException("Empty rule set description"); 179 status = U_PARSE_ERROR; 180 } 181 182 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; 183 184 if ( name.endsWith(gNoparse,8) ) { 185 fIsParseable = FALSE; 186 name.truncate(name.length()-8); // remove the @noparse from the name 187 } 188 189 // all of the other members of NFRuleSet are initialized 190 // by parseRules() 191 } 192 193 void 194 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) 195 { 196 // start by creating a Vector whose elements are Strings containing 197 // the descriptions of the rules (one rule per element). The rules 198 // are separated by semicolons (there's no escape facility: ALL 199 // semicolons are rule delimiters) 200 201 if (U_FAILURE(status)) { 202 return; 203 } 204 205 // ensure we are starting with an empty rule list 206 rules.deleteAll(); 207 208 // dlf - the original code kept a separate description array for no reason, 209 // so I got rid of it. The loop was too complex so I simplified it. 210 211 UnicodeString currentDescription; 212 int32_t oldP = 0; 213 while (oldP < description.length()) { 214 int32_t p = description.indexOf(gSemicolon, oldP); 215 if (p == -1) { 216 p = description.length(); 217 } 218 currentDescription.setTo(description, oldP, p - oldP); 219 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 220 oldP = p + 1; 221 } 222 223 // for rules that didn't specify a base value, their base values 224 // were initialized to 0. Make another pass through the list and 225 // set all those rules' base values. We also remove any special 226 // rules from the list and put them into their own member variables 227 int64_t defaultBaseValue = 0; 228 229 // (this isn't a for loop because we might be deleting items from 230 // the vector-- we want to make sure we only increment i when 231 // we _didn't_ delete aything from the vector) 232 int32_t rulesSize = rules.size(); 233 for (int32_t i = 0; i < rulesSize; i++) { 234 NFRule* rule = rules[i]; 235 int64_t baseValue = rule->getBaseValue(); 236 237 if (baseValue == 0) { 238 // if the rule's base value is 0, fill in a default 239 // base value (this will be 1 plus the preceding 240 // rule's base value for regular rule sets, and the 241 // same as the preceding rule's base value in fraction 242 // rule sets) 243 rule->setBaseValue(defaultBaseValue, status); 244 } 245 else { 246 // if it's a regular rule that already knows its base value, 247 // check to make sure the rules are in order, and update 248 // the default base value for the next rule 249 if (baseValue < defaultBaseValue) { 250 // throw new IllegalArgumentException("Rules are not in order"); 251 status = U_PARSE_ERROR; 252 return; 253 } 254 defaultBaseValue = baseValue; 255 } 256 if (!fIsFractionRuleSet) { 257 ++defaultBaseValue; 258 } 259 } 260 } 261 262 /** 263 * Set one of the non-numerical rules. 264 * @param rule The rule to set. 265 */ 266 void NFRuleSet::setNonNumericalRule(NFRule *rule) { 267 int64_t baseValue = rule->getBaseValue(); 268 if (baseValue == NFRule::kNegativeNumberRule) { 269 delete nonNumericalRules[NEGATIVE_RULE_INDEX]; 270 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; 271 } 272 else if (baseValue == NFRule::kImproperFractionRule) { 273 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE); 274 } 275 else if (baseValue == NFRule::kProperFractionRule) { 276 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); 277 } 278 else if (baseValue == NFRule::kMasterRule) { 279 setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); 280 } 281 else if (baseValue == NFRule::kInfinityRule) { 282 delete nonNumericalRules[INFINITY_RULE_INDEX]; 283 nonNumericalRules[INFINITY_RULE_INDEX] = rule; 284 } 285 else if (baseValue == NFRule::kNaNRule) { 286 delete nonNumericalRules[NAN_RULE_INDEX]; 287 nonNumericalRules[NAN_RULE_INDEX] = rule; 288 } 289 } 290 291 /** 292 * Determine the best fraction rule to use. Rules matching the decimal point from 293 * DecimalFormatSymbols become the main set of rules to use. 294 * @param originalIndex The index into nonNumericalRules 295 * @param newRule The new rule to consider 296 * @param rememberRule Should the new rule be added to fractionRules. 297 */ 298 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { 299 if (rememberRule) { 300 fractionRules.add(newRule); 301 } 302 NFRule *bestResult = nonNumericalRules[originalIndex]; 303 if (bestResult == NULL) { 304 nonNumericalRules[originalIndex] = newRule; 305 } 306 else { 307 // We have more than one. Which one is better? 308 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); 309 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) 310 == newRule->getDecimalPoint()) 311 { 312 nonNumericalRules[originalIndex] = newRule; 313 } 314 // else leave it alone 315 } 316 } 317 318 NFRuleSet::~NFRuleSet() 319 { 320 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 321 if (i != IMPROPER_FRACTION_RULE_INDEX 322 && i != PROPER_FRACTION_RULE_INDEX 323 && i != MASTER_RULE_INDEX) 324 { 325 delete nonNumericalRules[i]; 326 } 327 // else it will be deleted via NFRuleList fractionRules 328 } 329 } 330 331 static UBool 332 util_equalRules(const NFRule* rule1, const NFRule* rule2) 333 { 334 if (rule1) { 335 if (rule2) { 336 return *rule1 == *rule2; 337 } 338 } else if (!rule2) { 339 return TRUE; 340 } 341 return FALSE; 342 } 343 344 UBool 345 NFRuleSet::operator==(const NFRuleSet& rhs) const 346 { 347 if (rules.size() == rhs.rules.size() && 348 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 349 name == rhs.name) { 350 351 // ...then compare the non-numerical rule lists... 352 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 353 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { 354 return FALSE; 355 } 356 } 357 358 // ...then compare the rule lists... 359 for (uint32_t i = 0; i < rules.size(); ++i) { 360 if (*rules[i] != *rhs.rules[i]) { 361 return FALSE; 362 } 363 } 364 return TRUE; 365 } 366 return FALSE; 367 } 368 369 void 370 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { 371 for (uint32_t i = 0; i < rules.size(); ++i) { 372 rules[i]->setDecimalFormatSymbols(newSymbols, status); 373 } 374 // Switch the fraction rules to mirror the DecimalFormatSymbols. 375 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { 376 if (nonNumericalRules[nonNumericalIdx]) { 377 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 378 NFRule *fractionRule = fractionRules[fIdx]; 379 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { 380 setBestFractionRule(nonNumericalIdx, fractionRule, FALSE); 381 } 382 } 383 } 384 } 385 386 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { 387 NFRule *rule = nonNumericalRules[nnrIdx]; 388 if (rule) { 389 rule->setDecimalFormatSymbols(newSymbols, status); 390 } 391 } 392 } 393 394 #define RECURSION_LIMIT 64 395 396 void 397 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 398 { 399 if (recursionCount >= RECURSION_LIMIT) { 400 // stop recursion 401 status = U_INVALID_STATE_ERROR; 402 return; 403 } 404 const NFRule *rule = findNormalRule(number); 405 if (rule) { // else error, but can't report it 406 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 407 } 408 } 409 410 void 411 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 412 { 413 if (recursionCount >= RECURSION_LIMIT) { 414 // stop recursion 415 status = U_INVALID_STATE_ERROR; 416 return; 417 } 418 const NFRule *rule = findDoubleRule(number); 419 if (rule) { // else error, but can't report it 420 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 421 } 422 } 423 424 const NFRule* 425 NFRuleSet::findDoubleRule(double number) const 426 { 427 // if this is a fraction rule set, use findFractionRuleSetRule() 428 if (isFractionRuleSet()) { 429 return findFractionRuleSetRule(number); 430 } 431 432 if (uprv_isNaN(number)) { 433 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; 434 if (!rule) { 435 rule = owner->getDefaultNaNRule(); 436 } 437 return rule; 438 } 439 440 // if the number is negative, return the negative number rule 441 // (if there isn't a negative-number rule, we pretend it's a 442 // positive number) 443 if (number < 0) { 444 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 445 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 446 } else { 447 number = -number; 448 } 449 } 450 451 if (uprv_isInfinite(number)) { 452 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; 453 if (!rule) { 454 rule = owner->getDefaultInfinityRule(); 455 } 456 return rule; 457 } 458 459 // if the number isn't an integer, we use one of the fraction rules... 460 if (number != uprv_floor(number)) { 461 // if the number is between 0 and 1, return the proper 462 // fraction rule 463 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { 464 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; 465 } 466 // otherwise, return the improper fraction rule 467 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { 468 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; 469 } 470 } 471 472 // if there's a master rule, use it to format the number 473 if (nonNumericalRules[MASTER_RULE_INDEX]) { 474 return nonNumericalRules[MASTER_RULE_INDEX]; 475 } 476 477 // and if we haven't yet returned a rule, use findNormalRule() 478 // to find the applicable rule 479 int64_t r = util64_fromDouble(number + 0.5); 480 return findNormalRule(r); 481 } 482 483 const NFRule * 484 NFRuleSet::findNormalRule(int64_t number) const 485 { 486 // if this is a fraction rule set, use findFractionRuleSetRule() 487 // to find the rule (we should only go into this clause if the 488 // value is 0) 489 if (fIsFractionRuleSet) { 490 return findFractionRuleSetRule((double)number); 491 } 492 493 // if the number is negative, return the negative-number rule 494 // (if there isn't one, pretend the number is positive) 495 if (number < 0) { 496 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 497 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 498 } else { 499 number = -number; 500 } 501 } 502 503 // we have to repeat the preceding two checks, even though we 504 // do them in findRule(), because the version of format() that 505 // takes a long bypasses findRule() and goes straight to this 506 // function. This function does skip the fraction rules since 507 // we know the value is an integer (it also skips the master 508 // rule, since it's considered a fraction rule. Skipping the 509 // master rule in this function is also how we avoid infinite 510 // recursion) 511 512 // {dlf} unfortunately this fails if there are no rules except 513 // special rules. If there are no rules, use the master rule. 514 515 // binary-search the rule list for the applicable rule 516 // (a rule is used for all values from its base value to 517 // the next rule's base value) 518 int32_t hi = rules.size(); 519 if (hi > 0) { 520 int32_t lo = 0; 521 522 while (lo < hi) { 523 int32_t mid = (lo + hi) / 2; 524 if (rules[mid]->getBaseValue() == number) { 525 return rules[mid]; 526 } 527 else if (rules[mid]->getBaseValue() > number) { 528 hi = mid; 529 } 530 else { 531 lo = mid + 1; 532 } 533 } 534 if (hi == 0) { // bad rule set, minimum base > 0 535 return NULL; // want to throw exception here 536 } 537 538 NFRule *result = rules[hi - 1]; 539 540 // use shouldRollBack() to see whether we need to invoke the 541 // rollback rule (see shouldRollBack()'s documentation for 542 // an explanation of the rollback rule). If we do, roll back 543 // one rule and return that one instead of the one we'd normally 544 // return 545 if (result->shouldRollBack((double)number)) { 546 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 547 return NULL; 548 } 549 result = rules[hi - 2]; 550 } 551 return result; 552 } 553 // else use the master rule 554 return nonNumericalRules[MASTER_RULE_INDEX]; 555 } 556 557 /** 558 * If this rule is a fraction rule set, this function is used by 559 * findRule() to select the most appropriate rule for formatting 560 * the number. Basically, the base value of each rule in the rule 561 * set is treated as the denominator of a fraction. Whichever 562 * denominator can produce the fraction closest in value to the 563 * number passed in is the result. If there's a tie, the earlier 564 * one in the list wins. (If there are two rules in a row with the 565 * same base value, the first one is used when the numerator of the 566 * fraction would be 1, and the second rule is used the rest of the 567 * time. 568 * @param number The number being formatted (which will always be 569 * a number between 0 and 1) 570 * @return The rule to use to format this number 571 */ 572 const NFRule* 573 NFRuleSet::findFractionRuleSetRule(double number) const 574 { 575 // the obvious way to do this (multiply the value being formatted 576 // by each rule's base value until you get an integral result) 577 // doesn't work because of rounding error. This method is more 578 // accurate 579 580 // find the least common multiple of the rules' base values 581 // and multiply this by the number being formatted. This is 582 // all the precision we need, and we can do all of the rest 583 // of the math using integer arithmetic 584 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 585 int64_t numerator; 586 { 587 for (uint32_t i = 1; i < rules.size(); ++i) { 588 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 589 } 590 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 591 } 592 // for each rule, do the following... 593 int64_t tempDifference; 594 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 595 int32_t winner = 0; 596 for (uint32_t i = 0; i < rules.size(); ++i) { 597 // "numerator" is the numerator of the fraction if the 598 // denominator is the LCD. The numerator if the rule's 599 // base value is the denominator is "numerator" times the 600 // base value divided bythe LCD. Here we check to see if 601 // that's an integer, and if not, how close it is to being 602 // an integer. 603 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 604 605 606 // normalize the result of the above calculation: we want 607 // the numerator's distance from the CLOSEST multiple 608 // of the LCD 609 if (leastCommonMultiple - tempDifference < tempDifference) { 610 tempDifference = leastCommonMultiple - tempDifference; 611 } 612 613 // if this is as close as we've come, keep track of how close 614 // that is, and the line number of the rule that did it. If 615 // we've scored a direct hit, we don't have to look at any more 616 // rules 617 if (tempDifference < difference) { 618 difference = tempDifference; 619 winner = i; 620 if (difference == 0) { 621 break; 622 } 623 } 624 } 625 626 // if we have two successive rules that both have the winning base 627 // value, then the first one (the one we found above) is used if 628 // the numerator of the fraction is 1 and the second one is used if 629 // the numerator of the fraction is anything else (this lets us 630 // do things like "one third"/"two thirds" without haveing to define 631 // a whole bunch of extra rule sets) 632 if ((unsigned)(winner + 1) < rules.size() && 633 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 634 double n = ((double)rules[winner]->getBaseValue()) * number; 635 if (n < 0.5 || n >= 2) { 636 ++winner; 637 } 638 } 639 640 // finally, return the winning rule 641 return rules[winner]; 642 } 643 644 /** 645 * Parses a string. Matches the string to be parsed against each 646 * of its rules (with a base value less than upperBound) and returns 647 * the value produced by the rule that matched the most charcters 648 * in the source string. 649 * @param text The string to parse 650 * @param parsePosition The initial position is ignored and assumed 651 * to be 0. On exit, this object has been updated to point to the 652 * first character position this rule set didn't consume. 653 * @param upperBound Limits the rules that can be allowed to match. 654 * Only rules whose base values are strictly less than upperBound 655 * are considered. 656 * @return The numerical result of parsing this string. This will 657 * be the matching rule's base value, composed appropriately with 658 * the results of matching any of its substitutions. The object 659 * will be an instance of Long if it's an integral value; otherwise, 660 * it will be an instance of Double. This function always returns 661 * a valid object: If nothing matched the input string at all, 662 * this function returns new Long(0), and the parse position is 663 * left unchanged. 664 */ 665 #ifdef RBNF_DEBUG 666 #include <stdio.h> 667 668 static void dumpUS(FILE* f, const UnicodeString& us) { 669 int len = us.length(); 670 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 671 if (buf != NULL) { 672 us.extract(0, len, buf); 673 buf[len] = 0; 674 fprintf(f, "%s", buf); 675 uprv_free(buf); //delete[] buf; 676 } 677 } 678 #endif 679 680 UBool 681 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 682 { 683 // try matching each rule in the rule set against the text being 684 // parsed. Whichever one matches the most characters is the one 685 // that determines the value we return. 686 687 result.setLong(0); 688 689 // dump out if there's no text to parse 690 if (text.length() == 0) { 691 return 0; 692 } 693 694 ParsePosition highWaterMark; 695 ParsePosition workingPos = pos; 696 697 #ifdef RBNF_DEBUG 698 fprintf(stderr, "<nfrs> %x '", this); 699 dumpUS(stderr, name); 700 fprintf(stderr, "' text '"); 701 dumpUS(stderr, text); 702 fprintf(stderr, "'\n"); 703 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 704 #endif 705 // Try each of the negative rules, fraction rules, infinity rules and NaN rules 706 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 707 if (nonNumericalRules[i]) { 708 Formattable tempResult; 709 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 710 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 711 result = tempResult; 712 highWaterMark = workingPos; 713 } 714 workingPos = pos; 715 } 716 } 717 #ifdef RBNF_DEBUG 718 fprintf(stderr, "<nfrs> continue other with text '"); 719 dumpUS(stderr, text); 720 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 721 #endif 722 723 // finally, go through the regular rules one at a time. We start 724 // at the end of the list because we want to try matching the most 725 // sigificant rule first (this helps ensure that we parse 726 // "five thousand three hundred six" as 727 // "(five thousand) (three hundred) (six)" rather than 728 // "((five thousand three) hundred) (six)"). Skip rules whose 729 // base values are higher than the upper bound (again, this helps 730 // limit ambiguity by making sure the rules that match a rule's 731 // are less significant than the rule containing the substitutions)/ 732 { 733 int64_t ub = util64_fromDouble(upperBound); 734 #ifdef RBNF_DEBUG 735 { 736 char ubstr[64]; 737 util64_toa(ub, ubstr, 64); 738 char ubstrhex[64]; 739 util64_toa(ub, ubstrhex, 64, 16); 740 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 741 } 742 #endif 743 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 744 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 745 continue; 746 } 747 Formattable tempResult; 748 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 749 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 750 result = tempResult; 751 highWaterMark = workingPos; 752 } 753 workingPos = pos; 754 } 755 } 756 #ifdef RBNF_DEBUG 757 fprintf(stderr, "<nfrs> exit\n"); 758 #endif 759 // finally, update the parse postion we were passed to point to the 760 // first character we didn't use, and return the result that 761 // corresponds to that string of characters 762 pos = highWaterMark; 763 764 return 1; 765 } 766 767 void 768 NFRuleSet::appendRules(UnicodeString& result) const 769 { 770 uint32_t i; 771 772 // the rule set name goes first... 773 result.append(name); 774 result.append(gColon); 775 result.append(gLineFeed); 776 777 // followed by the regular rules... 778 for (i = 0; i < rules.size(); i++) { 779 rules[i]->_appendRuleText(result); 780 result.append(gLineFeed); 781 } 782 783 // followed by the special rules (if they exist) 784 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 785 NFRule *rule = nonNumericalRules[i]; 786 if (nonNumericalRules[i]) { 787 if (rule->getBaseValue() == NFRule::kImproperFractionRule 788 || rule->getBaseValue() == NFRule::kProperFractionRule 789 || rule->getBaseValue() == NFRule::kMasterRule) 790 { 791 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 792 NFRule *fractionRule = fractionRules[fIdx]; 793 if (fractionRule->getBaseValue() == rule->getBaseValue()) { 794 fractionRule->_appendRuleText(result); 795 result.append(gLineFeed); 796 } 797 } 798 } 799 else { 800 rule->_appendRuleText(result); 801 result.append(gLineFeed); 802 } 803 } 804 } 805 } 806 807 // utility functions 808 809 int64_t util64_fromDouble(double d) { 810 int64_t result = 0; 811 if (!uprv_isNaN(d)) { 812 double mant = uprv_maxMantissa(); 813 if (d < -mant) { 814 d = -mant; 815 } else if (d > mant) { 816 d = mant; 817 } 818 UBool neg = d < 0; 819 if (neg) { 820 d = -d; 821 } 822 result = (int64_t)uprv_floor(d); 823 if (neg) { 824 result = -result; 825 } 826 } 827 return result; 828 } 829 830 int64_t util64_pow(int32_t r, uint32_t e) { 831 if (r == 0) { 832 return 0; 833 } else if (e == 0) { 834 return 1; 835 } else { 836 int64_t n = r; 837 while (--e > 0) { 838 n *= r; 839 } 840 return n; 841 } 842 } 843 844 static const uint8_t asciiDigits[] = { 845 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 846 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 847 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 848 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 849 0x77u, 0x78u, 0x79u, 0x7au, 850 }; 851 852 static const UChar kUMinus = (UChar)0x002d; 853 854 #ifdef RBNF_DEBUG 855 static const char kMinus = '-'; 856 857 static const uint8_t digitInfo[] = { 858 0, 0, 0, 0, 0, 0, 0, 0, 859 0, 0, 0, 0, 0, 0, 0, 0, 860 0, 0, 0, 0, 0, 0, 0, 0, 861 0, 0, 0, 0, 0, 0, 0, 0, 862 0, 0, 0, 0, 0, 0, 0, 0, 863 0, 0, 0, 0, 0, 0, 0, 0, 864 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 865 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 866 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 867 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 868 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 869 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 870 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 871 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 872 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 873 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 874 }; 875 876 int64_t util64_atoi(const char* str, uint32_t radix) 877 { 878 if (radix > 36) { 879 radix = 36; 880 } else if (radix < 2) { 881 radix = 2; 882 } 883 int64_t lradix = radix; 884 885 int neg = 0; 886 if (*str == kMinus) { 887 ++str; 888 neg = 1; 889 } 890 int64_t result = 0; 891 uint8_t b; 892 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 893 result *= lradix; 894 result += (int32_t)b; 895 } 896 if (neg) { 897 result = -result; 898 } 899 return result; 900 } 901 902 int64_t util64_utoi(const UChar* str, uint32_t radix) 903 { 904 if (radix > 36) { 905 radix = 36; 906 } else if (radix < 2) { 907 radix = 2; 908 } 909 int64_t lradix = radix; 910 911 int neg = 0; 912 if (*str == kUMinus) { 913 ++str; 914 neg = 1; 915 } 916 int64_t result = 0; 917 UChar c; 918 uint8_t b; 919 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 920 result *= lradix; 921 result += (int32_t)b; 922 } 923 if (neg) { 924 result = -result; 925 } 926 return result; 927 } 928 929 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 930 { 931 if (radix > 36) { 932 radix = 36; 933 } else if (radix < 2) { 934 radix = 2; 935 } 936 int64_t base = radix; 937 938 char* p = buf; 939 if (len && (w < 0) && (radix == 10) && !raw) { 940 w = -w; 941 *p++ = kMinus; 942 --len; 943 } else if (len && (w == 0)) { 944 *p++ = (char)raw ? 0 : asciiDigits[0]; 945 --len; 946 } 947 948 while (len && w != 0) { 949 int64_t n = w / base; 950 int64_t m = n * base; 951 int32_t d = (int32_t)(w-m); 952 *p++ = raw ? (char)d : asciiDigits[d]; 953 w = n; 954 --len; 955 } 956 if (len) { 957 *p = 0; // null terminate if room for caller convenience 958 } 959 960 len = p - buf; 961 if (*buf == kMinus) { 962 ++buf; 963 } 964 while (--p > buf) { 965 char c = *p; 966 *p = *buf; 967 *buf = c; 968 ++buf; 969 } 970 971 return len; 972 } 973 #endif 974 975 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 976 { 977 if (radix > 36) { 978 radix = 36; 979 } else if (radix < 2) { 980 radix = 2; 981 } 982 int64_t base = radix; 983 984 UChar* p = buf; 985 if (len && (w < 0) && (radix == 10) && !raw) { 986 w = -w; 987 *p++ = kUMinus; 988 --len; 989 } else if (len && (w == 0)) { 990 *p++ = (UChar)raw ? 0 : asciiDigits[0]; 991 --len; 992 } 993 994 while (len && (w != 0)) { 995 int64_t n = w / base; 996 int64_t m = n * base; 997 int32_t d = (int32_t)(w-m); 998 *p++ = (UChar)(raw ? d : asciiDigits[d]); 999 w = n; 1000 --len; 1001 } 1002 if (len) { 1003 *p = 0; // null terminate if room for caller convenience 1004 } 1005 1006 len = (uint32_t)(p - buf); 1007 if (*buf == kUMinus) { 1008 ++buf; 1009 } 1010 while (--p > buf) { 1011 UChar c = *p; 1012 *p = *buf; 1013 *buf = c; 1014 ++buf; 1015 } 1016 1017 return len; 1018 } 1019 1020 1021 U_NAMESPACE_END 1022 1023 /* U_HAVE_RBNF */ 1024 #endif 1025 1026