1 /* 2 ****************************************************************************** 3 * Copyright (C) 1997-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ****************************************************************************** 6 * file name: nfrs.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * Modification history 12 * Date Name Comments 13 * 10/11/2001 Doug Ported from ICU4J 14 */ 15 16 #include "nfrs.h" 17 18 #if U_HAVE_RBNF 19 20 #include "unicode/uchar.h" 21 #include "nfrule.h" 22 #include "nfrlist.h" 23 #include "patternprops.h" 24 25 #ifdef RBNF_DEBUG 26 #include "cmemory.h" 27 #endif 28 29 U_NAMESPACE_BEGIN 30 31 #if 0 32 // euclid's algorithm works with doubles 33 // note, doubles only get us up to one quadrillion or so, which 34 // isn't as much range as we get with longs. We probably still 35 // want either 64-bit math, or BigInteger. 36 37 static int64_t 38 util_lcm(int64_t x, int64_t y) 39 { 40 x.abs(); 41 y.abs(); 42 43 if (x == 0 || y == 0) { 44 return 0; 45 } else { 46 do { 47 if (x < y) { 48 int64_t t = x; x = y; y = t; 49 } 50 x -= y * (x/y); 51 } while (x != 0); 52 53 return y; 54 } 55 } 56 57 #else 58 /** 59 * Calculates the least common multiple of x and y. 60 */ 61 static int64_t 62 util_lcm(int64_t x, int64_t y) 63 { 64 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 65 // vol. 2, 1st ed., pp. 298-299 66 int64_t x1 = x; 67 int64_t y1 = y; 68 69 int p2 = 0; 70 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 71 ++p2; 72 x1 >>= 1; 73 y1 >>= 1; 74 } 75 76 int64_t t; 77 if ((x1 & 1) == 1) { 78 t = -y1; 79 } else { 80 t = x1; 81 } 82 83 while (t != 0) { 84 while ((t & 1) == 0) { 85 t = t >> 1; 86 } 87 if (t > 0) { 88 x1 = t; 89 } else { 90 y1 = -t; 91 } 92 t = x1 - y1; 93 } 94 95 int64_t gcd = x1 << p2; 96 97 // x * y == gcd(x, y) * lcm(x, y) 98 return x / gcd * y; 99 } 100 #endif 101 102 static const UChar gPercent = 0x0025; 103 static const UChar gColon = 0x003a; 104 static const UChar gSemicolon = 0x003b; 105 static const UChar gLineFeed = 0x000a; 106 107 static const UChar gFourSpaces[] = 108 { 109 0x20, 0x20, 0x20, 0x20, 0 110 }; /* " " */ 111 static const UChar gPercentPercent[] = 112 { 113 0x25, 0x25, 0 114 }; /* "%%" */ 115 116 static const UChar gNoparse[] = 117 { 118 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 119 }; /* "@noparse" */ 120 121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) 122 : name() 123 , rules(0) 124 , negativeNumberRule(NULL) 125 , fIsFractionRuleSet(FALSE) 126 , fIsPublic(FALSE) 127 , fIsParseable(TRUE) 128 , fRecursionCount(0) 129 { 130 for (int i = 0; i < 3; ++i) { 131 fractionRules[i] = NULL; 132 } 133 134 if (U_FAILURE(status)) { 135 return; 136 } 137 138 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 139 140 if (description.length() == 0) { 141 // throw new IllegalArgumentException("Empty rule set description"); 142 status = U_PARSE_ERROR; 143 return; 144 } 145 146 // if the description begins with a rule set name (the rule set 147 // name can be omitted in formatter descriptions that consist 148 // of only one rule set), copy it out into our "name" member 149 // and delete it from the description 150 if (description.charAt(0) == gPercent) { 151 int32_t pos = description.indexOf(gColon); 152 if (pos == -1) { 153 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 154 status = U_PARSE_ERROR; 155 } else { 156 name.setTo(description, 0, pos); 157 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 158 } 159 description.remove(0, pos); 160 } 161 } else { 162 name.setTo(UNICODE_STRING_SIMPLE("%default")); 163 } 164 165 if (description.length() == 0) { 166 // throw new IllegalArgumentException("Empty rule set description"); 167 status = U_PARSE_ERROR; 168 } 169 170 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; 171 172 if ( name.endsWith(gNoparse,8) ) { 173 fIsParseable = FALSE; 174 name.truncate(name.length()-8); // remove the @noparse from the name 175 } 176 177 // all of the other members of NFRuleSet are initialized 178 // by parseRules() 179 } 180 181 void 182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) 183 { 184 // start by creating a Vector whose elements are Strings containing 185 // the descriptions of the rules (one rule per element). The rules 186 // are separated by semicolons (there's no escape facility: ALL 187 // semicolons are rule delimiters) 188 189 if (U_FAILURE(status)) { 190 return; 191 } 192 193 // ensure we are starting with an empty rule list 194 rules.deleteAll(); 195 196 // dlf - the original code kept a separate description array for no reason, 197 // so I got rid of it. The loop was too complex so I simplified it. 198 199 UnicodeString currentDescription; 200 int32_t oldP = 0; 201 while (oldP < description.length()) { 202 int32_t p = description.indexOf(gSemicolon, oldP); 203 if (p == -1) { 204 p = description.length(); 205 } 206 currentDescription.setTo(description, oldP, p - oldP); 207 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 208 oldP = p + 1; 209 } 210 211 // for rules that didn't specify a base value, their base values 212 // were initialized to 0. Make another pass through the list and 213 // set all those rules' base values. We also remove any special 214 // rules from the list and put them into their own member variables 215 int64_t defaultBaseValue = 0; 216 217 // (this isn't a for loop because we might be deleting items from 218 // the vector-- we want to make sure we only increment i when 219 // we _didn't_ delete aything from the vector) 220 uint32_t i = 0; 221 while (i < rules.size()) { 222 NFRule* rule = rules[i]; 223 224 switch (rule->getType()) { 225 // if the rule's base value is 0, fill in a default 226 // base value (this will be 1 plus the preceding 227 // rule's base value for regular rule sets, and the 228 // same as the preceding rule's base value in fraction 229 // rule sets) 230 case NFRule::kNoBase: 231 rule->setBaseValue(defaultBaseValue, status); 232 if (!isFractionRuleSet()) { 233 ++defaultBaseValue; 234 } 235 ++i; 236 break; 237 238 // if it's the negative-number rule, copy it into its own 239 // data member and delete it from the list 240 case NFRule::kNegativeNumberRule: 241 if (negativeNumberRule) { 242 delete negativeNumberRule; 243 } 244 negativeNumberRule = rules.remove(i); 245 break; 246 247 // if it's the improper fraction rule, copy it into the 248 // correct element of fractionRules 249 case NFRule::kImproperFractionRule: 250 if (fractionRules[0]) { 251 delete fractionRules[0]; 252 } 253 fractionRules[0] = rules.remove(i); 254 break; 255 256 // if it's the proper fraction rule, copy it into the 257 // correct element of fractionRules 258 case NFRule::kProperFractionRule: 259 if (fractionRules[1]) { 260 delete fractionRules[1]; 261 } 262 fractionRules[1] = rules.remove(i); 263 break; 264 265 // if it's the master rule, copy it into the 266 // correct element of fractionRules 267 case NFRule::kMasterRule: 268 if (fractionRules[2]) { 269 delete fractionRules[2]; 270 } 271 fractionRules[2] = rules.remove(i); 272 break; 273 274 // if it's a regular rule that already knows its base value, 275 // check to make sure the rules are in order, and update 276 // the default base value for the next rule 277 default: 278 if (rule->getBaseValue() < defaultBaseValue) { 279 // throw new IllegalArgumentException("Rules are not in order"); 280 status = U_PARSE_ERROR; 281 return; 282 } 283 defaultBaseValue = rule->getBaseValue(); 284 if (!isFractionRuleSet()) { 285 ++defaultBaseValue; 286 } 287 ++i; 288 break; 289 } 290 } 291 } 292 293 NFRuleSet::~NFRuleSet() 294 { 295 delete negativeNumberRule; 296 delete fractionRules[0]; 297 delete fractionRules[1]; 298 delete fractionRules[2]; 299 } 300 301 static UBool 302 util_equalRules(const NFRule* rule1, const NFRule* rule2) 303 { 304 if (rule1) { 305 if (rule2) { 306 return *rule1 == *rule2; 307 } 308 } else if (!rule2) { 309 return TRUE; 310 } 311 return FALSE; 312 } 313 314 UBool 315 NFRuleSet::operator==(const NFRuleSet& rhs) const 316 { 317 if (rules.size() == rhs.rules.size() && 318 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 319 name == rhs.name && 320 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && 321 util_equalRules(fractionRules[0], rhs.fractionRules[0]) && 322 util_equalRules(fractionRules[1], rhs.fractionRules[1]) && 323 util_equalRules(fractionRules[2], rhs.fractionRules[2])) { 324 325 for (uint32_t i = 0; i < rules.size(); ++i) { 326 if (*rules[i] != *rhs.rules[i]) { 327 return FALSE; 328 } 329 } 330 return TRUE; 331 } 332 return FALSE; 333 } 334 335 #define RECURSION_LIMIT 50 336 337 void 338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const 339 { 340 NFRule *rule = findNormalRule(number); 341 if (rule) { // else error, but can't report it 342 NFRuleSet* ncThis = (NFRuleSet*)this; 343 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 344 // stop recursion 345 ncThis->fRecursionCount = 0; 346 } else { 347 rule->doFormat(number, toAppendTo, pos, status); 348 ncThis->fRecursionCount--; 349 } 350 } 351 } 352 353 void 354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const 355 { 356 NFRule *rule = findDoubleRule(number); 357 if (rule) { // else error, but can't report it 358 NFRuleSet* ncThis = (NFRuleSet*)this; 359 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 360 // stop recursion 361 ncThis->fRecursionCount = 0; 362 } else { 363 rule->doFormat(number, toAppendTo, pos, status); 364 ncThis->fRecursionCount--; 365 } 366 } 367 } 368 369 NFRule* 370 NFRuleSet::findDoubleRule(double number) const 371 { 372 // if this is a fraction rule set, use findFractionRuleSetRule() 373 if (isFractionRuleSet()) { 374 return findFractionRuleSetRule(number); 375 } 376 377 // if the number is negative, return the negative number rule 378 // (if there isn't a negative-number rule, we pretend it's a 379 // positive number) 380 if (number < 0) { 381 if (negativeNumberRule) { 382 return negativeNumberRule; 383 } else { 384 number = -number; 385 } 386 } 387 388 // if the number isn't an integer, we use one of the fraction rules... 389 if (number != uprv_floor(number)) { 390 // if the number is between 0 and 1, return the proper 391 // fraction rule 392 if (number < 1 && fractionRules[1]) { 393 return fractionRules[1]; 394 } 395 // otherwise, return the improper fraction rule 396 else if (fractionRules[0]) { 397 return fractionRules[0]; 398 } 399 } 400 401 // if there's a master rule, use it to format the number 402 if (fractionRules[2]) { 403 return fractionRules[2]; 404 } 405 406 // and if we haven't yet returned a rule, use findNormalRule() 407 // to find the applicable rule 408 int64_t r = util64_fromDouble(number + 0.5); 409 return findNormalRule(r); 410 } 411 412 NFRule * 413 NFRuleSet::findNormalRule(int64_t number) const 414 { 415 // if this is a fraction rule set, use findFractionRuleSetRule() 416 // to find the rule (we should only go into this clause if the 417 // value is 0) 418 if (fIsFractionRuleSet) { 419 return findFractionRuleSetRule((double)number); 420 } 421 422 // if the number is negative, return the negative-number rule 423 // (if there isn't one, pretend the number is positive) 424 if (number < 0) { 425 if (negativeNumberRule) { 426 return negativeNumberRule; 427 } else { 428 number = -number; 429 } 430 } 431 432 // we have to repeat the preceding two checks, even though we 433 // do them in findRule(), because the version of format() that 434 // takes a long bypasses findRule() and goes straight to this 435 // function. This function does skip the fraction rules since 436 // we know the value is an integer (it also skips the master 437 // rule, since it's considered a fraction rule. Skipping the 438 // master rule in this function is also how we avoid infinite 439 // recursion) 440 441 // {dlf} unfortunately this fails if there are no rules except 442 // special rules. If there are no rules, use the master rule. 443 444 // binary-search the rule list for the applicable rule 445 // (a rule is used for all values from its base value to 446 // the next rule's base value) 447 int32_t hi = rules.size(); 448 if (hi > 0) { 449 int32_t lo = 0; 450 451 while (lo < hi) { 452 int32_t mid = (lo + hi) / 2; 453 if (rules[mid]->getBaseValue() == number) { 454 return rules[mid]; 455 } 456 else if (rules[mid]->getBaseValue() > number) { 457 hi = mid; 458 } 459 else { 460 lo = mid + 1; 461 } 462 } 463 if (hi == 0) { // bad rule set, minimum base > 0 464 return NULL; // want to throw exception here 465 } 466 467 NFRule *result = rules[hi - 1]; 468 469 // use shouldRollBack() to see whether we need to invoke the 470 // rollback rule (see shouldRollBack()'s documentation for 471 // an explanation of the rollback rule). If we do, roll back 472 // one rule and return that one instead of the one we'd normally 473 // return 474 if (result->shouldRollBack((double)number)) { 475 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 476 return NULL; 477 } 478 result = rules[hi - 2]; 479 } 480 return result; 481 } 482 // else use the master rule 483 return fractionRules[2]; 484 } 485 486 /** 487 * If this rule is a fraction rule set, this function is used by 488 * findRule() to select the most appropriate rule for formatting 489 * the number. Basically, the base value of each rule in the rule 490 * set is treated as the denominator of a fraction. Whichever 491 * denominator can produce the fraction closest in value to the 492 * number passed in is the result. If there's a tie, the earlier 493 * one in the list wins. (If there are two rules in a row with the 494 * same base value, the first one is used when the numerator of the 495 * fraction would be 1, and the second rule is used the rest of the 496 * time. 497 * @param number The number being formatted (which will always be 498 * a number between 0 and 1) 499 * @return The rule to use to format this number 500 */ 501 NFRule* 502 NFRuleSet::findFractionRuleSetRule(double number) const 503 { 504 // the obvious way to do this (multiply the value being formatted 505 // by each rule's base value until you get an integral result) 506 // doesn't work because of rounding error. This method is more 507 // accurate 508 509 // find the least common multiple of the rules' base values 510 // and multiply this by the number being formatted. This is 511 // all the precision we need, and we can do all of the rest 512 // of the math using integer arithmetic 513 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 514 int64_t numerator; 515 { 516 for (uint32_t i = 1; i < rules.size(); ++i) { 517 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 518 } 519 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 520 } 521 // for each rule, do the following... 522 int64_t tempDifference; 523 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 524 int32_t winner = 0; 525 for (uint32_t i = 0; i < rules.size(); ++i) { 526 // "numerator" is the numerator of the fraction if the 527 // denominator is the LCD. The numerator if the rule's 528 // base value is the denominator is "numerator" times the 529 // base value divided bythe LCD. Here we check to see if 530 // that's an integer, and if not, how close it is to being 531 // an integer. 532 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 533 534 535 // normalize the result of the above calculation: we want 536 // the numerator's distance from the CLOSEST multiple 537 // of the LCD 538 if (leastCommonMultiple - tempDifference < tempDifference) { 539 tempDifference = leastCommonMultiple - tempDifference; 540 } 541 542 // if this is as close as we've come, keep track of how close 543 // that is, and the line number of the rule that did it. If 544 // we've scored a direct hit, we don't have to look at any more 545 // rules 546 if (tempDifference < difference) { 547 difference = tempDifference; 548 winner = i; 549 if (difference == 0) { 550 break; 551 } 552 } 553 } 554 555 // if we have two successive rules that both have the winning base 556 // value, then the first one (the one we found above) is used if 557 // the numerator of the fraction is 1 and the second one is used if 558 // the numerator of the fraction is anything else (this lets us 559 // do things like "one third"/"two thirds" without haveing to define 560 // a whole bunch of extra rule sets) 561 if ((unsigned)(winner + 1) < rules.size() && 562 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 563 double n = ((double)rules[winner]->getBaseValue()) * number; 564 if (n < 0.5 || n >= 2) { 565 ++winner; 566 } 567 } 568 569 // finally, return the winning rule 570 return rules[winner]; 571 } 572 573 /** 574 * Parses a string. Matches the string to be parsed against each 575 * of its rules (with a base value less than upperBound) and returns 576 * the value produced by the rule that matched the most charcters 577 * in the source string. 578 * @param text The string to parse 579 * @param parsePosition The initial position is ignored and assumed 580 * to be 0. On exit, this object has been updated to point to the 581 * first character position this rule set didn't consume. 582 * @param upperBound Limits the rules that can be allowed to match. 583 * Only rules whose base values are strictly less than upperBound 584 * are considered. 585 * @return The numerical result of parsing this string. This will 586 * be the matching rule's base value, composed appropriately with 587 * the results of matching any of its substitutions. The object 588 * will be an instance of Long if it's an integral value; otherwise, 589 * it will be an instance of Double. This function always returns 590 * a valid object: If nothing matched the input string at all, 591 * this function returns new Long(0), and the parse position is 592 * left unchanged. 593 */ 594 #ifdef RBNF_DEBUG 595 #include <stdio.h> 596 597 static void dumpUS(FILE* f, const UnicodeString& us) { 598 int len = us.length(); 599 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 600 if (buf != NULL) { 601 us.extract(0, len, buf); 602 buf[len] = 0; 603 fprintf(f, "%s", buf); 604 uprv_free(buf); //delete[] buf; 605 } 606 } 607 #endif 608 609 UBool 610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 611 { 612 // try matching each rule in the rule set against the text being 613 // parsed. Whichever one matches the most characters is the one 614 // that determines the value we return. 615 616 result.setLong(0); 617 618 // dump out if there's no text to parse 619 if (text.length() == 0) { 620 return 0; 621 } 622 623 ParsePosition highWaterMark; 624 ParsePosition workingPos = pos; 625 626 #ifdef RBNF_DEBUG 627 fprintf(stderr, "<nfrs> %x '", this); 628 dumpUS(stderr, name); 629 fprintf(stderr, "' text '"); 630 dumpUS(stderr, text); 631 fprintf(stderr, "'\n"); 632 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 633 #endif 634 635 // start by trying the negative number rule (if there is one) 636 if (negativeNumberRule) { 637 Formattable tempResult; 638 #ifdef RBNF_DEBUG 639 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound); 640 #endif 641 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); 642 #ifdef RBNF_DEBUG 643 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex()); 644 #endif 645 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 646 result = tempResult; 647 highWaterMark = workingPos; 648 } 649 workingPos = pos; 650 } 651 #ifdef RBNF_DEBUG 652 fprintf(stderr, "<nfrs> continue fractional with text '"); 653 dumpUS(stderr, text); 654 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 655 #endif 656 // then try each of the fraction rules 657 { 658 for (int i = 0; i < 3; i++) { 659 if (fractionRules[i]) { 660 Formattable tempResult; 661 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 662 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 663 result = tempResult; 664 highWaterMark = workingPos; 665 } 666 workingPos = pos; 667 } 668 } 669 } 670 #ifdef RBNF_DEBUG 671 fprintf(stderr, "<nfrs> continue other with text '"); 672 dumpUS(stderr, text); 673 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 674 #endif 675 676 // finally, go through the regular rules one at a time. We start 677 // at the end of the list because we want to try matching the most 678 // sigificant rule first (this helps ensure that we parse 679 // "five thousand three hundred six" as 680 // "(five thousand) (three hundred) (six)" rather than 681 // "((five thousand three) hundred) (six)"). Skip rules whose 682 // base values are higher than the upper bound (again, this helps 683 // limit ambiguity by making sure the rules that match a rule's 684 // are less significant than the rule containing the substitutions)/ 685 { 686 int64_t ub = util64_fromDouble(upperBound); 687 #ifdef RBNF_DEBUG 688 { 689 char ubstr[64]; 690 util64_toa(ub, ubstr, 64); 691 char ubstrhex[64]; 692 util64_toa(ub, ubstrhex, 64, 16); 693 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 694 } 695 #endif 696 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 697 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 698 continue; 699 } 700 Formattable tempResult; 701 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 702 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 703 result = tempResult; 704 highWaterMark = workingPos; 705 } 706 workingPos = pos; 707 } 708 } 709 #ifdef RBNF_DEBUG 710 fprintf(stderr, "<nfrs> exit\n"); 711 #endif 712 // finally, update the parse postion we were passed to point to the 713 // first character we didn't use, and return the result that 714 // corresponds to that string of characters 715 pos = highWaterMark; 716 717 return 1; 718 } 719 720 void 721 NFRuleSet::appendRules(UnicodeString& result) const 722 { 723 // the rule set name goes first... 724 result.append(name); 725 result.append(gColon); 726 result.append(gLineFeed); 727 728 // followed by the regular rules... 729 for (uint32_t i = 0; i < rules.size(); i++) { 730 result.append(gFourSpaces, 4); 731 rules[i]->_appendRuleText(result); 732 result.append(gLineFeed); 733 } 734 735 // followed by the special rules (if they exist) 736 if (negativeNumberRule) { 737 result.append(gFourSpaces, 4); 738 negativeNumberRule->_appendRuleText(result); 739 result.append(gLineFeed); 740 } 741 742 { 743 for (uint32_t i = 0; i < 3; ++i) { 744 if (fractionRules[i]) { 745 result.append(gFourSpaces, 4); 746 fractionRules[i]->_appendRuleText(result); 747 result.append(gLineFeed); 748 } 749 } 750 } 751 } 752 753 // utility functions 754 755 int64_t util64_fromDouble(double d) { 756 int64_t result = 0; 757 if (!uprv_isNaN(d)) { 758 double mant = uprv_maxMantissa(); 759 if (d < -mant) { 760 d = -mant; 761 } else if (d > mant) { 762 d = mant; 763 } 764 UBool neg = d < 0; 765 if (neg) { 766 d = -d; 767 } 768 result = (int64_t)uprv_floor(d); 769 if (neg) { 770 result = -result; 771 } 772 } 773 return result; 774 } 775 776 int64_t util64_pow(int32_t r, uint32_t e) { 777 if (r == 0) { 778 return 0; 779 } else if (e == 0) { 780 return 1; 781 } else { 782 int64_t n = r; 783 while (--e > 0) { 784 n *= r; 785 } 786 return n; 787 } 788 } 789 790 static const uint8_t asciiDigits[] = { 791 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 792 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 793 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 794 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 795 0x77u, 0x78u, 0x79u, 0x7au, 796 }; 797 798 static const UChar kUMinus = (UChar)0x002d; 799 800 #ifdef RBNF_DEBUG 801 static const char kMinus = '-'; 802 803 static const uint8_t digitInfo[] = { 804 0, 0, 0, 0, 0, 0, 0, 0, 805 0, 0, 0, 0, 0, 0, 0, 0, 806 0, 0, 0, 0, 0, 0, 0, 0, 807 0, 0, 0, 0, 0, 0, 0, 0, 808 0, 0, 0, 0, 0, 0, 0, 0, 809 0, 0, 0, 0, 0, 0, 0, 0, 810 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 811 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 812 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 813 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 814 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 815 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 816 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 817 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 818 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 819 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 820 }; 821 822 int64_t util64_atoi(const char* str, uint32_t radix) 823 { 824 if (radix > 36) { 825 radix = 36; 826 } else if (radix < 2) { 827 radix = 2; 828 } 829 int64_t lradix = radix; 830 831 int neg = 0; 832 if (*str == kMinus) { 833 ++str; 834 neg = 1; 835 } 836 int64_t result = 0; 837 uint8_t b; 838 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 839 result *= lradix; 840 result += (int32_t)b; 841 } 842 if (neg) { 843 result = -result; 844 } 845 return result; 846 } 847 848 int64_t util64_utoi(const UChar* str, uint32_t radix) 849 { 850 if (radix > 36) { 851 radix = 36; 852 } else if (radix < 2) { 853 radix = 2; 854 } 855 int64_t lradix = radix; 856 857 int neg = 0; 858 if (*str == kUMinus) { 859 ++str; 860 neg = 1; 861 } 862 int64_t result = 0; 863 UChar c; 864 uint8_t b; 865 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 866 result *= lradix; 867 result += (int32_t)b; 868 } 869 if (neg) { 870 result = -result; 871 } 872 return result; 873 } 874 875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 876 { 877 if (radix > 36) { 878 radix = 36; 879 } else if (radix < 2) { 880 radix = 2; 881 } 882 int64_t base = radix; 883 884 char* p = buf; 885 if (len && (w < 0) && (radix == 10) && !raw) { 886 w = -w; 887 *p++ = kMinus; 888 --len; 889 } else if (len && (w == 0)) { 890 *p++ = (char)raw ? 0 : asciiDigits[0]; 891 --len; 892 } 893 894 while (len && w != 0) { 895 int64_t n = w / base; 896 int64_t m = n * base; 897 int32_t d = (int32_t)(w-m); 898 *p++ = raw ? (char)d : asciiDigits[d]; 899 w = n; 900 --len; 901 } 902 if (len) { 903 *p = 0; // null terminate if room for caller convenience 904 } 905 906 len = p - buf; 907 if (*buf == kMinus) { 908 ++buf; 909 } 910 while (--p > buf) { 911 char c = *p; 912 *p = *buf; 913 *buf = c; 914 ++buf; 915 } 916 917 return len; 918 } 919 #endif 920 921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 922 { 923 if (radix > 36) { 924 radix = 36; 925 } else if (radix < 2) { 926 radix = 2; 927 } 928 int64_t base = radix; 929 930 UChar* p = buf; 931 if (len && (w < 0) && (radix == 10) && !raw) { 932 w = -w; 933 *p++ = kUMinus; 934 --len; 935 } else if (len && (w == 0)) { 936 *p++ = (UChar)raw ? 0 : asciiDigits[0]; 937 --len; 938 } 939 940 while (len && (w != 0)) { 941 int64_t n = w / base; 942 int64_t m = n * base; 943 int32_t d = (int32_t)(w-m); 944 *p++ = (UChar)(raw ? d : asciiDigits[d]); 945 w = n; 946 --len; 947 } 948 if (len) { 949 *p = 0; // null terminate if room for caller convenience 950 } 951 952 len = (uint32_t)(p - buf); 953 if (*buf == kUMinus) { 954 ++buf; 955 } 956 while (--p > buf) { 957 UChar c = *p; 958 *p = *buf; 959 *buf = c; 960 ++buf; 961 } 962 963 return len; 964 } 965 966 967 U_NAMESPACE_END 968 969 /* U_HAVE_RBNF */ 970 #endif 971 972