1 /* 2 ****************************************************************************** 3 * Copyright (C) 1997-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ****************************************************************************** 6 * file name: nfrs.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * Modification history 12 * Date Name Comments 13 * 10/11/2001 Doug Ported from ICU4J 14 */ 15 16 #include "nfrs.h" 17 18 #if U_HAVE_RBNF 19 20 #include "unicode/uchar.h" 21 #include "nfrule.h" 22 #include "nfrlist.h" 23 #include "patternprops.h" 24 25 #ifdef RBNF_DEBUG 26 #include "cmemory.h" 27 #endif 28 29 U_NAMESPACE_BEGIN 30 31 #if 0 32 // euclid's algorithm works with doubles 33 // note, doubles only get us up to one quadrillion or so, which 34 // isn't as much range as we get with longs. We probably still 35 // want either 64-bit math, or BigInteger. 36 37 static int64_t 38 util_lcm(int64_t x, int64_t y) 39 { 40 x.abs(); 41 y.abs(); 42 43 if (x == 0 || y == 0) { 44 return 0; 45 } else { 46 do { 47 if (x < y) { 48 int64_t t = x; x = y; y = t; 49 } 50 x -= y * (x/y); 51 } while (x != 0); 52 53 return y; 54 } 55 } 56 57 #else 58 /** 59 * Calculates the least common multiple of x and y. 60 */ 61 static int64_t 62 util_lcm(int64_t x, int64_t y) 63 { 64 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 65 // vol. 2, 1st ed., pp. 298-299 66 int64_t x1 = x; 67 int64_t y1 = y; 68 69 int p2 = 0; 70 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 71 ++p2; 72 x1 >>= 1; 73 y1 >>= 1; 74 } 75 76 int64_t t; 77 if ((x1 & 1) == 1) { 78 t = -y1; 79 } else { 80 t = x1; 81 } 82 83 while (t != 0) { 84 while ((t & 1) == 0) { 85 t = t >> 1; 86 } 87 if (t > 0) { 88 x1 = t; 89 } else { 90 y1 = -t; 91 } 92 t = x1 - y1; 93 } 94 95 int64_t gcd = x1 << p2; 96 97 // x * y == gcd(x, y) * lcm(x, y) 98 return x / gcd * y; 99 } 100 #endif 101 102 static const UChar gPercent = 0x0025; 103 static const UChar gColon = 0x003a; 104 static const UChar gSemicolon = 0x003b; 105 static const UChar gLineFeed = 0x000a; 106 107 static const UChar gFourSpaces[] = 108 { 109 0x20, 0x20, 0x20, 0x20, 0 110 }; /* " " */ 111 static const UChar gPercentPercent[] = 112 { 113 0x25, 0x25, 0 114 }; /* "%%" */ 115 116 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) 117 : name() 118 , rules(0) 119 , negativeNumberRule(NULL) 120 , fIsFractionRuleSet(FALSE) 121 , fIsPublic(FALSE) 122 , fRecursionCount(0) 123 { 124 for (int i = 0; i < 3; ++i) { 125 fractionRules[i] = NULL; 126 } 127 128 if (U_FAILURE(status)) { 129 return; 130 } 131 132 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 133 134 if (description.length() == 0) { 135 // throw new IllegalArgumentException("Empty rule set description"); 136 status = U_PARSE_ERROR; 137 return; 138 } 139 140 // if the description begins with a rule set name (the rule set 141 // name can be omitted in formatter descriptions that consist 142 // of only one rule set), copy it out into our "name" member 143 // and delete it from the description 144 if (description.charAt(0) == gPercent) { 145 int32_t pos = description.indexOf(gColon); 146 if (pos == -1) { 147 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 148 status = U_PARSE_ERROR; 149 } else { 150 name.setTo(description, 0, pos); 151 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 152 } 153 description.remove(0, pos); 154 } 155 } else { 156 name.setTo(UNICODE_STRING_SIMPLE("%default")); 157 } 158 159 if (description.length() == 0) { 160 // throw new IllegalArgumentException("Empty rule set description"); 161 status = U_PARSE_ERROR; 162 } 163 164 fIsPublic = name.indexOf(gPercentPercent) != 0; 165 166 // all of the other members of NFRuleSet are initialized 167 // by parseRules() 168 } 169 170 void 171 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) 172 { 173 // start by creating a Vector whose elements are Strings containing 174 // the descriptions of the rules (one rule per element). The rules 175 // are separated by semicolons (there's no escape facility: ALL 176 // semicolons are rule delimiters) 177 178 if (U_FAILURE(status)) { 179 return; 180 } 181 182 // dlf - the original code kept a separate description array for no reason, 183 // so I got rid of it. The loop was too complex so I simplified it. 184 185 UnicodeString currentDescription; 186 int32_t oldP = 0; 187 while (oldP < description.length()) { 188 int32_t p = description.indexOf(gSemicolon, oldP); 189 if (p == -1) { 190 p = description.length(); 191 } 192 currentDescription.setTo(description, oldP, p - oldP); 193 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 194 oldP = p + 1; 195 } 196 197 // for rules that didn't specify a base value, their base values 198 // were initialized to 0. Make another pass through the list and 199 // set all those rules' base values. We also remove any special 200 // rules from the list and put them into their own member variables 201 int64_t defaultBaseValue = 0; 202 203 // (this isn't a for loop because we might be deleting items from 204 // the vector-- we want to make sure we only increment i when 205 // we _didn't_ delete aything from the vector) 206 uint32_t i = 0; 207 while (i < rules.size()) { 208 NFRule* rule = rules[i]; 209 210 switch (rule->getType()) { 211 // if the rule's base value is 0, fill in a default 212 // base value (this will be 1 plus the preceding 213 // rule's base value for regular rule sets, and the 214 // same as the preceding rule's base value in fraction 215 // rule sets) 216 case NFRule::kNoBase: 217 rule->setBaseValue(defaultBaseValue, status); 218 if (!isFractionRuleSet()) { 219 ++defaultBaseValue; 220 } 221 ++i; 222 break; 223 224 // if it's the negative-number rule, copy it into its own 225 // data member and delete it from the list 226 case NFRule::kNegativeNumberRule: 227 negativeNumberRule = rules.remove(i); 228 break; 229 230 // if it's the improper fraction rule, copy it into the 231 // correct element of fractionRules 232 case NFRule::kImproperFractionRule: 233 fractionRules[0] = rules.remove(i); 234 break; 235 236 // if it's the proper fraction rule, copy it into the 237 // correct element of fractionRules 238 case NFRule::kProperFractionRule: 239 fractionRules[1] = rules.remove(i); 240 break; 241 242 // if it's the master rule, copy it into the 243 // correct element of fractionRules 244 case NFRule::kMasterRule: 245 fractionRules[2] = rules.remove(i); 246 break; 247 248 // if it's a regular rule that already knows its base value, 249 // check to make sure the rules are in order, and update 250 // the default base value for the next rule 251 default: 252 if (rule->getBaseValue() < defaultBaseValue) { 253 // throw new IllegalArgumentException("Rules are not in order"); 254 status = U_PARSE_ERROR; 255 return; 256 } 257 defaultBaseValue = rule->getBaseValue(); 258 if (!isFractionRuleSet()) { 259 ++defaultBaseValue; 260 } 261 ++i; 262 break; 263 } 264 } 265 } 266 267 NFRuleSet::~NFRuleSet() 268 { 269 delete negativeNumberRule; 270 delete fractionRules[0]; 271 delete fractionRules[1]; 272 delete fractionRules[2]; 273 } 274 275 static UBool 276 util_equalRules(const NFRule* rule1, const NFRule* rule2) 277 { 278 if (rule1) { 279 if (rule2) { 280 return *rule1 == *rule2; 281 } 282 } else if (!rule2) { 283 return TRUE; 284 } 285 return FALSE; 286 } 287 288 UBool 289 NFRuleSet::operator==(const NFRuleSet& rhs) const 290 { 291 if (rules.size() == rhs.rules.size() && 292 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 293 name == rhs.name && 294 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && 295 util_equalRules(fractionRules[0], rhs.fractionRules[0]) && 296 util_equalRules(fractionRules[1], rhs.fractionRules[1]) && 297 util_equalRules(fractionRules[2], rhs.fractionRules[2])) { 298 299 for (uint32_t i = 0; i < rules.size(); ++i) { 300 if (*rules[i] != *rhs.rules[i]) { 301 return FALSE; 302 } 303 } 304 return TRUE; 305 } 306 return FALSE; 307 } 308 309 #define RECURSION_LIMIT 50 310 311 void 312 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const 313 { 314 NFRule *rule = findNormalRule(number); 315 if (rule) { // else error, but can't report it 316 NFRuleSet* ncThis = (NFRuleSet*)this; 317 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 318 // stop recursion 319 ncThis->fRecursionCount = 0; 320 } else { 321 rule->doFormat(number, toAppendTo, pos); 322 ncThis->fRecursionCount--; 323 } 324 } 325 } 326 327 void 328 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const 329 { 330 NFRule *rule = findDoubleRule(number); 331 if (rule) { // else error, but can't report it 332 NFRuleSet* ncThis = (NFRuleSet*)this; 333 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 334 // stop recursion 335 ncThis->fRecursionCount = 0; 336 } else { 337 rule->doFormat(number, toAppendTo, pos); 338 ncThis->fRecursionCount--; 339 } 340 } 341 } 342 343 NFRule* 344 NFRuleSet::findDoubleRule(double number) const 345 { 346 // if this is a fraction rule set, use findFractionRuleSetRule() 347 if (isFractionRuleSet()) { 348 return findFractionRuleSetRule(number); 349 } 350 351 // if the number is negative, return the negative number rule 352 // (if there isn't a negative-number rule, we pretend it's a 353 // positive number) 354 if (number < 0) { 355 if (negativeNumberRule) { 356 return negativeNumberRule; 357 } else { 358 number = -number; 359 } 360 } 361 362 // if the number isn't an integer, we use one of the fraction rules... 363 if (number != uprv_floor(number)) { 364 // if the number is between 0 and 1, return the proper 365 // fraction rule 366 if (number < 1 && fractionRules[1]) { 367 return fractionRules[1]; 368 } 369 // otherwise, return the improper fraction rule 370 else if (fractionRules[0]) { 371 return fractionRules[0]; 372 } 373 } 374 375 // if there's a master rule, use it to format the number 376 if (fractionRules[2]) { 377 return fractionRules[2]; 378 } 379 380 // and if we haven't yet returned a rule, use findNormalRule() 381 // to find the applicable rule 382 int64_t r = util64_fromDouble(number + 0.5); 383 return findNormalRule(r); 384 } 385 386 NFRule * 387 NFRuleSet::findNormalRule(int64_t number) const 388 { 389 // if this is a fraction rule set, use findFractionRuleSetRule() 390 // to find the rule (we should only go into this clause if the 391 // value is 0) 392 if (fIsFractionRuleSet) { 393 return findFractionRuleSetRule((double)number); 394 } 395 396 // if the number is negative, return the negative-number rule 397 // (if there isn't one, pretend the number is positive) 398 if (number < 0) { 399 if (negativeNumberRule) { 400 return negativeNumberRule; 401 } else { 402 number = -number; 403 } 404 } 405 406 // we have to repeat the preceding two checks, even though we 407 // do them in findRule(), because the version of format() that 408 // takes a long bypasses findRule() and goes straight to this 409 // function. This function does skip the fraction rules since 410 // we know the value is an integer (it also skips the master 411 // rule, since it's considered a fraction rule. Skipping the 412 // master rule in this function is also how we avoid infinite 413 // recursion) 414 415 // {dlf} unfortunately this fails if there are no rules except 416 // special rules. If there are no rules, use the master rule. 417 418 // binary-search the rule list for the applicable rule 419 // (a rule is used for all values from its base value to 420 // the next rule's base value) 421 int32_t hi = rules.size(); 422 if (hi > 0) { 423 int32_t lo = 0; 424 425 while (lo < hi) { 426 int32_t mid = (lo + hi) / 2; 427 if (rules[mid]->getBaseValue() == number) { 428 return rules[mid]; 429 } 430 else if (rules[mid]->getBaseValue() > number) { 431 hi = mid; 432 } 433 else { 434 lo = mid + 1; 435 } 436 } 437 if (hi == 0) { // bad rule set, minimum base > 0 438 return NULL; // want to throw exception here 439 } 440 441 NFRule *result = rules[hi - 1]; 442 443 // use shouldRollBack() to see whether we need to invoke the 444 // rollback rule (see shouldRollBack()'s documentation for 445 // an explanation of the rollback rule). If we do, roll back 446 // one rule and return that one instead of the one we'd normally 447 // return 448 if (result->shouldRollBack((double)number)) { 449 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 450 return NULL; 451 } 452 result = rules[hi - 2]; 453 } 454 return result; 455 } 456 // else use the master rule 457 return fractionRules[2]; 458 } 459 460 /** 461 * If this rule is a fraction rule set, this function is used by 462 * findRule() to select the most appropriate rule for formatting 463 * the number. Basically, the base value of each rule in the rule 464 * set is treated as the denominator of a fraction. Whichever 465 * denominator can produce the fraction closest in value to the 466 * number passed in is the result. If there's a tie, the earlier 467 * one in the list wins. (If there are two rules in a row with the 468 * same base value, the first one is used when the numerator of the 469 * fraction would be 1, and the second rule is used the rest of the 470 * time. 471 * @param number The number being formatted (which will always be 472 * a number between 0 and 1) 473 * @return The rule to use to format this number 474 */ 475 NFRule* 476 NFRuleSet::findFractionRuleSetRule(double number) const 477 { 478 // the obvious way to do this (multiply the value being formatted 479 // by each rule's base value until you get an integral result) 480 // doesn't work because of rounding error. This method is more 481 // accurate 482 483 // find the least common multiple of the rules' base values 484 // and multiply this by the number being formatted. This is 485 // all the precision we need, and we can do all of the rest 486 // of the math using integer arithmetic 487 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 488 int64_t numerator; 489 { 490 for (uint32_t i = 1; i < rules.size(); ++i) { 491 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 492 } 493 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 494 } 495 // for each rule, do the following... 496 int64_t tempDifference; 497 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 498 int32_t winner = 0; 499 for (uint32_t i = 0; i < rules.size(); ++i) { 500 // "numerator" is the numerator of the fraction if the 501 // denominator is the LCD. The numerator if the rule's 502 // base value is the denominator is "numerator" times the 503 // base value divided bythe LCD. Here we check to see if 504 // that's an integer, and if not, how close it is to being 505 // an integer. 506 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 507 508 509 // normalize the result of the above calculation: we want 510 // the numerator's distance from the CLOSEST multiple 511 // of the LCD 512 if (leastCommonMultiple - tempDifference < tempDifference) { 513 tempDifference = leastCommonMultiple - tempDifference; 514 } 515 516 // if this is as close as we've come, keep track of how close 517 // that is, and the line number of the rule that did it. If 518 // we've scored a direct hit, we don't have to look at any more 519 // rules 520 if (tempDifference < difference) { 521 difference = tempDifference; 522 winner = i; 523 if (difference == 0) { 524 break; 525 } 526 } 527 } 528 529 // if we have two successive rules that both have the winning base 530 // value, then the first one (the one we found above) is used if 531 // the numerator of the fraction is 1 and the second one is used if 532 // the numerator of the fraction is anything else (this lets us 533 // do things like "one third"/"two thirds" without haveing to define 534 // a whole bunch of extra rule sets) 535 if ((unsigned)(winner + 1) < rules.size() && 536 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 537 double n = ((double)rules[winner]->getBaseValue()) * number; 538 if (n < 0.5 || n >= 2) { 539 ++winner; 540 } 541 } 542 543 // finally, return the winning rule 544 return rules[winner]; 545 } 546 547 /** 548 * Parses a string. Matches the string to be parsed against each 549 * of its rules (with a base value less than upperBound) and returns 550 * the value produced by the rule that matched the most charcters 551 * in the source string. 552 * @param text The string to parse 553 * @param parsePosition The initial position is ignored and assumed 554 * to be 0. On exit, this object has been updated to point to the 555 * first character position this rule set didn't consume. 556 * @param upperBound Limits the rules that can be allowed to match. 557 * Only rules whose base values are strictly less than upperBound 558 * are considered. 559 * @return The numerical result of parsing this string. This will 560 * be the matching rule's base value, composed appropriately with 561 * the results of matching any of its substitutions. The object 562 * will be an instance of Long if it's an integral value; otherwise, 563 * it will be an instance of Double. This function always returns 564 * a valid object: If nothing matched the input string at all, 565 * this function returns new Long(0), and the parse position is 566 * left unchanged. 567 */ 568 #ifdef RBNF_DEBUG 569 #include <stdio.h> 570 571 static void dumpUS(FILE* f, const UnicodeString& us) { 572 int len = us.length(); 573 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 574 if (buf != NULL) { 575 us.extract(0, len, buf); 576 buf[len] = 0; 577 fprintf(f, "%s", buf); 578 uprv_free(buf); //delete[] buf; 579 } 580 } 581 #endif 582 583 UBool 584 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 585 { 586 // try matching each rule in the rule set against the text being 587 // parsed. Whichever one matches the most characters is the one 588 // that determines the value we return. 589 590 result.setLong(0); 591 592 // dump out if there's no text to parse 593 if (text.length() == 0) { 594 return 0; 595 } 596 597 ParsePosition highWaterMark; 598 ParsePosition workingPos = pos; 599 600 #ifdef RBNF_DEBUG 601 fprintf(stderr, "<nfrs> %x '", this); 602 dumpUS(stderr, name); 603 fprintf(stderr, "' text '"); 604 dumpUS(stderr, text); 605 fprintf(stderr, "'\n"); 606 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 607 #endif 608 609 // start by trying the negative number rule (if there is one) 610 if (negativeNumberRule) { 611 Formattable tempResult; 612 #ifdef RBNF_DEBUG 613 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound); 614 #endif 615 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); 616 #ifdef RBNF_DEBUG 617 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex()); 618 #endif 619 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 620 result = tempResult; 621 highWaterMark = workingPos; 622 } 623 workingPos = pos; 624 } 625 #ifdef RBNF_DEBUG 626 fprintf(stderr, "<nfrs> continue fractional with text '"); 627 dumpUS(stderr, text); 628 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 629 #endif 630 // then try each of the fraction rules 631 { 632 for (int i = 0; i < 3; i++) { 633 if (fractionRules[i]) { 634 Formattable tempResult; 635 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 636 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 637 result = tempResult; 638 highWaterMark = workingPos; 639 } 640 workingPos = pos; 641 } 642 } 643 } 644 #ifdef RBNF_DEBUG 645 fprintf(stderr, "<nfrs> continue other with text '"); 646 dumpUS(stderr, text); 647 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 648 #endif 649 650 // finally, go through the regular rules one at a time. We start 651 // at the end of the list because we want to try matching the most 652 // sigificant rule first (this helps ensure that we parse 653 // "five thousand three hundred six" as 654 // "(five thousand) (three hundred) (six)" rather than 655 // "((five thousand three) hundred) (six)"). Skip rules whose 656 // base values are higher than the upper bound (again, this helps 657 // limit ambiguity by making sure the rules that match a rule's 658 // are less significant than the rule containing the substitutions)/ 659 { 660 int64_t ub = util64_fromDouble(upperBound); 661 #ifdef RBNF_DEBUG 662 { 663 char ubstr[64]; 664 util64_toa(ub, ubstr, 64); 665 char ubstrhex[64]; 666 util64_toa(ub, ubstrhex, 64, 16); 667 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 668 } 669 #endif 670 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 671 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 672 continue; 673 } 674 Formattable tempResult; 675 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 676 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 677 result = tempResult; 678 highWaterMark = workingPos; 679 } 680 workingPos = pos; 681 } 682 } 683 #ifdef RBNF_DEBUG 684 fprintf(stderr, "<nfrs> exit\n"); 685 #endif 686 // finally, update the parse postion we were passed to point to the 687 // first character we didn't use, and return the result that 688 // corresponds to that string of characters 689 pos = highWaterMark; 690 691 return 1; 692 } 693 694 void 695 NFRuleSet::appendRules(UnicodeString& result) const 696 { 697 // the rule set name goes first... 698 result.append(name); 699 result.append(gColon); 700 result.append(gLineFeed); 701 702 // followed by the regular rules... 703 for (uint32_t i = 0; i < rules.size(); i++) { 704 result.append(gFourSpaces); 705 rules[i]->_appendRuleText(result); 706 result.append(gLineFeed); 707 } 708 709 // followed by the special rules (if they exist) 710 if (negativeNumberRule) { 711 result.append(gFourSpaces); 712 negativeNumberRule->_appendRuleText(result); 713 result.append(gLineFeed); 714 } 715 716 { 717 for (uint32_t i = 0; i < 3; ++i) { 718 if (fractionRules[i]) { 719 result.append(gFourSpaces); 720 fractionRules[i]->_appendRuleText(result); 721 result.append(gLineFeed); 722 } 723 } 724 } 725 } 726 727 // utility functions 728 729 int64_t util64_fromDouble(double d) { 730 int64_t result = 0; 731 if (!uprv_isNaN(d)) { 732 double mant = uprv_maxMantissa(); 733 if (d < -mant) { 734 d = -mant; 735 } else if (d > mant) { 736 d = mant; 737 } 738 UBool neg = d < 0; 739 if (neg) { 740 d = -d; 741 } 742 result = (int64_t)uprv_floor(d); 743 if (neg) { 744 result = -result; 745 } 746 } 747 return result; 748 } 749 750 int64_t util64_pow(int32_t r, uint32_t e) { 751 if (r == 0) { 752 return 0; 753 } else if (e == 0) { 754 return 1; 755 } else { 756 int64_t n = r; 757 while (--e > 0) { 758 n *= r; 759 } 760 return n; 761 } 762 } 763 764 static const uint8_t asciiDigits[] = { 765 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 766 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 767 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 768 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 769 0x77u, 0x78u, 0x79u, 0x7au, 770 }; 771 772 static const UChar kUMinus = (UChar)0x002d; 773 774 #ifdef RBNF_DEBUG 775 static const char kMinus = '-'; 776 777 static const uint8_t digitInfo[] = { 778 0, 0, 0, 0, 0, 0, 0, 0, 779 0, 0, 0, 0, 0, 0, 0, 0, 780 0, 0, 0, 0, 0, 0, 0, 0, 781 0, 0, 0, 0, 0, 0, 0, 0, 782 0, 0, 0, 0, 0, 0, 0, 0, 783 0, 0, 0, 0, 0, 0, 0, 0, 784 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 785 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 786 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 787 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 788 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 789 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 790 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 791 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 792 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 793 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 794 }; 795 796 int64_t util64_atoi(const char* str, uint32_t radix) 797 { 798 if (radix > 36) { 799 radix = 36; 800 } else if (radix < 2) { 801 radix = 2; 802 } 803 int64_t lradix = radix; 804 805 int neg = 0; 806 if (*str == kMinus) { 807 ++str; 808 neg = 1; 809 } 810 int64_t result = 0; 811 uint8_t b; 812 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 813 result *= lradix; 814 result += (int32_t)b; 815 } 816 if (neg) { 817 result = -result; 818 } 819 return result; 820 } 821 822 int64_t util64_utoi(const UChar* str, uint32_t radix) 823 { 824 if (radix > 36) { 825 radix = 36; 826 } else if (radix < 2) { 827 radix = 2; 828 } 829 int64_t lradix = radix; 830 831 int neg = 0; 832 if (*str == kUMinus) { 833 ++str; 834 neg = 1; 835 } 836 int64_t result = 0; 837 UChar c; 838 uint8_t b; 839 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 840 result *= lradix; 841 result += (int32_t)b; 842 } 843 if (neg) { 844 result = -result; 845 } 846 return result; 847 } 848 849 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 850 { 851 if (radix > 36) { 852 radix = 36; 853 } else if (radix < 2) { 854 radix = 2; 855 } 856 int64_t base = radix; 857 858 char* p = buf; 859 if (len && (w < 0) && (radix == 10) && !raw) { 860 w = -w; 861 *p++ = kMinus; 862 --len; 863 } else if (len && (w == 0)) { 864 *p++ = (char)raw ? 0 : asciiDigits[0]; 865 --len; 866 } 867 868 while (len && w != 0) { 869 int64_t n = w / base; 870 int64_t m = n * base; 871 int32_t d = (int32_t)(w-m); 872 *p++ = raw ? (char)d : asciiDigits[d]; 873 w = n; 874 --len; 875 } 876 if (len) { 877 *p = 0; // null terminate if room for caller convenience 878 } 879 880 len = p - buf; 881 if (*buf == kMinus) { 882 ++buf; 883 } 884 while (--p > buf) { 885 char c = *p; 886 *p = *buf; 887 *buf = c; 888 ++buf; 889 } 890 891 return len; 892 } 893 #endif 894 895 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 896 { 897 if (radix > 36) { 898 radix = 36; 899 } else if (radix < 2) { 900 radix = 2; 901 } 902 int64_t base = radix; 903 904 UChar* p = buf; 905 if (len && (w < 0) && (radix == 10) && !raw) { 906 w = -w; 907 *p++ = kUMinus; 908 --len; 909 } else if (len && (w == 0)) { 910 *p++ = (UChar)raw ? 0 : asciiDigits[0]; 911 --len; 912 } 913 914 while (len && (w != 0)) { 915 int64_t n = w / base; 916 int64_t m = n * base; 917 int32_t d = (int32_t)(w-m); 918 *p++ = (UChar)(raw ? d : asciiDigits[d]); 919 w = n; 920 --len; 921 } 922 if (len) { 923 *p = 0; // null terminate if room for caller convenience 924 } 925 926 len = (uint32_t)(p - buf); 927 if (*buf == kUMinus) { 928 ++buf; 929 } 930 while (--p > buf) { 931 UChar c = *p; 932 *p = *buf; 933 *buf = c; 934 ++buf; 935 } 936 937 return len; 938 } 939 940 941 U_NAMESPACE_END 942 943 /* U_HAVE_RBNF */ 944 #endif 945 946