1 /* 2 ****************************************************************************** 3 * Copyright (C) 1997-2008, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ****************************************************************************** 6 * file name: nfrs.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * Modification history 12 * Date Name Comments 13 * 10/11/2001 Doug Ported from ICU4J 14 */ 15 16 #include "nfrs.h" 17 18 #if U_HAVE_RBNF 19 20 #include "unicode/uchar.h" 21 #include "nfrule.h" 22 #include "nfrlist.h" 23 24 #ifdef RBNF_DEBUG 25 #include "cmemory.h" 26 #endif 27 28 #include "util.h" 29 30 U_NAMESPACE_BEGIN 31 32 #if 0 33 // euclid's algorithm works with doubles 34 // note, doubles only get us up to one quadrillion or so, which 35 // isn't as much range as we get with longs. We probably still 36 // want either 64-bit math, or BigInteger. 37 38 static int64_t 39 util_lcm(int64_t x, int64_t y) 40 { 41 x.abs(); 42 y.abs(); 43 44 if (x == 0 || y == 0) { 45 return 0; 46 } else { 47 do { 48 if (x < y) { 49 int64_t t = x; x = y; y = t; 50 } 51 x -= y * (x/y); 52 } while (x != 0); 53 54 return y; 55 } 56 } 57 58 #else 59 /** 60 * Calculates the least common multiple of x and y. 61 */ 62 static int64_t 63 util_lcm(int64_t x, int64_t y) 64 { 65 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 66 // vol. 2, 1st ed., pp. 298-299 67 int64_t x1 = x; 68 int64_t y1 = y; 69 70 int p2 = 0; 71 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 72 ++p2; 73 x1 >>= 1; 74 y1 >>= 1; 75 } 76 77 int64_t t; 78 if ((x1 & 1) == 1) { 79 t = -y1; 80 } else { 81 t = x1; 82 } 83 84 while (t != 0) { 85 while ((t & 1) == 0) { 86 t = t >> 1; 87 } 88 if (t > 0) { 89 x1 = t; 90 } else { 91 y1 = -t; 92 } 93 t = x1 - y1; 94 } 95 96 int64_t gcd = x1 << p2; 97 98 // x * y == gcd(x, y) * lcm(x, y) 99 return x / gcd * y; 100 } 101 #endif 102 103 static const UChar gPercent = 0x0025; 104 static const UChar gColon = 0x003a; 105 static const UChar gSemicolon = 0x003b; 106 static const UChar gLineFeed = 0x000a; 107 108 static const UChar gFourSpaces[] = 109 { 110 0x20, 0x20, 0x20, 0x20, 0 111 }; /* " " */ 112 static const UChar gPercentPercent[] = 113 { 114 0x25, 0x25, 0 115 }; /* "%%" */ 116 117 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) 118 : name() 119 , rules(0) 120 , negativeNumberRule(NULL) 121 , fIsFractionRuleSet(FALSE) 122 , fIsPublic(FALSE) 123 , fRecursionCount(0) 124 { 125 for (int i = 0; i < 3; ++i) { 126 fractionRules[i] = NULL; 127 } 128 129 if (U_FAILURE(status)) { 130 return; 131 } 132 133 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 134 135 if (description.length() == 0) { 136 // throw new IllegalArgumentException("Empty rule set description"); 137 status = U_PARSE_ERROR; 138 return; 139 } 140 141 // if the description begins with a rule set name (the rule set 142 // name can be omitted in formatter descriptions that consist 143 // of only one rule set), copy it out into our "name" member 144 // and delete it from the description 145 if (description.charAt(0) == gPercent) { 146 int32_t pos = description.indexOf(gColon); 147 if (pos == -1) { 148 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 149 status = U_PARSE_ERROR; 150 } else { 151 name.setTo(description, 0, pos); 152 while (pos < description.length() && uprv_isRuleWhiteSpace(description.charAt(++pos))) { 153 } 154 description.remove(0, pos); 155 } 156 } else { 157 name.setTo(UNICODE_STRING_SIMPLE("%default")); 158 } 159 160 if (description.length() == 0) { 161 // throw new IllegalArgumentException("Empty rule set description"); 162 status = U_PARSE_ERROR; 163 } 164 165 fIsPublic = name.indexOf(gPercentPercent) != 0; 166 167 // all of the other members of NFRuleSet are initialized 168 // by parseRules() 169 } 170 171 void 172 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) 173 { 174 // start by creating a Vector whose elements are Strings containing 175 // the descriptions of the rules (one rule per element). The rules 176 // are separated by semicolons (there's no escape facility: ALL 177 // semicolons are rule delimiters) 178 179 if (U_FAILURE(status)) { 180 return; 181 } 182 183 // dlf - the original code kept a separate description array for no reason, 184 // so I got rid of it. The loop was too complex so I simplified it. 185 186 UnicodeString currentDescription; 187 int32_t oldP = 0; 188 while (oldP < description.length()) { 189 int32_t p = description.indexOf(gSemicolon, oldP); 190 if (p == -1) { 191 p = description.length(); 192 } 193 currentDescription.setTo(description, oldP, p - oldP); 194 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 195 oldP = p + 1; 196 } 197 198 // for rules that didn't specify a base value, their base values 199 // were initialized to 0. Make another pass through the list and 200 // set all those rules' base values. We also remove any special 201 // rules from the list and put them into their own member variables 202 int64_t defaultBaseValue = 0; 203 204 // (this isn't a for loop because we might be deleting items from 205 // the vector-- we want to make sure we only increment i when 206 // we _didn't_ delete aything from the vector) 207 uint32_t i = 0; 208 while (i < rules.size()) { 209 NFRule* rule = rules[i]; 210 211 switch (rule->getType()) { 212 // if the rule's base value is 0, fill in a default 213 // base value (this will be 1 plus the preceding 214 // rule's base value for regular rule sets, and the 215 // same as the preceding rule's base value in fraction 216 // rule sets) 217 case NFRule::kNoBase: 218 rule->setBaseValue(defaultBaseValue, status); 219 if (!isFractionRuleSet()) { 220 ++defaultBaseValue; 221 } 222 ++i; 223 break; 224 225 // if it's the negative-number rule, copy it into its own 226 // data member and delete it from the list 227 case NFRule::kNegativeNumberRule: 228 negativeNumberRule = rules.remove(i); 229 break; 230 231 // if it's the improper fraction rule, copy it into the 232 // correct element of fractionRules 233 case NFRule::kImproperFractionRule: 234 fractionRules[0] = rules.remove(i); 235 break; 236 237 // if it's the proper fraction rule, copy it into the 238 // correct element of fractionRules 239 case NFRule::kProperFractionRule: 240 fractionRules[1] = rules.remove(i); 241 break; 242 243 // if it's the master rule, copy it into the 244 // correct element of fractionRules 245 case NFRule::kMasterRule: 246 fractionRules[2] = rules.remove(i); 247 break; 248 249 // if it's a regular rule that already knows its base value, 250 // check to make sure the rules are in order, and update 251 // the default base value for the next rule 252 default: 253 if (rule->getBaseValue() < defaultBaseValue) { 254 // throw new IllegalArgumentException("Rules are not in order"); 255 status = U_PARSE_ERROR; 256 return; 257 } 258 defaultBaseValue = rule->getBaseValue(); 259 if (!isFractionRuleSet()) { 260 ++defaultBaseValue; 261 } 262 ++i; 263 break; 264 } 265 } 266 } 267 268 NFRuleSet::~NFRuleSet() 269 { 270 delete negativeNumberRule; 271 delete fractionRules[0]; 272 delete fractionRules[1]; 273 delete fractionRules[2]; 274 } 275 276 static UBool 277 util_equalRules(const NFRule* rule1, const NFRule* rule2) 278 { 279 if (rule1) { 280 if (rule2) { 281 return *rule1 == *rule2; 282 } 283 } else if (!rule2) { 284 return TRUE; 285 } 286 return FALSE; 287 } 288 289 UBool 290 NFRuleSet::operator==(const NFRuleSet& rhs) const 291 { 292 if (rules.size() == rhs.rules.size() && 293 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 294 name == rhs.name && 295 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && 296 util_equalRules(fractionRules[0], rhs.fractionRules[0]) && 297 util_equalRules(fractionRules[1], rhs.fractionRules[1]) && 298 util_equalRules(fractionRules[2], rhs.fractionRules[2])) { 299 300 for (uint32_t i = 0; i < rules.size(); ++i) { 301 if (*rules[i] != *rhs.rules[i]) { 302 return FALSE; 303 } 304 } 305 return TRUE; 306 } 307 return FALSE; 308 } 309 310 #define RECURSION_LIMIT 50 311 312 void 313 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const 314 { 315 NFRule *rule = findNormalRule(number); 316 if (rule) { // else error, but can't report it 317 NFRuleSet* ncThis = (NFRuleSet*)this; 318 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 319 // stop recursion 320 ncThis->fRecursionCount = 0; 321 } else { 322 rule->doFormat(number, toAppendTo, pos); 323 ncThis->fRecursionCount--; 324 } 325 } 326 } 327 328 void 329 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const 330 { 331 NFRule *rule = findDoubleRule(number); 332 if (rule) { // else error, but can't report it 333 NFRuleSet* ncThis = (NFRuleSet*)this; 334 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 335 // stop recursion 336 ncThis->fRecursionCount = 0; 337 } else { 338 rule->doFormat(number, toAppendTo, pos); 339 ncThis->fRecursionCount--; 340 } 341 } 342 } 343 344 NFRule* 345 NFRuleSet::findDoubleRule(double number) const 346 { 347 // if this is a fraction rule set, use findFractionRuleSetRule() 348 if (isFractionRuleSet()) { 349 return findFractionRuleSetRule(number); 350 } 351 352 // if the number is negative, return the negative number rule 353 // (if there isn't a negative-number rule, we pretend it's a 354 // positive number) 355 if (number < 0) { 356 if (negativeNumberRule) { 357 return negativeNumberRule; 358 } else { 359 number = -number; 360 } 361 } 362 363 // if the number isn't an integer, we use one of the fraction rules... 364 if (number != uprv_floor(number)) { 365 // if the number is between 0 and 1, return the proper 366 // fraction rule 367 if (number < 1 && fractionRules[1]) { 368 return fractionRules[1]; 369 } 370 // otherwise, return the improper fraction rule 371 else if (fractionRules[0]) { 372 return fractionRules[0]; 373 } 374 } 375 376 // if there's a master rule, use it to format the number 377 if (fractionRules[2]) { 378 return fractionRules[2]; 379 } 380 381 // and if we haven't yet returned a rule, use findNormalRule() 382 // to find the applicable rule 383 int64_t r = util64_fromDouble(number + 0.5); 384 return findNormalRule(r); 385 } 386 387 NFRule * 388 NFRuleSet::findNormalRule(int64_t number) const 389 { 390 // if this is a fraction rule set, use findFractionRuleSetRule() 391 // to find the rule (we should only go into this clause if the 392 // value is 0) 393 if (fIsFractionRuleSet) { 394 return findFractionRuleSetRule((double)number); 395 } 396 397 // if the number is negative, return the negative-number rule 398 // (if there isn't one, pretend the number is positive) 399 if (number < 0) { 400 if (negativeNumberRule) { 401 return negativeNumberRule; 402 } else { 403 number = -number; 404 } 405 } 406 407 // we have to repeat the preceding two checks, even though we 408 // do them in findRule(), because the version of format() that 409 // takes a long bypasses findRule() and goes straight to this 410 // function. This function does skip the fraction rules since 411 // we know the value is an integer (it also skips the master 412 // rule, since it's considered a fraction rule. Skipping the 413 // master rule in this function is also how we avoid infinite 414 // recursion) 415 416 // {dlf} unfortunately this fails if there are no rules except 417 // special rules. If there are no rules, use the master rule. 418 419 // binary-search the rule list for the applicable rule 420 // (a rule is used for all values from its base value to 421 // the next rule's base value) 422 int32_t hi = rules.size(); 423 if (hi > 0) { 424 int32_t lo = 0; 425 426 while (lo < hi) { 427 int32_t mid = (lo + hi) / 2; 428 if (rules[mid]->getBaseValue() == number) { 429 return rules[mid]; 430 } 431 else if (rules[mid]->getBaseValue() > number) { 432 hi = mid; 433 } 434 else { 435 lo = mid + 1; 436 } 437 } 438 if (hi == 0) { // bad rule set, minimum base > 0 439 return NULL; // want to throw exception here 440 } 441 442 NFRule *result = rules[hi - 1]; 443 444 // use shouldRollBack() to see whether we need to invoke the 445 // rollback rule (see shouldRollBack()'s documentation for 446 // an explanation of the rollback rule). If we do, roll back 447 // one rule and return that one instead of the one we'd normally 448 // return 449 if (result->shouldRollBack((double)number)) { 450 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 451 return NULL; 452 } 453 result = rules[hi - 2]; 454 } 455 return result; 456 } 457 // else use the master rule 458 return fractionRules[2]; 459 } 460 461 /** 462 * If this rule is a fraction rule set, this function is used by 463 * findRule() to select the most appropriate rule for formatting 464 * the number. Basically, the base value of each rule in the rule 465 * set is treated as the denominator of a fraction. Whichever 466 * denominator can produce the fraction closest in value to the 467 * number passed in is the result. If there's a tie, the earlier 468 * one in the list wins. (If there are two rules in a row with the 469 * same base value, the first one is used when the numerator of the 470 * fraction would be 1, and the second rule is used the rest of the 471 * time. 472 * @param number The number being formatted (which will always be 473 * a number between 0 and 1) 474 * @return The rule to use to format this number 475 */ 476 NFRule* 477 NFRuleSet::findFractionRuleSetRule(double number) const 478 { 479 // the obvious way to do this (multiply the value being formatted 480 // by each rule's base value until you get an integral result) 481 // doesn't work because of rounding error. This method is more 482 // accurate 483 484 // find the least common multiple of the rules' base values 485 // and multiply this by the number being formatted. This is 486 // all the precision we need, and we can do all of the rest 487 // of the math using integer arithmetic 488 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 489 int64_t numerator; 490 { 491 for (uint32_t i = 1; i < rules.size(); ++i) { 492 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 493 } 494 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 495 } 496 // for each rule, do the following... 497 int64_t tempDifference; 498 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 499 int32_t winner = 0; 500 for (uint32_t i = 0; i < rules.size(); ++i) { 501 // "numerator" is the numerator of the fraction if the 502 // denominator is the LCD. The numerator if the rule's 503 // base value is the denominator is "numerator" times the 504 // base value divided bythe LCD. Here we check to see if 505 // that's an integer, and if not, how close it is to being 506 // an integer. 507 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 508 509 510 // normalize the result of the above calculation: we want 511 // the numerator's distance from the CLOSEST multiple 512 // of the LCD 513 if (leastCommonMultiple - tempDifference < tempDifference) { 514 tempDifference = leastCommonMultiple - tempDifference; 515 } 516 517 // if this is as close as we've come, keep track of how close 518 // that is, and the line number of the rule that did it. If 519 // we've scored a direct hit, we don't have to look at any more 520 // rules 521 if (tempDifference < difference) { 522 difference = tempDifference; 523 winner = i; 524 if (difference == 0) { 525 break; 526 } 527 } 528 } 529 530 // if we have two successive rules that both have the winning base 531 // value, then the first one (the one we found above) is used if 532 // the numerator of the fraction is 1 and the second one is used if 533 // the numerator of the fraction is anything else (this lets us 534 // do things like "one third"/"two thirds" without haveing to define 535 // a whole bunch of extra rule sets) 536 if ((unsigned)(winner + 1) < rules.size() && 537 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 538 double n = ((double)rules[winner]->getBaseValue()) * number; 539 if (n < 0.5 || n >= 2) { 540 ++winner; 541 } 542 } 543 544 // finally, return the winning rule 545 return rules[winner]; 546 } 547 548 /** 549 * Parses a string. Matches the string to be parsed against each 550 * of its rules (with a base value less than upperBound) and returns 551 * the value produced by the rule that matched the most charcters 552 * in the source string. 553 * @param text The string to parse 554 * @param parsePosition The initial position is ignored and assumed 555 * to be 0. On exit, this object has been updated to point to the 556 * first character position this rule set didn't consume. 557 * @param upperBound Limits the rules that can be allowed to match. 558 * Only rules whose base values are strictly less than upperBound 559 * are considered. 560 * @return The numerical result of parsing this string. This will 561 * be the matching rule's base value, composed appropriately with 562 * the results of matching any of its substitutions. The object 563 * will be an instance of Long if it's an integral value; otherwise, 564 * it will be an instance of Double. This function always returns 565 * a valid object: If nothing matched the input string at all, 566 * this function returns new Long(0), and the parse position is 567 * left unchanged. 568 */ 569 #ifdef RBNF_DEBUG 570 #include <stdio.h> 571 572 static void dumpUS(FILE* f, const UnicodeString& us) { 573 int len = us.length(); 574 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 575 if (buf != NULL) { 576 us.extract(0, len, buf); 577 buf[len] = 0; 578 fprintf(f, "%s", buf); 579 uprv_free(buf); //delete[] buf; 580 } 581 } 582 #endif 583 584 UBool 585 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 586 { 587 // try matching each rule in the rule set against the text being 588 // parsed. Whichever one matches the most characters is the one 589 // that determines the value we return. 590 591 result.setLong(0); 592 593 // dump out if there's no text to parse 594 if (text.length() == 0) { 595 return 0; 596 } 597 598 ParsePosition highWaterMark; 599 ParsePosition workingPos = pos; 600 601 #ifdef RBNF_DEBUG 602 fprintf(stderr, "<nfrs> %x '", this); 603 dumpUS(stderr, name); 604 fprintf(stderr, "' text '"); 605 dumpUS(stderr, text); 606 fprintf(stderr, "'\n"); 607 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 608 #endif 609 610 // start by trying the negative number rule (if there is one) 611 if (negativeNumberRule) { 612 Formattable tempResult; 613 #ifdef RBNF_DEBUG 614 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound); 615 #endif 616 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); 617 #ifdef RBNF_DEBUG 618 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex()); 619 #endif 620 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 621 result = tempResult; 622 highWaterMark = workingPos; 623 } 624 workingPos = pos; 625 } 626 #ifdef RBNF_DEBUG 627 fprintf(stderr, "<nfrs> continue fractional with text '"); 628 dumpUS(stderr, text); 629 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 630 #endif 631 // then try each of the fraction rules 632 { 633 for (int i = 0; i < 3; i++) { 634 if (fractionRules[i]) { 635 Formattable tempResult; 636 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 637 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 638 result = tempResult; 639 highWaterMark = workingPos; 640 } 641 workingPos = pos; 642 } 643 } 644 } 645 #ifdef RBNF_DEBUG 646 fprintf(stderr, "<nfrs> continue other with text '"); 647 dumpUS(stderr, text); 648 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 649 #endif 650 651 // finally, go through the regular rules one at a time. We start 652 // at the end of the list because we want to try matching the most 653 // sigificant rule first (this helps ensure that we parse 654 // "five thousand three hundred six" as 655 // "(five thousand) (three hundred) (six)" rather than 656 // "((five thousand three) hundred) (six)"). Skip rules whose 657 // base values are higher than the upper bound (again, this helps 658 // limit ambiguity by making sure the rules that match a rule's 659 // are less significant than the rule containing the substitutions)/ 660 { 661 int64_t ub = util64_fromDouble(upperBound); 662 #ifdef RBNF_DEBUG 663 { 664 char ubstr[64]; 665 util64_toa(ub, ubstr, 64); 666 char ubstrhex[64]; 667 util64_toa(ub, ubstrhex, 64, 16); 668 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 669 } 670 #endif 671 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 672 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 673 continue; 674 } 675 Formattable tempResult; 676 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 677 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 678 result = tempResult; 679 highWaterMark = workingPos; 680 } 681 workingPos = pos; 682 } 683 } 684 #ifdef RBNF_DEBUG 685 fprintf(stderr, "<nfrs> exit\n"); 686 #endif 687 // finally, update the parse postion we were passed to point to the 688 // first character we didn't use, and return the result that 689 // corresponds to that string of characters 690 pos = highWaterMark; 691 692 return 1; 693 } 694 695 void 696 NFRuleSet::appendRules(UnicodeString& result) const 697 { 698 // the rule set name goes first... 699 result.append(name); 700 result.append(gColon); 701 result.append(gLineFeed); 702 703 // followed by the regular rules... 704 for (uint32_t i = 0; i < rules.size(); i++) { 705 result.append(gFourSpaces); 706 rules[i]->_appendRuleText(result); 707 result.append(gLineFeed); 708 } 709 710 // followed by the special rules (if they exist) 711 if (negativeNumberRule) { 712 result.append(gFourSpaces); 713 negativeNumberRule->_appendRuleText(result); 714 result.append(gLineFeed); 715 } 716 717 { 718 for (uint32_t i = 0; i < 3; ++i) { 719 if (fractionRules[i]) { 720 result.append(gFourSpaces); 721 fractionRules[i]->_appendRuleText(result); 722 result.append(gLineFeed); 723 } 724 } 725 } 726 } 727 728 // utility functions 729 730 int64_t util64_fromDouble(double d) { 731 int64_t result = 0; 732 if (!uprv_isNaN(d)) { 733 double mant = uprv_maxMantissa(); 734 if (d < -mant) { 735 d = -mant; 736 } else if (d > mant) { 737 d = mant; 738 } 739 UBool neg = d < 0; 740 if (neg) { 741 d = -d; 742 } 743 result = (int64_t)uprv_floor(d); 744 if (neg) { 745 result = -result; 746 } 747 } 748 return result; 749 } 750 751 int64_t util64_pow(int32_t r, uint32_t e) { 752 if (r == 0) { 753 return 0; 754 } else if (e == 0) { 755 return 1; 756 } else { 757 int64_t n = r; 758 while (--e > 0) { 759 n *= r; 760 } 761 return n; 762 } 763 } 764 765 static const uint8_t asciiDigits[] = { 766 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 767 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 768 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 769 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 770 0x77u, 0x78u, 0x79u, 0x7au, 771 }; 772 773 static const UChar kUMinus = (UChar)0x002d; 774 775 #ifdef RBNF_DEBUG 776 static const char kMinus = '-'; 777 778 static const uint8_t digitInfo[] = { 779 0, 0, 0, 0, 0, 0, 0, 0, 780 0, 0, 0, 0, 0, 0, 0, 0, 781 0, 0, 0, 0, 0, 0, 0, 0, 782 0, 0, 0, 0, 0, 0, 0, 0, 783 0, 0, 0, 0, 0, 0, 0, 0, 784 0, 0, 0, 0, 0, 0, 0, 0, 785 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 786 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 787 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 788 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 789 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 790 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 791 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 792 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 793 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 794 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 795 }; 796 797 int64_t util64_atoi(const char* str, uint32_t radix) 798 { 799 if (radix > 36) { 800 radix = 36; 801 } else if (radix < 2) { 802 radix = 2; 803 } 804 int64_t lradix = radix; 805 806 int neg = 0; 807 if (*str == kMinus) { 808 ++str; 809 neg = 1; 810 } 811 int64_t result = 0; 812 uint8_t b; 813 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 814 result *= lradix; 815 result += (int32_t)b; 816 } 817 if (neg) { 818 result = -result; 819 } 820 return result; 821 } 822 823 int64_t util64_utoi(const UChar* str, uint32_t radix) 824 { 825 if (radix > 36) { 826 radix = 36; 827 } else if (radix < 2) { 828 radix = 2; 829 } 830 int64_t lradix = radix; 831 832 int neg = 0; 833 if (*str == kUMinus) { 834 ++str; 835 neg = 1; 836 } 837 int64_t result = 0; 838 UChar c; 839 uint8_t b; 840 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 841 result *= lradix; 842 result += (int32_t)b; 843 } 844 if (neg) { 845 result = -result; 846 } 847 return result; 848 } 849 850 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 851 { 852 if (radix > 36) { 853 radix = 36; 854 } else if (radix < 2) { 855 radix = 2; 856 } 857 int64_t base = radix; 858 859 char* p = buf; 860 if (len && (w < 0) && (radix == 10) && !raw) { 861 w = -w; 862 *p++ = kMinus; 863 --len; 864 } else if (len && (w == 0)) { 865 *p++ = (char)raw ? 0 : asciiDigits[0]; 866 --len; 867 } 868 869 while (len && w != 0) { 870 int64_t n = w / base; 871 int64_t m = n * base; 872 int32_t d = (int32_t)(w-m); 873 *p++ = raw ? (char)d : asciiDigits[d]; 874 w = n; 875 --len; 876 } 877 if (len) { 878 *p = 0; // null terminate if room for caller convenience 879 } 880 881 len = p - buf; 882 if (*buf == kMinus) { 883 ++buf; 884 } 885 while (--p > buf) { 886 char c = *p; 887 *p = *buf; 888 *buf = c; 889 ++buf; 890 } 891 892 return len; 893 } 894 #endif 895 896 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 897 { 898 if (radix > 36) { 899 radix = 36; 900 } else if (radix < 2) { 901 radix = 2; 902 } 903 int64_t base = radix; 904 905 UChar* p = buf; 906 if (len && (w < 0) && (radix == 10) && !raw) { 907 w = -w; 908 *p++ = kUMinus; 909 --len; 910 } else if (len && (w == 0)) { 911 *p++ = (UChar)raw ? 0 : asciiDigits[0]; 912 --len; 913 } 914 915 while (len && (w != 0)) { 916 int64_t n = w / base; 917 int64_t m = n * base; 918 int32_t d = (int32_t)(w-m); 919 *p++ = (UChar)(raw ? d : asciiDigits[d]); 920 w = n; 921 --len; 922 } 923 if (len) { 924 *p = 0; // null terminate if room for caller convenience 925 } 926 927 len = (uint32_t)(p - buf); 928 if (*buf == kUMinus) { 929 ++buf; 930 } 931 while (--p > buf) { 932 UChar c = *p; 933 *p = *buf; 934 *buf = c; 935 ++buf; 936 } 937 938 return len; 939 } 940 941 942 U_NAMESPACE_END 943 944 /* U_HAVE_RBNF */ 945 #endif 946 947