Home | History | Annotate | Download | only in i18n
      1 /*
      2 ******************************************************************************
      3 *   Copyright (C) 1997-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ******************************************************************************
      6 *   file name:  nfrs.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 * Modification history
     12 * Date        Name      Comments
     13 * 10/11/2001  Doug      Ported from ICU4J
     14 */
     15 
     16 #include "nfrs.h"
     17 
     18 #if U_HAVE_RBNF
     19 
     20 #include "unicode/uchar.h"
     21 #include "nfrule.h"
     22 #include "nfrlist.h"
     23 #include "patternprops.h"
     24 
     25 #ifdef RBNF_DEBUG
     26 #include "cmemory.h"
     27 #endif
     28 
     29 U_NAMESPACE_BEGIN
     30 
     31 #if 0
     32 // euclid's algorithm works with doubles
     33 // note, doubles only get us up to one quadrillion or so, which
     34 // isn't as much range as we get with longs.  We probably still
     35 // want either 64-bit math, or BigInteger.
     36 
     37 static int64_t
     38 util_lcm(int64_t x, int64_t y)
     39 {
     40     x.abs();
     41     y.abs();
     42 
     43     if (x == 0 || y == 0) {
     44         return 0;
     45     } else {
     46         do {
     47             if (x < y) {
     48                 int64_t t = x; x = y; y = t;
     49             }
     50             x -= y * (x/y);
     51         } while (x != 0);
     52 
     53         return y;
     54     }
     55 }
     56 
     57 #else
     58 /**
     59  * Calculates the least common multiple of x and y.
     60  */
     61 static int64_t
     62 util_lcm(int64_t x, int64_t y)
     63 {
     64     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
     65     // vol. 2, 1st ed., pp. 298-299
     66     int64_t x1 = x;
     67     int64_t y1 = y;
     68 
     69     int p2 = 0;
     70     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
     71         ++p2;
     72         x1 >>= 1;
     73         y1 >>= 1;
     74     }
     75 
     76     int64_t t;
     77     if ((x1 & 1) == 1) {
     78         t = -y1;
     79     } else {
     80         t = x1;
     81     }
     82 
     83     while (t != 0) {
     84         while ((t & 1) == 0) {
     85             t = t >> 1;
     86         }
     87         if (t > 0) {
     88             x1 = t;
     89         } else {
     90             y1 = -t;
     91         }
     92         t = x1 - y1;
     93     }
     94 
     95     int64_t gcd = x1 << p2;
     96 
     97     // x * y == gcd(x, y) * lcm(x, y)
     98     return x / gcd * y;
     99 }
    100 #endif
    101 
    102 static const UChar gPercent = 0x0025;
    103 static const UChar gColon = 0x003a;
    104 static const UChar gSemicolon = 0x003b;
    105 static const UChar gLineFeed = 0x000a;
    106 
    107 static const UChar gFourSpaces[] =
    108 {
    109     0x20, 0x20, 0x20, 0x20, 0
    110 }; /* "    " */
    111 static const UChar gPercentPercent[] =
    112 {
    113     0x25, 0x25, 0
    114 }; /* "%%" */
    115 
    116 static const UChar gNoparse[] =
    117 {
    118     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
    119 }; /* "@noparse" */
    120 
    121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
    122   : name()
    123   , rules(0)
    124   , negativeNumberRule(NULL)
    125   , fIsFractionRuleSet(FALSE)
    126   , fIsPublic(FALSE)
    127   , fIsParseable(TRUE)
    128   , fRecursionCount(0)
    129 {
    130     for (int i = 0; i < 3; ++i) {
    131         fractionRules[i] = NULL;
    132     }
    133 
    134     if (U_FAILURE(status)) {
    135         return;
    136     }
    137 
    138     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
    139 
    140     if (description.length() == 0) {
    141         // throw new IllegalArgumentException("Empty rule set description");
    142         status = U_PARSE_ERROR;
    143         return;
    144     }
    145 
    146     // if the description begins with a rule set name (the rule set
    147     // name can be omitted in formatter descriptions that consist
    148     // of only one rule set), copy it out into our "name" member
    149     // and delete it from the description
    150     if (description.charAt(0) == gPercent) {
    151         int32_t pos = description.indexOf(gColon);
    152         if (pos == -1) {
    153             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
    154             status = U_PARSE_ERROR;
    155         } else {
    156             name.setTo(description, 0, pos);
    157             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
    158             }
    159             description.remove(0, pos);
    160         }
    161     } else {
    162         name.setTo(UNICODE_STRING_SIMPLE("%default"));
    163     }
    164 
    165     if (description.length() == 0) {
    166         // throw new IllegalArgumentException("Empty rule set description");
    167         status = U_PARSE_ERROR;
    168     }
    169 
    170     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
    171 
    172     if ( name.endsWith(gNoparse,8) ) {
    173         fIsParseable = FALSE;
    174         name.truncate(name.length()-8); // remove the @noparse from the name
    175     }
    176 
    177     // all of the other members of NFRuleSet are initialized
    178     // by parseRules()
    179 }
    180 
    181 void
    182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
    183 {
    184     // start by creating a Vector whose elements are Strings containing
    185     // the descriptions of the rules (one rule per element).  The rules
    186     // are separated by semicolons (there's no escape facility: ALL
    187     // semicolons are rule delimiters)
    188 
    189     if (U_FAILURE(status)) {
    190         return;
    191     }
    192 
    193     // ensure we are starting with an empty rule list
    194     rules.deleteAll();
    195 
    196     // dlf - the original code kept a separate description array for no reason,
    197     // so I got rid of it.  The loop was too complex so I simplified it.
    198 
    199     UnicodeString currentDescription;
    200     int32_t oldP = 0;
    201     while (oldP < description.length()) {
    202         int32_t p = description.indexOf(gSemicolon, oldP);
    203         if (p == -1) {
    204             p = description.length();
    205         }
    206         currentDescription.setTo(description, oldP, p - oldP);
    207         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
    208         oldP = p + 1;
    209     }
    210 
    211     // for rules that didn't specify a base value, their base values
    212     // were initialized to 0.  Make another pass through the list and
    213     // set all those rules' base values.  We also remove any special
    214     // rules from the list and put them into their own member variables
    215     int64_t defaultBaseValue = 0;
    216 
    217     // (this isn't a for loop because we might be deleting items from
    218     // the vector-- we want to make sure we only increment i when
    219     // we _didn't_ delete aything from the vector)
    220     uint32_t i = 0;
    221     while (i < rules.size()) {
    222         NFRule* rule = rules[i];
    223 
    224         switch (rule->getType()) {
    225             // if the rule's base value is 0, fill in a default
    226             // base value (this will be 1 plus the preceding
    227             // rule's base value for regular rule sets, and the
    228             // same as the preceding rule's base value in fraction
    229             // rule sets)
    230         case NFRule::kNoBase:
    231             rule->setBaseValue(defaultBaseValue, status);
    232             if (!isFractionRuleSet()) {
    233                 ++defaultBaseValue;
    234             }
    235             ++i;
    236             break;
    237 
    238             // if it's the negative-number rule, copy it into its own
    239             // data member and delete it from the list
    240         case NFRule::kNegativeNumberRule:
    241             if (negativeNumberRule) {
    242                 delete negativeNumberRule;
    243             }
    244             negativeNumberRule = rules.remove(i);
    245             break;
    246 
    247             // if it's the improper fraction rule, copy it into the
    248             // correct element of fractionRules
    249         case NFRule::kImproperFractionRule:
    250             if (fractionRules[0]) {
    251                 delete fractionRules[0];
    252             }
    253             fractionRules[0] = rules.remove(i);
    254             break;
    255 
    256             // if it's the proper fraction rule, copy it into the
    257             // correct element of fractionRules
    258         case NFRule::kProperFractionRule:
    259             if (fractionRules[1]) {
    260                 delete fractionRules[1];
    261             }
    262             fractionRules[1] = rules.remove(i);
    263             break;
    264 
    265             // if it's the master rule, copy it into the
    266             // correct element of fractionRules
    267         case NFRule::kMasterRule:
    268             if (fractionRules[2]) {
    269                 delete fractionRules[2];
    270             }
    271             fractionRules[2] = rules.remove(i);
    272             break;
    273 
    274             // if it's a regular rule that already knows its base value,
    275             // check to make sure the rules are in order, and update
    276             // the default base value for the next rule
    277         default:
    278             if (rule->getBaseValue() < defaultBaseValue) {
    279                 // throw new IllegalArgumentException("Rules are not in order");
    280                 status = U_PARSE_ERROR;
    281                 return;
    282             }
    283             defaultBaseValue = rule->getBaseValue();
    284             if (!isFractionRuleSet()) {
    285                 ++defaultBaseValue;
    286             }
    287             ++i;
    288             break;
    289         }
    290     }
    291 }
    292 
    293 NFRuleSet::~NFRuleSet()
    294 {
    295     delete negativeNumberRule;
    296     delete fractionRules[0];
    297     delete fractionRules[1];
    298     delete fractionRules[2];
    299 }
    300 
    301 static UBool
    302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
    303 {
    304     if (rule1) {
    305         if (rule2) {
    306             return *rule1 == *rule2;
    307         }
    308     } else if (!rule2) {
    309         return TRUE;
    310     }
    311     return FALSE;
    312 }
    313 
    314 UBool
    315 NFRuleSet::operator==(const NFRuleSet& rhs) const
    316 {
    317     if (rules.size() == rhs.rules.size() &&
    318         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
    319         name == rhs.name &&
    320         util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
    321         util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
    322         util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
    323         util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
    324 
    325         for (uint32_t i = 0; i < rules.size(); ++i) {
    326             if (*rules[i] != *rhs.rules[i]) {
    327                 return FALSE;
    328             }
    329         }
    330         return TRUE;
    331     }
    332     return FALSE;
    333 }
    334 
    335 #define RECURSION_LIMIT 50
    336 
    337 void
    338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const
    339 {
    340     NFRule *rule = findNormalRule(number);
    341     if (rule) { // else error, but can't report it
    342         NFRuleSet* ncThis = (NFRuleSet*)this;
    343         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
    344             // stop recursion
    345             ncThis->fRecursionCount = 0;
    346         } else {
    347             rule->doFormat(number, toAppendTo, pos, status);
    348             ncThis->fRecursionCount--;
    349         }
    350     }
    351 }
    352 
    353 void
    354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const
    355 {
    356     NFRule *rule = findDoubleRule(number);
    357     if (rule) { // else error, but can't report it
    358         NFRuleSet* ncThis = (NFRuleSet*)this;
    359         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
    360             // stop recursion
    361             ncThis->fRecursionCount = 0;
    362         } else {
    363             rule->doFormat(number, toAppendTo, pos, status);
    364             ncThis->fRecursionCount--;
    365         }
    366     }
    367 }
    368 
    369 NFRule*
    370 NFRuleSet::findDoubleRule(double number) const
    371 {
    372     // if this is a fraction rule set, use findFractionRuleSetRule()
    373     if (isFractionRuleSet()) {
    374         return findFractionRuleSetRule(number);
    375     }
    376 
    377     // if the number is negative, return the negative number rule
    378     // (if there isn't a negative-number rule, we pretend it's a
    379     // positive number)
    380     if (number < 0) {
    381         if (negativeNumberRule) {
    382             return  negativeNumberRule;
    383         } else {
    384             number = -number;
    385         }
    386     }
    387 
    388     // if the number isn't an integer, we use one of the fraction rules...
    389     if (number != uprv_floor(number)) {
    390         // if the number is between 0 and 1, return the proper
    391         // fraction rule
    392         if (number < 1 && fractionRules[1]) {
    393             return fractionRules[1];
    394         }
    395         // otherwise, return the improper fraction rule
    396         else if (fractionRules[0]) {
    397             return fractionRules[0];
    398         }
    399     }
    400 
    401     // if there's a master rule, use it to format the number
    402     if (fractionRules[2]) {
    403         return fractionRules[2];
    404     }
    405 
    406     // and if we haven't yet returned a rule, use findNormalRule()
    407     // to find the applicable rule
    408     int64_t r = util64_fromDouble(number + 0.5);
    409     return findNormalRule(r);
    410 }
    411 
    412 NFRule *
    413 NFRuleSet::findNormalRule(int64_t number) const
    414 {
    415     // if this is a fraction rule set, use findFractionRuleSetRule()
    416     // to find the rule (we should only go into this clause if the
    417     // value is 0)
    418     if (fIsFractionRuleSet) {
    419         return findFractionRuleSetRule((double)number);
    420     }
    421 
    422     // if the number is negative, return the negative-number rule
    423     // (if there isn't one, pretend the number is positive)
    424     if (number < 0) {
    425         if (negativeNumberRule) {
    426             return negativeNumberRule;
    427         } else {
    428             number = -number;
    429         }
    430     }
    431 
    432     // we have to repeat the preceding two checks, even though we
    433     // do them in findRule(), because the version of format() that
    434     // takes a long bypasses findRule() and goes straight to this
    435     // function.  This function does skip the fraction rules since
    436     // we know the value is an integer (it also skips the master
    437     // rule, since it's considered a fraction rule.  Skipping the
    438     // master rule in this function is also how we avoid infinite
    439     // recursion)
    440 
    441     // {dlf} unfortunately this fails if there are no rules except
    442     // special rules.  If there are no rules, use the master rule.
    443 
    444     // binary-search the rule list for the applicable rule
    445     // (a rule is used for all values from its base value to
    446     // the next rule's base value)
    447     int32_t hi = rules.size();
    448     if (hi > 0) {
    449         int32_t lo = 0;
    450 
    451         while (lo < hi) {
    452             int32_t mid = (lo + hi) / 2;
    453             if (rules[mid]->getBaseValue() == number) {
    454                 return rules[mid];
    455             }
    456             else if (rules[mid]->getBaseValue() > number) {
    457                 hi = mid;
    458             }
    459             else {
    460                 lo = mid + 1;
    461             }
    462         }
    463         if (hi == 0) { // bad rule set, minimum base > 0
    464             return NULL; // want to throw exception here
    465         }
    466 
    467         NFRule *result = rules[hi - 1];
    468 
    469         // use shouldRollBack() to see whether we need to invoke the
    470         // rollback rule (see shouldRollBack()'s documentation for
    471         // an explanation of the rollback rule).  If we do, roll back
    472         // one rule and return that one instead of the one we'd normally
    473         // return
    474         if (result->shouldRollBack((double)number)) {
    475             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
    476                 return NULL;
    477             }
    478             result = rules[hi - 2];
    479         }
    480         return result;
    481     }
    482     // else use the master rule
    483     return fractionRules[2];
    484 }
    485 
    486 /**
    487  * If this rule is a fraction rule set, this function is used by
    488  * findRule() to select the most appropriate rule for formatting
    489  * the number.  Basically, the base value of each rule in the rule
    490  * set is treated as the denominator of a fraction.  Whichever
    491  * denominator can produce the fraction closest in value to the
    492  * number passed in is the result.  If there's a tie, the earlier
    493  * one in the list wins.  (If there are two rules in a row with the
    494  * same base value, the first one is used when the numerator of the
    495  * fraction would be 1, and the second rule is used the rest of the
    496  * time.
    497  * @param number The number being formatted (which will always be
    498  * a number between 0 and 1)
    499  * @return The rule to use to format this number
    500  */
    501 NFRule*
    502 NFRuleSet::findFractionRuleSetRule(double number) const
    503 {
    504     // the obvious way to do this (multiply the value being formatted
    505     // by each rule's base value until you get an integral result)
    506     // doesn't work because of rounding error.  This method is more
    507     // accurate
    508 
    509     // find the least common multiple of the rules' base values
    510     // and multiply this by the number being formatted.  This is
    511     // all the precision we need, and we can do all of the rest
    512     // of the math using integer arithmetic
    513     int64_t leastCommonMultiple = rules[0]->getBaseValue();
    514     int64_t numerator;
    515     {
    516         for (uint32_t i = 1; i < rules.size(); ++i) {
    517             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
    518         }
    519         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
    520     }
    521     // for each rule, do the following...
    522     int64_t tempDifference;
    523     int64_t difference = util64_fromDouble(uprv_maxMantissa());
    524     int32_t winner = 0;
    525     for (uint32_t i = 0; i < rules.size(); ++i) {
    526         // "numerator" is the numerator of the fraction if the
    527         // denominator is the LCD.  The numerator if the rule's
    528         // base value is the denominator is "numerator" times the
    529         // base value divided bythe LCD.  Here we check to see if
    530         // that's an integer, and if not, how close it is to being
    531         // an integer.
    532         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
    533 
    534 
    535         // normalize the result of the above calculation: we want
    536         // the numerator's distance from the CLOSEST multiple
    537         // of the LCD
    538         if (leastCommonMultiple - tempDifference < tempDifference) {
    539             tempDifference = leastCommonMultiple - tempDifference;
    540         }
    541 
    542         // if this is as close as we've come, keep track of how close
    543         // that is, and the line number of the rule that did it.  If
    544         // we've scored a direct hit, we don't have to look at any more
    545         // rules
    546         if (tempDifference < difference) {
    547             difference = tempDifference;
    548             winner = i;
    549             if (difference == 0) {
    550                 break;
    551             }
    552         }
    553     }
    554 
    555     // if we have two successive rules that both have the winning base
    556     // value, then the first one (the one we found above) is used if
    557     // the numerator of the fraction is 1 and the second one is used if
    558     // the numerator of the fraction is anything else (this lets us
    559     // do things like "one third"/"two thirds" without haveing to define
    560     // a whole bunch of extra rule sets)
    561     if ((unsigned)(winner + 1) < rules.size() &&
    562         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
    563         double n = ((double)rules[winner]->getBaseValue()) * number;
    564         if (n < 0.5 || n >= 2) {
    565             ++winner;
    566         }
    567     }
    568 
    569     // finally, return the winning rule
    570     return rules[winner];
    571 }
    572 
/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and returns
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param parsePosition The initial position is ignored and assumed
 * to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @return The numerical result of parsing this string.  This will
 * be the matching rule's base value, composed appropriately with
 * the results of matching any of its substitutions.  The result
 * will be a long if it's an integral value; otherwise, it will be
 * a double.  This function always produces a valid result: If
 * nothing matched the input string at all, the result is a long
 * with the value 0, and the parse position is left unchanged.
 */
    594 #ifdef RBNF_DEBUG
    595 #include <stdio.h>
    596 
    597 static void dumpUS(FILE* f, const UnicodeString& us) {
    598   int len = us.length();
    599   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
    600   if (buf != NULL) {
    601 	  us.extract(0, len, buf);
    602 	  buf[len] = 0;
    603 	  fprintf(f, "%s", buf);
    604 	  uprv_free(buf); //delete[] buf;
    605   }
    606 }
    607 #endif
    608 
    609 UBool
    610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
    611 {
    612     // try matching each rule in the rule set against the text being
    613     // parsed.  Whichever one matches the most characters is the one
    614     // that determines the value we return.
    615 
    616     result.setLong(0);
    617 
    618     // dump out if there's no text to parse
    619     if (text.length() == 0) {
    620         return 0;
    621     }
    622 
    623     ParsePosition highWaterMark;
    624     ParsePosition workingPos = pos;
    625 
    626 #ifdef RBNF_DEBUG
    627     fprintf(stderr, "<nfrs> %x '", this);
    628     dumpUS(stderr, name);
    629     fprintf(stderr, "' text '");
    630     dumpUS(stderr, text);
    631     fprintf(stderr, "'\n");
    632     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
    633 #endif
    634 
    635     // start by trying the negative number rule (if there is one)
    636     if (negativeNumberRule) {
    637         Formattable tempResult;
    638 #ifdef RBNF_DEBUG
    639         fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
    640 #endif
    641         UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
    642 #ifdef RBNF_DEBUG
    643         fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
    644 #endif
    645         if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
    646             result = tempResult;
    647             highWaterMark = workingPos;
    648         }
    649         workingPos = pos;
    650     }
    651 #ifdef RBNF_DEBUG
    652     fprintf(stderr, "<nfrs> continue fractional with text '");
    653     dumpUS(stderr, text);
    654     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    655 #endif
    656     // then try each of the fraction rules
    657     {
    658         for (int i = 0; i < 3; i++) {
    659             if (fractionRules[i]) {
    660                 Formattable tempResult;
    661                 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
    662                 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
    663                     result = tempResult;
    664                     highWaterMark = workingPos;
    665                 }
    666                 workingPos = pos;
    667             }
    668         }
    669     }
    670 #ifdef RBNF_DEBUG
    671     fprintf(stderr, "<nfrs> continue other with text '");
    672     dumpUS(stderr, text);
    673     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    674 #endif
    675 
    676     // finally, go through the regular rules one at a time.  We start
    677     // at the end of the list because we want to try matching the most
    678     // sigificant rule first (this helps ensure that we parse
    679     // "five thousand three hundred six" as
    680     // "(five thousand) (three hundred) (six)" rather than
    681     // "((five thousand three) hundred) (six)").  Skip rules whose
    682     // base values are higher than the upper bound (again, this helps
    683     // limit ambiguity by making sure the rules that match a rule's
    684     // are less significant than the rule containing the substitutions)/
    685     {
    686         int64_t ub = util64_fromDouble(upperBound);
    687 #ifdef RBNF_DEBUG
    688         {
    689             char ubstr[64];
    690             util64_toa(ub, ubstr, 64);
    691             char ubstrhex[64];
    692             util64_toa(ub, ubstrhex, 64, 16);
    693             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
    694         }
    695 #endif
    696         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
    697             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
    698                 continue;
    699             }
    700             Formattable tempResult;
    701             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
    702             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
    703                 result = tempResult;
    704                 highWaterMark = workingPos;
    705             }
    706             workingPos = pos;
    707         }
    708     }
    709 #ifdef RBNF_DEBUG
    710     fprintf(stderr, "<nfrs> exit\n");
    711 #endif
    712     // finally, update the parse postion we were passed to point to the
    713     // first character we didn't use, and return the result that
    714     // corresponds to that string of characters
    715     pos = highWaterMark;
    716 
    717     return 1;
    718 }
    719 
    720 void
    721 NFRuleSet::appendRules(UnicodeString& result) const
    722 {
    723     // the rule set name goes first...
    724     result.append(name);
    725     result.append(gColon);
    726     result.append(gLineFeed);
    727 
    728     // followed by the regular rules...
    729     for (uint32_t i = 0; i < rules.size(); i++) {
    730         result.append(gFourSpaces, 4);
    731         rules[i]->_appendRuleText(result);
    732         result.append(gLineFeed);
    733     }
    734 
    735     // followed by the special rules (if they exist)
    736     if (negativeNumberRule) {
    737         result.append(gFourSpaces, 4);
    738         negativeNumberRule->_appendRuleText(result);
    739         result.append(gLineFeed);
    740     }
    741 
    742     {
    743         for (uint32_t i = 0; i < 3; ++i) {
    744             if (fractionRules[i]) {
    745                 result.append(gFourSpaces, 4);
    746                 fractionRules[i]->_appendRuleText(result);
    747                 result.append(gLineFeed);
    748             }
    749         }
    750     }
    751 }
    752 
    753 // utility functions
    754 
    755 int64_t util64_fromDouble(double d) {
    756     int64_t result = 0;
    757     if (!uprv_isNaN(d)) {
    758         double mant = uprv_maxMantissa();
    759         if (d < -mant) {
    760             d = -mant;
    761         } else if (d > mant) {
    762             d = mant;
    763         }
    764         UBool neg = d < 0;
    765         if (neg) {
    766             d = -d;
    767         }
    768         result = (int64_t)uprv_floor(d);
    769         if (neg) {
    770             result = -result;
    771         }
    772     }
    773     return result;
    774 }
    775 
    776 int64_t util64_pow(int32_t r, uint32_t e)  {
    777     if (r == 0) {
    778         return 0;
    779     } else if (e == 0) {
    780         return 1;
    781     } else {
    782         int64_t n = r;
    783         while (--e > 0) {
    784             n *= r;
    785         }
    786         return n;
    787     }
    788 }
    789 
    790 static const uint8_t asciiDigits[] = {
    791     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    792     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    793     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    794     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    795     0x77u, 0x78u, 0x79u, 0x7au,
    796 };
    797 
    798 static const UChar kUMinus = (UChar)0x002d;
    799 
    800 #ifdef RBNF_DEBUG
    // Sign character used by the char-based conversions.
    static const char kMinus = '-';

    /**
     * Digit lookup table, indexed by 7-bit ASCII code (128 entries).
     *
     * An entry of 0 means "not a digit".  For a digit character the entry is
     * 0x80 | value: the high bit marks the character as valid, which keeps '0'
     * (value 0) distinguishable from a non-digit, and the low seven bits hold
     * the digit value 0-35.  Upper- and lowercase letters map to the same
     * values (10-35).
     */
    static const uint8_t digitInfo[] = {
            0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,
        0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
        0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
            0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
        0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
        0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
        0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
            0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
        0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
        0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
        0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    };

    /**
     * Parses an integer from a NUL-terminated char string.
     *
     * An optional leading kMinus is accepted for any radix.  Parsing then
     * consumes characters for as long as they are digits valid in the radix
     * and stops at the first one that is not (the terminating NUL included).
     * No leading whitespace or '+' sign is accepted.
     *
     * @param str    the text to parse; must be NUL-terminated
     * @param radix  the number base; values above 36 are treated as 36 and
     *               values below 2 as 2
     * @return       the parsed value, or 0 if no digits were found.  Values
     *               that do not fit in 64 bits wrap modulo 2^64.
     */
    int64_t util64_atoi(const char* str, uint32_t radix)
    {
        if (radix > 36) {
            radix = 36;
        } else if (radix < 2) {
            radix = 2;
        }

        int neg = 0;
        if (*str == kMinus) {
            ++str;
            neg = 1;
        }

        // Accumulate the magnitude unsigned: overflow then wraps instead of
        // being undefined, and the most negative int64_t value can be parsed.
        uint64_t magnitude = 0;
        for (;;) {
            // Plain char may be signed, so go through uint8_t and reject
            // anything outside the 128-entry table before indexing it.
            uint8_t c = (uint8_t)*str++;
            if (c >= 0x80) {
                break;
            }
            uint8_t b = digitInfo[c];
            if (b == 0) {
                break;          // not a digit at all (this also ends at NUL)
            }
            b &= 0x7f;          // strip the validity flag, leaving the value
            if (b >= radix) {
                break;          // a digit, but not in this radix
            }
            magnitude = magnitude * radix + b;
        }

        return (int64_t)(neg ? (0 - magnitude) : magnitude);
    }
    847 
    848 int64_t util64_utoi(const UChar* str, uint32_t radix)
    849 {
    850     if (radix > 36) {
    851         radix = 36;
    852     } else if (radix < 2) {
    853         radix = 2;
    854     }
    855     int64_t lradix = radix;
    856 
    857     int neg = 0;
    858     if (*str == kUMinus) {
    859         ++str;
    860         neg = 1;
    861     }
    862     int64_t result = 0;
    863     UChar c;
    864     uint8_t b;
    865     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
    866         result *= lradix;
    867         result += (int32_t)b;
    868     }
    869     if (neg) {
    870         result = -result;
    871     }
    872     return result;
    873 }
    874 
    875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
    876 {
    877     if (radix > 36) {
    878         radix = 36;
    879     } else if (radix < 2) {
    880         radix = 2;
    881     }
    882     int64_t base = radix;
    883 
    884     char* p = buf;
    885     if (len && (w < 0) && (radix == 10) && !raw) {
    886         w = -w;
    887         *p++ = kMinus;
    888         --len;
    889     } else if (len && (w == 0)) {
    890         *p++ = (char)raw ? 0 : asciiDigits[0];
    891         --len;
    892     }
    893 
    894     while (len && w != 0) {
    895         int64_t n = w / base;
    896         int64_t m = n * base;
    897         int32_t d = (int32_t)(w-m);
    898         *p++ = raw ? (char)d : asciiDigits[d];
    899         w = n;
    900         --len;
    901     }
    902     if (len) {
    903         *p = 0; // null terminate if room for caller convenience
    904     }
    905 
    906     len = p - buf;
    907     if (*buf == kMinus) {
    908         ++buf;
    909     }
    910     while (--p > buf) {
    911         char c = *p;
    912         *p = *buf;
    913         *buf = c;
    914         ++buf;
    915     }
    916 
    917     return len;
    918 }
    919 #endif
    920 
    921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
    922 {
    923     if (radix > 36) {
    924         radix = 36;
    925     } else if (radix < 2) {
    926         radix = 2;
    927     }
    928     int64_t base = radix;
    929 
    930     UChar* p = buf;
    931     if (len && (w < 0) && (radix == 10) && !raw) {
    932         w = -w;
    933         *p++ = kUMinus;
    934         --len;
    935     } else if (len && (w == 0)) {
    936         *p++ = (UChar)raw ? 0 : asciiDigits[0];
    937         --len;
    938     }
    939 
    940     while (len && (w != 0)) {
    941         int64_t n = w / base;
    942         int64_t m = n * base;
    943         int32_t d = (int32_t)(w-m);
    944         *p++ = (UChar)(raw ? d : asciiDigits[d]);
    945         w = n;
    946         --len;
    947     }
    948     if (len) {
    949         *p = 0; // null terminate if room for caller convenience
    950     }
    951 
    952     len = (uint32_t)(p - buf);
    953     if (*buf == kUMinus) {
    954         ++buf;
    955     }
    956     while (--p > buf) {
    957         UChar c = *p;
    958         *p = *buf;
    959         *buf = c;
    960         ++buf;
    961     }
    962 
    963     return len;
    964 }
    965 
    966 
    967 U_NAMESPACE_END
    968 
    969 /* U_HAVE_RBNF */
    970 #endif
    971 
    972