Home | History | Annotate | Download | only in i18n
      1 /*
      2 ******************************************************************************
      3 *   Copyright (C) 1997-2015, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ******************************************************************************
      6 *   file name:  nfrs.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 * Modification history
     12 * Date        Name      Comments
     13 * 10/11/2001  Doug      Ported from ICU4J
     14 */
     15 
     16 #include "nfrs.h"
     17 
     18 #if U_HAVE_RBNF
     19 
     20 #include "unicode/uchar.h"
     21 #include "nfrule.h"
     22 #include "nfrlist.h"
     23 #include "patternprops.h"
     24 
     25 #ifdef RBNF_DEBUG
     26 #include "cmemory.h"
     27 #endif
     28 
     29 enum {
     30     /** -x */
     31     NEGATIVE_RULE_INDEX = 0,
     32     /** x.x */
     33     IMPROPER_FRACTION_RULE_INDEX = 1,
     34     /** 0.x */
     35     PROPER_FRACTION_RULE_INDEX = 2,
     36     /** x.0 */
     37     MASTER_RULE_INDEX = 3,
     38     /** Inf */
     39     INFINITY_RULE_INDEX = 4,
     40     /** NaN */
     41     NAN_RULE_INDEX = 5,
     42     NON_NUMERICAL_RULE_LENGTH = 6
     43 };
     44 
     45 U_NAMESPACE_BEGIN
     46 
     47 #if 0
     48 // euclid's algorithm works with doubles
     49 // note, doubles only get us up to one quadrillion or so, which
     50 // isn't as much range as we get with longs.  We probably still
     51 // want either 64-bit math, or BigInteger.
     52 
     53 static int64_t
     54 util_lcm(int64_t x, int64_t y)
     55 {
     56     x.abs();
     57     y.abs();
     58 
     59     if (x == 0 || y == 0) {
     60         return 0;
     61     } else {
     62         do {
     63             if (x < y) {
     64                 int64_t t = x; x = y; y = t;
     65             }
     66             x -= y * (x/y);
     67         } while (x != 0);
     68 
     69         return y;
     70     }
     71 }
     72 
     73 #else
     74 /**
     75  * Calculates the least common multiple of x and y.
     76  */
     77 static int64_t
     78 util_lcm(int64_t x, int64_t y)
     79 {
     80     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
     81     // vol. 2, 1st ed., pp. 298-299
     82     int64_t x1 = x;
     83     int64_t y1 = y;
     84 
     85     int p2 = 0;
     86     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
     87         ++p2;
     88         x1 >>= 1;
     89         y1 >>= 1;
     90     }
     91 
     92     int64_t t;
     93     if ((x1 & 1) == 1) {
     94         t = -y1;
     95     } else {
     96         t = x1;
     97     }
     98 
     99     while (t != 0) {
    100         while ((t & 1) == 0) {
    101             t = t >> 1;
    102         }
    103         if (t > 0) {
    104             x1 = t;
    105         } else {
    106             y1 = -t;
    107         }
    108         t = x1 - y1;
    109     }
    110 
    111     int64_t gcd = x1 << p2;
    112 
    113     // x * y == gcd(x, y) * lcm(x, y)
    114     return x / gcd * y;
    115 }
    116 #endif
    117 
    118 static const UChar gPercent = 0x0025;
    119 static const UChar gColon = 0x003a;
    120 static const UChar gSemicolon = 0x003b;
    121 static const UChar gLineFeed = 0x000a;
    122 
    123 static const UChar gPercentPercent[] =
    124 {
    125     0x25, 0x25, 0
    126 }; /* "%%" */
    127 
    128 static const UChar gNoparse[] =
    129 {
    130     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
    131 }; /* "@noparse" */
    132 
    133 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
    134   : name()
    135   , rules(0)
    136   , owner(_owner)
    137   , fractionRules()
    138   , fIsFractionRuleSet(FALSE)
    139   , fIsPublic(FALSE)
    140   , fIsParseable(TRUE)
    141 {
    142     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    143         nonNumericalRules[i] = NULL;
    144     }
    145 
    146     if (U_FAILURE(status)) {
    147         return;
    148     }
    149 
    150     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
    151 
    152     if (description.length() == 0) {
    153         // throw new IllegalArgumentException("Empty rule set description");
    154         status = U_PARSE_ERROR;
    155         return;
    156     }
    157 
    158     // if the description begins with a rule set name (the rule set
    159     // name can be omitted in formatter descriptions that consist
    160     // of only one rule set), copy it out into our "name" member
    161     // and delete it from the description
    162     if (description.charAt(0) == gPercent) {
    163         int32_t pos = description.indexOf(gColon);
    164         if (pos == -1) {
    165             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
    166             status = U_PARSE_ERROR;
    167         } else {
    168             name.setTo(description, 0, pos);
    169             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
    170             }
    171             description.remove(0, pos);
    172         }
    173     } else {
    174         name.setTo(UNICODE_STRING_SIMPLE("%default"));
    175     }
    176 
    177     if (description.length() == 0) {
    178         // throw new IllegalArgumentException("Empty rule set description");
    179         status = U_PARSE_ERROR;
    180     }
    181 
    182     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
    183 
    184     if ( name.endsWith(gNoparse,8) ) {
    185         fIsParseable = FALSE;
    186         name.truncate(name.length()-8); // remove the @noparse from the name
    187     }
    188 
    189     // all of the other members of NFRuleSet are initialized
    190     // by parseRules()
    191 }
    192 
    193 void
    194 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
    195 {
    196     // start by creating a Vector whose elements are Strings containing
    197     // the descriptions of the rules (one rule per element).  The rules
    198     // are separated by semicolons (there's no escape facility: ALL
    199     // semicolons are rule delimiters)
    200 
    201     if (U_FAILURE(status)) {
    202         return;
    203     }
    204 
    205     // ensure we are starting with an empty rule list
    206     rules.deleteAll();
    207 
    208     // dlf - the original code kept a separate description array for no reason,
    209     // so I got rid of it.  The loop was too complex so I simplified it.
    210 
    211     UnicodeString currentDescription;
    212     int32_t oldP = 0;
    213     while (oldP < description.length()) {
    214         int32_t p = description.indexOf(gSemicolon, oldP);
    215         if (p == -1) {
    216             p = description.length();
    217         }
    218         currentDescription.setTo(description, oldP, p - oldP);
    219         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
    220         oldP = p + 1;
    221     }
    222 
    223     // for rules that didn't specify a base value, their base values
    224     // were initialized to 0.  Make another pass through the list and
    225     // set all those rules' base values.  We also remove any special
    226     // rules from the list and put them into their own member variables
    227     int64_t defaultBaseValue = 0;
    228 
    229     // (this isn't a for loop because we might be deleting items from
    230     // the vector-- we want to make sure we only increment i when
    231     // we _didn't_ delete aything from the vector)
    232     int32_t rulesSize = rules.size();
    233     for (int32_t i = 0; i < rulesSize; i++) {
    234         NFRule* rule = rules[i];
    235         int64_t baseValue = rule->getBaseValue();
    236 
    237         if (baseValue == 0) {
    238             // if the rule's base value is 0, fill in a default
    239             // base value (this will be 1 plus the preceding
    240             // rule's base value for regular rule sets, and the
    241             // same as the preceding rule's base value in fraction
    242             // rule sets)
    243             rule->setBaseValue(defaultBaseValue, status);
    244         }
    245         else {
    246             // if it's a regular rule that already knows its base value,
    247             // check to make sure the rules are in order, and update
    248             // the default base value for the next rule
    249             if (baseValue < defaultBaseValue) {
    250                 // throw new IllegalArgumentException("Rules are not in order");
    251                 status = U_PARSE_ERROR;
    252                 return;
    253             }
    254             defaultBaseValue = baseValue;
    255         }
    256         if (!fIsFractionRuleSet) {
    257             ++defaultBaseValue;
    258         }
    259     }
    260 }
    261 
    262 /**
    263  * Set one of the non-numerical rules.
    264  * @param rule The rule to set.
    265  */
    266 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
    267     int64_t baseValue = rule->getBaseValue();
    268     if (baseValue == NFRule::kNegativeNumberRule) {
    269         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
    270         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
    271     }
    272     else if (baseValue == NFRule::kImproperFractionRule) {
    273         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
    274     }
    275     else if (baseValue == NFRule::kProperFractionRule) {
    276         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
    277     }
    278     else if (baseValue == NFRule::kMasterRule) {
    279         setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
    280     }
    281     else if (baseValue == NFRule::kInfinityRule) {
    282         delete nonNumericalRules[INFINITY_RULE_INDEX];
    283         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
    284     }
    285     else if (baseValue == NFRule::kNaNRule) {
    286         delete nonNumericalRules[NAN_RULE_INDEX];
    287         nonNumericalRules[NAN_RULE_INDEX] = rule;
    288     }
    289 }
    290 
    291 /**
    292  * Determine the best fraction rule to use. Rules matching the decimal point from
    293  * DecimalFormatSymbols become the main set of rules to use.
    294  * @param originalIndex The index into nonNumericalRules
    295  * @param newRule The new rule to consider
    296  * @param rememberRule Should the new rule be added to fractionRules.
    297  */
    298 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
    299     if (rememberRule) {
    300         fractionRules.add(newRule);
    301     }
    302     NFRule *bestResult = nonNumericalRules[originalIndex];
    303     if (bestResult == NULL) {
    304         nonNumericalRules[originalIndex] = newRule;
    305     }
    306     else {
    307         // We have more than one. Which one is better?
    308         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
    309         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
    310             == newRule->getDecimalPoint())
    311         {
    312             nonNumericalRules[originalIndex] = newRule;
    313         }
    314         // else leave it alone
    315     }
    316 }
    317 
    318 NFRuleSet::~NFRuleSet()
    319 {
    320     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    321         if (i != IMPROPER_FRACTION_RULE_INDEX
    322             && i != PROPER_FRACTION_RULE_INDEX
    323             && i != MASTER_RULE_INDEX)
    324         {
    325             delete nonNumericalRules[i];
    326         }
    327         // else it will be deleted via NFRuleList fractionRules
    328     }
    329 }
    330 
    331 static UBool
    332 util_equalRules(const NFRule* rule1, const NFRule* rule2)
    333 {
    334     if (rule1) {
    335         if (rule2) {
    336             return *rule1 == *rule2;
    337         }
    338     } else if (!rule2) {
    339         return TRUE;
    340     }
    341     return FALSE;
    342 }
    343 
    344 UBool
    345 NFRuleSet::operator==(const NFRuleSet& rhs) const
    346 {
    347     if (rules.size() == rhs.rules.size() &&
    348         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
    349         name == rhs.name) {
    350 
    351         // ...then compare the non-numerical rule lists...
    352         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    353             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
    354                 return FALSE;
    355             }
    356         }
    357 
    358         // ...then compare the rule lists...
    359         for (uint32_t i = 0; i < rules.size(); ++i) {
    360             if (*rules[i] != *rhs.rules[i]) {
    361                 return FALSE;
    362             }
    363         }
    364         return TRUE;
    365     }
    366     return FALSE;
    367 }
    368 
    369 void
    370 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
    371     for (uint32_t i = 0; i < rules.size(); ++i) {
    372         rules[i]->setDecimalFormatSymbols(newSymbols, status);
    373     }
    374     // Switch the fraction rules to mirror the DecimalFormatSymbols.
    375     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
    376         if (nonNumericalRules[nonNumericalIdx]) {
    377             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    378                 NFRule *fractionRule = fractionRules[fIdx];
    379                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
    380                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
    381                 }
    382             }
    383         }
    384     }
    385 
    386     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
    387         NFRule *rule = nonNumericalRules[nnrIdx];
    388         if (rule) {
    389             rule->setDecimalFormatSymbols(newSymbols, status);
    390         }
    391     }
    392 }
    393 
    394 #define RECURSION_LIMIT 64
    395 
    396 void
    397 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    398 {
    399     if (recursionCount >= RECURSION_LIMIT) {
    400         // stop recursion
    401         status = U_INVALID_STATE_ERROR;
    402         return;
    403     }
    404     const NFRule *rule = findNormalRule(number);
    405     if (rule) { // else error, but can't report it
    406         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    407     }
    408 }
    409 
    410 void
    411 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    412 {
    413     if (recursionCount >= RECURSION_LIMIT) {
    414         // stop recursion
    415         status = U_INVALID_STATE_ERROR;
    416         return;
    417     }
    418     const NFRule *rule = findDoubleRule(number);
    419     if (rule) { // else error, but can't report it
    420         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    421     }
    422 }
    423 
    424 const NFRule*
    425 NFRuleSet::findDoubleRule(double number) const
    426 {
    427     // if this is a fraction rule set, use findFractionRuleSetRule()
    428     if (isFractionRuleSet()) {
    429         return findFractionRuleSetRule(number);
    430     }
    431 
    432     if (uprv_isNaN(number)) {
    433         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
    434         if (!rule) {
    435             rule = owner->getDefaultNaNRule();
    436         }
    437         return rule;
    438     }
    439 
    440     // if the number is negative, return the negative number rule
    441     // (if there isn't a negative-number rule, we pretend it's a
    442     // positive number)
    443     if (number < 0) {
    444         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    445             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
    446         } else {
    447             number = -number;
    448         }
    449     }
    450 
    451     if (uprv_isInfinite(number)) {
    452         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
    453         if (!rule) {
    454             rule = owner->getDefaultInfinityRule();
    455         }
    456         return rule;
    457     }
    458 
    459     // if the number isn't an integer, we use one of the fraction rules...
    460     if (number != uprv_floor(number)) {
    461         // if the number is between 0 and 1, return the proper
    462         // fraction rule
    463         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
    464             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
    465         }
    466         // otherwise, return the improper fraction rule
    467         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
    468             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
    469         }
    470     }
    471 
    472     // if there's a master rule, use it to format the number
    473     if (nonNumericalRules[MASTER_RULE_INDEX]) {
    474         return nonNumericalRules[MASTER_RULE_INDEX];
    475     }
    476 
    477     // and if we haven't yet returned a rule, use findNormalRule()
    478     // to find the applicable rule
    479     int64_t r = util64_fromDouble(number + 0.5);
    480     return findNormalRule(r);
    481 }
    482 
    483 const NFRule *
    484 NFRuleSet::findNormalRule(int64_t number) const
    485 {
    486     // if this is a fraction rule set, use findFractionRuleSetRule()
    487     // to find the rule (we should only go into this clause if the
    488     // value is 0)
    489     if (fIsFractionRuleSet) {
    490         return findFractionRuleSetRule((double)number);
    491     }
    492 
    493     // if the number is negative, return the negative-number rule
    494     // (if there isn't one, pretend the number is positive)
    495     if (number < 0) {
    496         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    497             return nonNumericalRules[NEGATIVE_RULE_INDEX];
    498         } else {
    499             number = -number;
    500         }
    501     }
    502 
    503     // we have to repeat the preceding two checks, even though we
    504     // do them in findRule(), because the version of format() that
    505     // takes a long bypasses findRule() and goes straight to this
    506     // function.  This function does skip the fraction rules since
    507     // we know the value is an integer (it also skips the master
    508     // rule, since it's considered a fraction rule.  Skipping the
    509     // master rule in this function is also how we avoid infinite
    510     // recursion)
    511 
    512     // {dlf} unfortunately this fails if there are no rules except
    513     // special rules.  If there are no rules, use the master rule.
    514 
    515     // binary-search the rule list for the applicable rule
    516     // (a rule is used for all values from its base value to
    517     // the next rule's base value)
    518     int32_t hi = rules.size();
    519     if (hi > 0) {
    520         int32_t lo = 0;
    521 
    522         while (lo < hi) {
    523             int32_t mid = (lo + hi) / 2;
    524             if (rules[mid]->getBaseValue() == number) {
    525                 return rules[mid];
    526             }
    527             else if (rules[mid]->getBaseValue() > number) {
    528                 hi = mid;
    529             }
    530             else {
    531                 lo = mid + 1;
    532             }
    533         }
    534         if (hi == 0) { // bad rule set, minimum base > 0
    535             return NULL; // want to throw exception here
    536         }
    537 
    538         NFRule *result = rules[hi - 1];
    539 
    540         // use shouldRollBack() to see whether we need to invoke the
    541         // rollback rule (see shouldRollBack()'s documentation for
    542         // an explanation of the rollback rule).  If we do, roll back
    543         // one rule and return that one instead of the one we'd normally
    544         // return
    545         if (result->shouldRollBack((double)number)) {
    546             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
    547                 return NULL;
    548             }
    549             result = rules[hi - 2];
    550         }
    551         return result;
    552     }
    553     // else use the master rule
    554     return nonNumericalRules[MASTER_RULE_INDEX];
    555 }
    556 
    557 /**
    558  * If this rule is a fraction rule set, this function is used by
    559  * findRule() to select the most appropriate rule for formatting
    560  * the number.  Basically, the base value of each rule in the rule
    561  * set is treated as the denominator of a fraction.  Whichever
    562  * denominator can produce the fraction closest in value to the
    563  * number passed in is the result.  If there's a tie, the earlier
    564  * one in the list wins.  (If there are two rules in a row with the
    565  * same base value, the first one is used when the numerator of the
    566  * fraction would be 1, and the second rule is used the rest of the
    567  * time.
    568  * @param number The number being formatted (which will always be
    569  * a number between 0 and 1)
    570  * @return The rule to use to format this number
    571  */
    572 const NFRule*
    573 NFRuleSet::findFractionRuleSetRule(double number) const
    574 {
    575     // the obvious way to do this (multiply the value being formatted
    576     // by each rule's base value until you get an integral result)
    577     // doesn't work because of rounding error.  This method is more
    578     // accurate
    579 
    580     // find the least common multiple of the rules' base values
    581     // and multiply this by the number being formatted.  This is
    582     // all the precision we need, and we can do all of the rest
    583     // of the math using integer arithmetic
    584     int64_t leastCommonMultiple = rules[0]->getBaseValue();
    585     int64_t numerator;
    586     {
    587         for (uint32_t i = 1; i < rules.size(); ++i) {
    588             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
    589         }
    590         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
    591     }
    592     // for each rule, do the following...
    593     int64_t tempDifference;
    594     int64_t difference = util64_fromDouble(uprv_maxMantissa());
    595     int32_t winner = 0;
    596     for (uint32_t i = 0; i < rules.size(); ++i) {
    597         // "numerator" is the numerator of the fraction if the
    598         // denominator is the LCD.  The numerator if the rule's
    599         // base value is the denominator is "numerator" times the
    600         // base value divided bythe LCD.  Here we check to see if
    601         // that's an integer, and if not, how close it is to being
    602         // an integer.
    603         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
    604 
    605 
    606         // normalize the result of the above calculation: we want
    607         // the numerator's distance from the CLOSEST multiple
    608         // of the LCD
    609         if (leastCommonMultiple - tempDifference < tempDifference) {
    610             tempDifference = leastCommonMultiple - tempDifference;
    611         }
    612 
    613         // if this is as close as we've come, keep track of how close
    614         // that is, and the line number of the rule that did it.  If
    615         // we've scored a direct hit, we don't have to look at any more
    616         // rules
    617         if (tempDifference < difference) {
    618             difference = tempDifference;
    619             winner = i;
    620             if (difference == 0) {
    621                 break;
    622             }
    623         }
    624     }
    625 
    626     // if we have two successive rules that both have the winning base
    627     // value, then the first one (the one we found above) is used if
    628     // the numerator of the fraction is 1 and the second one is used if
    629     // the numerator of the fraction is anything else (this lets us
    630     // do things like "one third"/"two thirds" without haveing to define
    631     // a whole bunch of extra rule sets)
    632     if ((unsigned)(winner + 1) < rules.size() &&
    633         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
    634         double n = ((double)rules[winner]->getBaseValue()) * number;
    635         if (n < 0.5 || n >= 2) {
    636             ++winner;
    637         }
    638     }
    639 
    640     // finally, return the winning rule
    641     return rules[winner];
    642 }
    643 
    644 /**
    645  * Parses a string.  Matches the string to be parsed against each
    646  * of its rules (with a base value less than upperBound) and returns
    647  * the value produced by the rule that matched the most charcters
    648  * in the source string.
    649  * @param text The string to parse
    650  * @param parsePosition The initial position is ignored and assumed
    651  * to be 0.  On exit, this object has been updated to point to the
    652  * first character position this rule set didn't consume.
    653  * @param upperBound Limits the rules that can be allowed to match.
    654  * Only rules whose base values are strictly less than upperBound
    655  * are considered.
    656  * @return The numerical result of parsing this string.  This will
    657  * be the matching rule's base value, composed appropriately with
    658  * the results of matching any of its substitutions.  The object
    659  * will be an instance of Long if it's an integral value; otherwise,
    660  * it will be an instance of Double.  This function always returns
    661  * a valid object: If nothing matched the input string at all,
    662  * this function returns new Long(0), and the parse position is
    663  * left unchanged.
    664  */
    665 #ifdef RBNF_DEBUG
    666 #include <stdio.h>
    667 
    668 static void dumpUS(FILE* f, const UnicodeString& us) {
    669   int len = us.length();
    670   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
    671   if (buf != NULL) {
    672 	  us.extract(0, len, buf);
    673 	  buf[len] = 0;
    674 	  fprintf(f, "%s", buf);
    675 	  uprv_free(buf); //delete[] buf;
    676   }
    677 }
    678 #endif
    679 
    680 UBool
    681 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
    682 {
    683     // try matching each rule in the rule set against the text being
    684     // parsed.  Whichever one matches the most characters is the one
    685     // that determines the value we return.
    686 
    687     result.setLong(0);
    688 
    689     // dump out if there's no text to parse
    690     if (text.length() == 0) {
    691         return 0;
    692     }
    693 
    694     ParsePosition highWaterMark;
    695     ParsePosition workingPos = pos;
    696 
    697 #ifdef RBNF_DEBUG
    698     fprintf(stderr, "<nfrs> %x '", this);
    699     dumpUS(stderr, name);
    700     fprintf(stderr, "' text '");
    701     dumpUS(stderr, text);
    702     fprintf(stderr, "'\n");
    703     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
    704 #endif
    705     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    706     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    707         if (nonNumericalRules[i]) {
    708             Formattable tempResult;
    709             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
    710             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
    711                 result = tempResult;
    712                 highWaterMark = workingPos;
    713             }
    714             workingPos = pos;
    715         }
    716     }
    717 #ifdef RBNF_DEBUG
    718     fprintf(stderr, "<nfrs> continue other with text '");
    719     dumpUS(stderr, text);
    720     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    721 #endif
    722 
    723     // finally, go through the regular rules one at a time.  We start
    724     // at the end of the list because we want to try matching the most
    725     // sigificant rule first (this helps ensure that we parse
    726     // "five thousand three hundred six" as
    727     // "(five thousand) (three hundred) (six)" rather than
    728     // "((five thousand three) hundred) (six)").  Skip rules whose
    729     // base values are higher than the upper bound (again, this helps
    730     // limit ambiguity by making sure the rules that match a rule's
    731     // are less significant than the rule containing the substitutions)/
    732     {
    733         int64_t ub = util64_fromDouble(upperBound);
    734 #ifdef RBNF_DEBUG
    735         {
    736             char ubstr[64];
    737             util64_toa(ub, ubstr, 64);
    738             char ubstrhex[64];
    739             util64_toa(ub, ubstrhex, 64, 16);
    740             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
    741         }
    742 #endif
    743         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
    744             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
    745                 continue;
    746             }
    747             Formattable tempResult;
    748             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
    749             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
    750                 result = tempResult;
    751                 highWaterMark = workingPos;
    752             }
    753             workingPos = pos;
    754         }
    755     }
    756 #ifdef RBNF_DEBUG
    757     fprintf(stderr, "<nfrs> exit\n");
    758 #endif
    759     // finally, update the parse postion we were passed to point to the
    760     // first character we didn't use, and return the result that
    761     // corresponds to that string of characters
    762     pos = highWaterMark;
    763 
    764     return 1;
    765 }
    766 
    767 void
    768 NFRuleSet::appendRules(UnicodeString& result) const
    769 {
    770     uint32_t i;
    771 
    772     // the rule set name goes first...
    773     result.append(name);
    774     result.append(gColon);
    775     result.append(gLineFeed);
    776 
    777     // followed by the regular rules...
    778     for (i = 0; i < rules.size(); i++) {
    779         rules[i]->_appendRuleText(result);
    780         result.append(gLineFeed);
    781     }
    782 
    783     // followed by the special rules (if they exist)
    784     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    785         NFRule *rule = nonNumericalRules[i];
    786         if (nonNumericalRules[i]) {
    787             if (rule->getBaseValue() == NFRule::kImproperFractionRule
    788                 || rule->getBaseValue() == NFRule::kProperFractionRule
    789                 || rule->getBaseValue() == NFRule::kMasterRule)
    790             {
    791                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    792                     NFRule *fractionRule = fractionRules[fIdx];
    793                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
    794                         fractionRule->_appendRuleText(result);
    795                         result.append(gLineFeed);
    796                     }
    797                 }
    798             }
    799             else {
    800                 rule->_appendRuleText(result);
    801                 result.append(gLineFeed);
    802             }
    803         }
    804     }
    805 }
    806 
    807 // utility functions
    808 
    809 int64_t util64_fromDouble(double d) {
    810     int64_t result = 0;
    811     if (!uprv_isNaN(d)) {
    812         double mant = uprv_maxMantissa();
    813         if (d < -mant) {
    814             d = -mant;
    815         } else if (d > mant) {
    816             d = mant;
    817         }
    818         UBool neg = d < 0;
    819         if (neg) {
    820             d = -d;
    821         }
    822         result = (int64_t)uprv_floor(d);
    823         if (neg) {
    824             result = -result;
    825         }
    826     }
    827     return result;
    828 }
    829 
    830 int64_t util64_pow(int32_t r, uint32_t e)  {
    831     if (r == 0) {
    832         return 0;
    833     } else if (e == 0) {
    834         return 1;
    835     } else {
    836         int64_t n = r;
    837         while (--e > 0) {
    838             n *= r;
    839         }
    840         return n;
    841     }
    842 }
    843 
    844 static const uint8_t asciiDigits[] = {
    845     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    846     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    847     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    848     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    849     0x77u, 0x78u, 0x79u, 0x7au,
    850 };
    851 
    852 static const UChar kUMinus = (UChar)0x002d;
    853 
    854 #ifdef RBNF_DEBUG
    855 static const char kMinus = '-';
    856 
    857 static const uint8_t digitInfo[] = {
    858         0,     0,     0,     0,     0,     0,     0,     0,
    859         0,     0,     0,     0,     0,     0,     0,     0,
    860         0,     0,     0,     0,     0,     0,     0,     0,
    861         0,     0,     0,     0,     0,     0,     0,     0,
    862         0,     0,     0,     0,     0,     0,     0,     0,
    863         0,     0,     0,     0,     0,     0,     0,     0,
    864     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    865     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
    866         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    867     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    868     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    869     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    870         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    871     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    872     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    873     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    874 };
    875 
    876 int64_t util64_atoi(const char* str, uint32_t radix)
    877 {
    878     if (radix > 36) {
    879         radix = 36;
    880     } else if (radix < 2) {
    881         radix = 2;
    882     }
    883     int64_t lradix = radix;
    884 
    885     int neg = 0;
    886     if (*str == kMinus) {
    887         ++str;
    888         neg = 1;
    889     }
    890     int64_t result = 0;
    891     uint8_t b;
    892     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
    893         result *= lradix;
    894         result += (int32_t)b;
    895     }
    896     if (neg) {
    897         result = -result;
    898     }
    899     return result;
    900 }
    901 
    902 int64_t util64_utoi(const UChar* str, uint32_t radix)
    903 {
    904     if (radix > 36) {
    905         radix = 36;
    906     } else if (radix < 2) {
    907         radix = 2;
    908     }
    909     int64_t lradix = radix;
    910 
    911     int neg = 0;
    912     if (*str == kUMinus) {
    913         ++str;
    914         neg = 1;
    915     }
    916     int64_t result = 0;
    917     UChar c;
    918     uint8_t b;
    919     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
    920         result *= lradix;
    921         result += (int32_t)b;
    922     }
    923     if (neg) {
    924         result = -result;
    925     }
    926     return result;
    927 }
    928 
    929 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
    930 {
    931     if (radix > 36) {
    932         radix = 36;
    933     } else if (radix < 2) {
    934         radix = 2;
    935     }
    936     int64_t base = radix;
    937 
    938     char* p = buf;
    939     if (len && (w < 0) && (radix == 10) && !raw) {
    940         w = -w;
    941         *p++ = kMinus;
    942         --len;
    943     } else if (len && (w == 0)) {
    944         *p++ = (char)raw ? 0 : asciiDigits[0];
    945         --len;
    946     }
    947 
    948     while (len && w != 0) {
    949         int64_t n = w / base;
    950         int64_t m = n * base;
    951         int32_t d = (int32_t)(w-m);
    952         *p++ = raw ? (char)d : asciiDigits[d];
    953         w = n;
    954         --len;
    955     }
    956     if (len) {
    957         *p = 0; // null terminate if room for caller convenience
    958     }
    959 
    960     len = p - buf;
    961     if (*buf == kMinus) {
    962         ++buf;
    963     }
    964     while (--p > buf) {
    965         char c = *p;
    966         *p = *buf;
    967         *buf = c;
    968         ++buf;
    969     }
    970 
    971     return len;
    972 }
    973 #endif
    974 
    975 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
    976 {
    977     if (radix > 36) {
    978         radix = 36;
    979     } else if (radix < 2) {
    980         radix = 2;
    981     }
    982     int64_t base = radix;
    983 
    984     UChar* p = buf;
    985     if (len && (w < 0) && (radix == 10) && !raw) {
    986         w = -w;
    987         *p++ = kUMinus;
    988         --len;
    989     } else if (len && (w == 0)) {
    990         *p++ = (UChar)raw ? 0 : asciiDigits[0];
    991         --len;
    992     }
    993 
    994     while (len && (w != 0)) {
    995         int64_t n = w / base;
    996         int64_t m = n * base;
    997         int32_t d = (int32_t)(w-m);
    998         *p++ = (UChar)(raw ? d : asciiDigits[d]);
    999         w = n;
   1000         --len;
   1001     }
   1002     if (len) {
   1003         *p = 0; // null terminate if room for caller convenience
   1004     }
   1005 
   1006     len = (uint32_t)(p - buf);
   1007     if (*buf == kUMinus) {
   1008         ++buf;
   1009     }
   1010     while (--p > buf) {
   1011         UChar c = *p;
   1012         *p = *buf;
   1013         *buf = c;
   1014         ++buf;
   1015     }
   1016 
   1017     return len;
   1018 }
   1019 
   1020 
   1021 U_NAMESPACE_END
   1022 
   1023 /* U_HAVE_RBNF */
   1024 #endif
   1025 
   1026