Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *   Copyright (C) 1997-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ******************************************************************************
      8 *   file name:  nfrs.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 * Modification history
     14 * Date        Name      Comments
     15 * 10/11/2001  Doug      Ported from ICU4J
     16 */
     17 
     18 #include "nfrs.h"
     19 
     20 #if U_HAVE_RBNF
     21 
     22 #include "unicode/uchar.h"
     23 #include "nfrule.h"
     24 #include "nfrlist.h"
     25 #include "patternprops.h"
     26 #include "putilimp.h"
     27 
     28 #ifdef RBNF_DEBUG
     29 #include "cmemory.h"
     30 #endif
     31 
     32 enum {
     33     /** -x */
     34     NEGATIVE_RULE_INDEX = 0,
     35     /** x.x */
     36     IMPROPER_FRACTION_RULE_INDEX = 1,
     37     /** 0.x */
     38     PROPER_FRACTION_RULE_INDEX = 2,
     39     /** x.0 */
     40     MASTER_RULE_INDEX = 3,
     41     /** Inf */
     42     INFINITY_RULE_INDEX = 4,
     43     /** NaN */
     44     NAN_RULE_INDEX = 5,
     45     NON_NUMERICAL_RULE_LENGTH = 6
     46 };
     47 
     48 U_NAMESPACE_BEGIN
     49 
     50 #if 0
     51 // euclid's algorithm works with doubles
     52 // note, doubles only get us up to one quadrillion or so, which
     53 // isn't as much range as we get with longs.  We probably still
     54 // want either 64-bit math, or BigInteger.
     55 
     56 static int64_t
     57 util_lcm(int64_t x, int64_t y)
     58 {
     59     x.abs();
     60     y.abs();
     61 
     62     if (x == 0 || y == 0) {
     63         return 0;
     64     } else {
     65         do {
     66             if (x < y) {
     67                 int64_t t = x; x = y; y = t;
     68             }
     69             x -= y * (x/y);
     70         } while (x != 0);
     71 
     72         return y;
     73     }
     74 }
     75 
     76 #else
     77 /**
     78  * Calculates the least common multiple of x and y.
     79  */
     80 static int64_t
     81 util_lcm(int64_t x, int64_t y)
     82 {
     83     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
     84     // vol. 2, 1st ed., pp. 298-299
     85     int64_t x1 = x;
     86     int64_t y1 = y;
     87 
     88     int p2 = 0;
     89     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
     90         ++p2;
     91         x1 >>= 1;
     92         y1 >>= 1;
     93     }
     94 
     95     int64_t t;
     96     if ((x1 & 1) == 1) {
     97         t = -y1;
     98     } else {
     99         t = x1;
    100     }
    101 
    102     while (t != 0) {
    103         while ((t & 1) == 0) {
    104             t = t >> 1;
    105         }
    106         if (t > 0) {
    107             x1 = t;
    108         } else {
    109             y1 = -t;
    110         }
    111         t = x1 - y1;
    112     }
    113 
    114     int64_t gcd = x1 << p2;
    115 
    116     // x * y == gcd(x, y) * lcm(x, y)
    117     return x / gcd * y;
    118 }
    119 #endif
    120 
    121 static const UChar gPercent = 0x0025;
    122 static const UChar gColon = 0x003a;
    123 static const UChar gSemicolon = 0x003b;
    124 static const UChar gLineFeed = 0x000a;
    125 
    126 static const UChar gPercentPercent[] =
    127 {
    128     0x25, 0x25, 0
    129 }; /* "%%" */
    130 
    131 static const UChar gNoparse[] =
    132 {
    133     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
    134 }; /* "@noparse" */
    135 
    136 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
    137   : name()
    138   , rules(0)
    139   , owner(_owner)
    140   , fractionRules()
    141   , fIsFractionRuleSet(FALSE)
    142   , fIsPublic(FALSE)
    143   , fIsParseable(TRUE)
    144 {
    145     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    146         nonNumericalRules[i] = NULL;
    147     }
    148 
    149     if (U_FAILURE(status)) {
    150         return;
    151     }
    152 
    153     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
    154 
    155     if (description.length() == 0) {
    156         // throw new IllegalArgumentException("Empty rule set description");
    157         status = U_PARSE_ERROR;
    158         return;
    159     }
    160 
    161     // if the description begins with a rule set name (the rule set
    162     // name can be omitted in formatter descriptions that consist
    163     // of only one rule set), copy it out into our "name" member
    164     // and delete it from the description
    165     if (description.charAt(0) == gPercent) {
    166         int32_t pos = description.indexOf(gColon);
    167         if (pos == -1) {
    168             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
    169             status = U_PARSE_ERROR;
    170         } else {
    171             name.setTo(description, 0, pos);
    172             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
    173             }
    174             description.remove(0, pos);
    175         }
    176     } else {
    177         name.setTo(UNICODE_STRING_SIMPLE("%default"));
    178     }
    179 
    180     if (description.length() == 0) {
    181         // throw new IllegalArgumentException("Empty rule set description");
    182         status = U_PARSE_ERROR;
    183     }
    184 
    185     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
    186 
    187     if ( name.endsWith(gNoparse,8) ) {
    188         fIsParseable = FALSE;
    189         name.truncate(name.length()-8); // remove the @noparse from the name
    190     }
    191 
    192     // all of the other members of NFRuleSet are initialized
    193     // by parseRules()
    194 }
    195 
    196 void
    197 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
    198 {
    199     // start by creating a Vector whose elements are Strings containing
    200     // the descriptions of the rules (one rule per element).  The rules
    201     // are separated by semicolons (there's no escape facility: ALL
    202     // semicolons are rule delimiters)
    203 
    204     if (U_FAILURE(status)) {
    205         return;
    206     }
    207 
    208     // ensure we are starting with an empty rule list
    209     rules.deleteAll();
    210 
    211     // dlf - the original code kept a separate description array for no reason,
    212     // so I got rid of it.  The loop was too complex so I simplified it.
    213 
    214     UnicodeString currentDescription;
    215     int32_t oldP = 0;
    216     while (oldP < description.length()) {
    217         int32_t p = description.indexOf(gSemicolon, oldP);
    218         if (p == -1) {
    219             p = description.length();
    220         }
    221         currentDescription.setTo(description, oldP, p - oldP);
    222         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
    223         oldP = p + 1;
    224     }
    225 
    226     // for rules that didn't specify a base value, their base values
    227     // were initialized to 0.  Make another pass through the list and
    228     // set all those rules' base values.  We also remove any special
    229     // rules from the list and put them into their own member variables
    230     int64_t defaultBaseValue = 0;
    231 
    232     // (this isn't a for loop because we might be deleting items from
    233     // the vector-- we want to make sure we only increment i when
    234     // we _didn't_ delete aything from the vector)
    235     int32_t rulesSize = rules.size();
    236     for (int32_t i = 0; i < rulesSize; i++) {
    237         NFRule* rule = rules[i];
    238         int64_t baseValue = rule->getBaseValue();
    239 
    240         if (baseValue == 0) {
    241             // if the rule's base value is 0, fill in a default
    242             // base value (this will be 1 plus the preceding
    243             // rule's base value for regular rule sets, and the
    244             // same as the preceding rule's base value in fraction
    245             // rule sets)
    246             rule->setBaseValue(defaultBaseValue, status);
    247         }
    248         else {
    249             // if it's a regular rule that already knows its base value,
    250             // check to make sure the rules are in order, and update
    251             // the default base value for the next rule
    252             if (baseValue < defaultBaseValue) {
    253                 // throw new IllegalArgumentException("Rules are not in order");
    254                 status = U_PARSE_ERROR;
    255                 return;
    256             }
    257             defaultBaseValue = baseValue;
    258         }
    259         if (!fIsFractionRuleSet) {
    260             ++defaultBaseValue;
    261         }
    262     }
    263 }
    264 
    265 /**
    266  * Set one of the non-numerical rules.
    267  * @param rule The rule to set.
    268  */
    269 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
    270     int64_t baseValue = rule->getBaseValue();
    271     if (baseValue == NFRule::kNegativeNumberRule) {
    272         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
    273         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
    274     }
    275     else if (baseValue == NFRule::kImproperFractionRule) {
    276         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
    277     }
    278     else if (baseValue == NFRule::kProperFractionRule) {
    279         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
    280     }
    281     else if (baseValue == NFRule::kMasterRule) {
    282         setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
    283     }
    284     else if (baseValue == NFRule::kInfinityRule) {
    285         delete nonNumericalRules[INFINITY_RULE_INDEX];
    286         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
    287     }
    288     else if (baseValue == NFRule::kNaNRule) {
    289         delete nonNumericalRules[NAN_RULE_INDEX];
    290         nonNumericalRules[NAN_RULE_INDEX] = rule;
    291     }
    292 }
    293 
    294 /**
    295  * Determine the best fraction rule to use. Rules matching the decimal point from
    296  * DecimalFormatSymbols become the main set of rules to use.
    297  * @param originalIndex The index into nonNumericalRules
    298  * @param newRule The new rule to consider
    299  * @param rememberRule Should the new rule be added to fractionRules.
    300  */
    301 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
    302     if (rememberRule) {
    303         fractionRules.add(newRule);
    304     }
    305     NFRule *bestResult = nonNumericalRules[originalIndex];
    306     if (bestResult == NULL) {
    307         nonNumericalRules[originalIndex] = newRule;
    308     }
    309     else {
    310         // We have more than one. Which one is better?
    311         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
    312         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
    313             == newRule->getDecimalPoint())
    314         {
    315             nonNumericalRules[originalIndex] = newRule;
    316         }
    317         // else leave it alone
    318     }
    319 }
    320 
    321 NFRuleSet::~NFRuleSet()
    322 {
    323     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    324         if (i != IMPROPER_FRACTION_RULE_INDEX
    325             && i != PROPER_FRACTION_RULE_INDEX
    326             && i != MASTER_RULE_INDEX)
    327         {
    328             delete nonNumericalRules[i];
    329         }
    330         // else it will be deleted via NFRuleList fractionRules
    331     }
    332 }
    333 
    334 static UBool
    335 util_equalRules(const NFRule* rule1, const NFRule* rule2)
    336 {
    337     if (rule1) {
    338         if (rule2) {
    339             return *rule1 == *rule2;
    340         }
    341     } else if (!rule2) {
    342         return TRUE;
    343     }
    344     return FALSE;
    345 }
    346 
    347 UBool
    348 NFRuleSet::operator==(const NFRuleSet& rhs) const
    349 {
    350     if (rules.size() == rhs.rules.size() &&
    351         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
    352         name == rhs.name) {
    353 
    354         // ...then compare the non-numerical rule lists...
    355         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    356             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
    357                 return FALSE;
    358             }
    359         }
    360 
    361         // ...then compare the rule lists...
    362         for (uint32_t i = 0; i < rules.size(); ++i) {
    363             if (*rules[i] != *rhs.rules[i]) {
    364                 return FALSE;
    365             }
    366         }
    367         return TRUE;
    368     }
    369     return FALSE;
    370 }
    371 
    372 void
    373 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
    374     for (uint32_t i = 0; i < rules.size(); ++i) {
    375         rules[i]->setDecimalFormatSymbols(newSymbols, status);
    376     }
    377     // Switch the fraction rules to mirror the DecimalFormatSymbols.
    378     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
    379         if (nonNumericalRules[nonNumericalIdx]) {
    380             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    381                 NFRule *fractionRule = fractionRules[fIdx];
    382                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
    383                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
    384                 }
    385             }
    386         }
    387     }
    388 
    389     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
    390         NFRule *rule = nonNumericalRules[nnrIdx];
    391         if (rule) {
    392             rule->setDecimalFormatSymbols(newSymbols, status);
    393         }
    394     }
    395 }
    396 
    397 #define RECURSION_LIMIT 64
    398 
    399 void
    400 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    401 {
    402     if (recursionCount >= RECURSION_LIMIT) {
    403         // stop recursion
    404         status = U_INVALID_STATE_ERROR;
    405         return;
    406     }
    407     const NFRule *rule = findNormalRule(number);
    408     if (rule) { // else error, but can't report it
    409         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    410     }
    411 }
    412 
    413 void
    414 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
    415 {
    416     if (recursionCount >= RECURSION_LIMIT) {
    417         // stop recursion
    418         status = U_INVALID_STATE_ERROR;
    419         return;
    420     }
    421     const NFRule *rule = findDoubleRule(number);
    422     if (rule) { // else error, but can't report it
    423         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
    424     }
    425 }
    426 
    427 const NFRule*
    428 NFRuleSet::findDoubleRule(double number) const
    429 {
    430     // if this is a fraction rule set, use findFractionRuleSetRule()
    431     if (isFractionRuleSet()) {
    432         return findFractionRuleSetRule(number);
    433     }
    434 
    435     if (uprv_isNaN(number)) {
    436         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
    437         if (!rule) {
    438             rule = owner->getDefaultNaNRule();
    439         }
    440         return rule;
    441     }
    442 
    443     // if the number is negative, return the negative number rule
    444     // (if there isn't a negative-number rule, we pretend it's a
    445     // positive number)
    446     if (number < 0) {
    447         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    448             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
    449         } else {
    450             number = -number;
    451         }
    452     }
    453 
    454     if (uprv_isInfinite(number)) {
    455         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
    456         if (!rule) {
    457             rule = owner->getDefaultInfinityRule();
    458         }
    459         return rule;
    460     }
    461 
    462     // if the number isn't an integer, we use one of the fraction rules...
    463     if (number != uprv_floor(number)) {
    464         // if the number is between 0 and 1, return the proper
    465         // fraction rule
    466         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
    467             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
    468         }
    469         // otherwise, return the improper fraction rule
    470         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
    471             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
    472         }
    473     }
    474 
    475     // if there's a master rule, use it to format the number
    476     if (nonNumericalRules[MASTER_RULE_INDEX]) {
    477         return nonNumericalRules[MASTER_RULE_INDEX];
    478     }
    479 
    480     // and if we haven't yet returned a rule, use findNormalRule()
    481     // to find the applicable rule
    482     int64_t r = util64_fromDouble(number + 0.5);
    483     return findNormalRule(r);
    484 }
    485 
    486 const NFRule *
    487 NFRuleSet::findNormalRule(int64_t number) const
    488 {
    489     // if this is a fraction rule set, use findFractionRuleSetRule()
    490     // to find the rule (we should only go into this clause if the
    491     // value is 0)
    492     if (fIsFractionRuleSet) {
    493         return findFractionRuleSetRule((double)number);
    494     }
    495 
    496     // if the number is negative, return the negative-number rule
    497     // (if there isn't one, pretend the number is positive)
    498     if (number < 0) {
    499         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
    500             return nonNumericalRules[NEGATIVE_RULE_INDEX];
    501         } else {
    502             number = -number;
    503         }
    504     }
    505 
    506     // we have to repeat the preceding two checks, even though we
    507     // do them in findRule(), because the version of format() that
    508     // takes a long bypasses findRule() and goes straight to this
    509     // function.  This function does skip the fraction rules since
    510     // we know the value is an integer (it also skips the master
    511     // rule, since it's considered a fraction rule.  Skipping the
    512     // master rule in this function is also how we avoid infinite
    513     // recursion)
    514 
    515     // {dlf} unfortunately this fails if there are no rules except
    516     // special rules.  If there are no rules, use the master rule.
    517 
    518     // binary-search the rule list for the applicable rule
    519     // (a rule is used for all values from its base value to
    520     // the next rule's base value)
    521     int32_t hi = rules.size();
    522     if (hi > 0) {
    523         int32_t lo = 0;
    524 
    525         while (lo < hi) {
    526             int32_t mid = (lo + hi) / 2;
    527             if (rules[mid]->getBaseValue() == number) {
    528                 return rules[mid];
    529             }
    530             else if (rules[mid]->getBaseValue() > number) {
    531                 hi = mid;
    532             }
    533             else {
    534                 lo = mid + 1;
    535             }
    536         }
    537         if (hi == 0) { // bad rule set, minimum base > 0
    538             return NULL; // want to throw exception here
    539         }
    540 
    541         NFRule *result = rules[hi - 1];
    542 
    543         // use shouldRollBack() to see whether we need to invoke the
    544         // rollback rule (see shouldRollBack()'s documentation for
    545         // an explanation of the rollback rule).  If we do, roll back
    546         // one rule and return that one instead of the one we'd normally
    547         // return
    548         if (result->shouldRollBack(number)) {
    549             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
    550                 return NULL;
    551             }
    552             result = rules[hi - 2];
    553         }
    554         return result;
    555     }
    556     // else use the master rule
    557     return nonNumericalRules[MASTER_RULE_INDEX];
    558 }
    559 
    560 /**
    561  * If this rule is a fraction rule set, this function is used by
    562  * findRule() to select the most appropriate rule for formatting
    563  * the number.  Basically, the base value of each rule in the rule
    564  * set is treated as the denominator of a fraction.  Whichever
    565  * denominator can produce the fraction closest in value to the
    566  * number passed in is the result.  If there's a tie, the earlier
    567  * one in the list wins.  (If there are two rules in a row with the
    568  * same base value, the first one is used when the numerator of the
    569  * fraction would be 1, and the second rule is used the rest of the
    570  * time.
    571  * @param number The number being formatted (which will always be
    572  * a number between 0 and 1)
    573  * @return The rule to use to format this number
    574  */
    575 const NFRule*
    576 NFRuleSet::findFractionRuleSetRule(double number) const
    577 {
    578     // the obvious way to do this (multiply the value being formatted
    579     // by each rule's base value until you get an integral result)
    580     // doesn't work because of rounding error.  This method is more
    581     // accurate
    582 
    583     // find the least common multiple of the rules' base values
    584     // and multiply this by the number being formatted.  This is
    585     // all the precision we need, and we can do all of the rest
    586     // of the math using integer arithmetic
    587     int64_t leastCommonMultiple = rules[0]->getBaseValue();
    588     int64_t numerator;
    589     {
    590         for (uint32_t i = 1; i < rules.size(); ++i) {
    591             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
    592         }
    593         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
    594     }
    595     // for each rule, do the following...
    596     int64_t tempDifference;
    597     int64_t difference = util64_fromDouble(uprv_maxMantissa());
    598     int32_t winner = 0;
    599     for (uint32_t i = 0; i < rules.size(); ++i) {
    600         // "numerator" is the numerator of the fraction if the
    601         // denominator is the LCD.  The numerator if the rule's
    602         // base value is the denominator is "numerator" times the
    603         // base value divided bythe LCD.  Here we check to see if
    604         // that's an integer, and if not, how close it is to being
    605         // an integer.
    606         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
    607 
    608 
    609         // normalize the result of the above calculation: we want
    610         // the numerator's distance from the CLOSEST multiple
    611         // of the LCD
    612         if (leastCommonMultiple - tempDifference < tempDifference) {
    613             tempDifference = leastCommonMultiple - tempDifference;
    614         }
    615 
    616         // if this is as close as we've come, keep track of how close
    617         // that is, and the line number of the rule that did it.  If
    618         // we've scored a direct hit, we don't have to look at any more
    619         // rules
    620         if (tempDifference < difference) {
    621             difference = tempDifference;
    622             winner = i;
    623             if (difference == 0) {
    624                 break;
    625             }
    626         }
    627     }
    628 
    629     // if we have two successive rules that both have the winning base
    630     // value, then the first one (the one we found above) is used if
    631     // the numerator of the fraction is 1 and the second one is used if
    632     // the numerator of the fraction is anything else (this lets us
    633     // do things like "one third"/"two thirds" without haveing to define
    634     // a whole bunch of extra rule sets)
    635     if ((unsigned)(winner + 1) < rules.size() &&
    636         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
    637         double n = ((double)rules[winner]->getBaseValue()) * number;
    638         if (n < 0.5 || n >= 2) {
    639             ++winner;
    640         }
    641     }
    642 
    643     // finally, return the winning rule
    644     return rules[winner];
    645 }
    646 
    647 /**
    648  * Parses a string.  Matches the string to be parsed against each
    649  * of its rules (with a base value less than upperBound) and returns
    650  * the value produced by the rule that matched the most charcters
    651  * in the source string.
    652  * @param text The string to parse
    653  * @param parsePosition The initial position is ignored and assumed
    654  * to be 0.  On exit, this object has been updated to point to the
    655  * first character position this rule set didn't consume.
    656  * @param upperBound Limits the rules that can be allowed to match.
    657  * Only rules whose base values are strictly less than upperBound
    658  * are considered.
    659  * @return The numerical result of parsing this string.  This will
    660  * be the matching rule's base value, composed appropriately with
    661  * the results of matching any of its substitutions.  The object
    662  * will be an instance of Long if it's an integral value; otherwise,
    663  * it will be an instance of Double.  This function always returns
    664  * a valid object: If nothing matched the input string at all,
    665  * this function returns new Long(0), and the parse position is
    666  * left unchanged.
    667  */
    668 #ifdef RBNF_DEBUG
    669 #include <stdio.h>
    670 
    671 static void dumpUS(FILE* f, const UnicodeString& us) {
    672   int len = us.length();
    673   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
    674   if (buf != NULL) {
    675 	  us.extract(0, len, buf);
    676 	  buf[len] = 0;
    677 	  fprintf(f, "%s", buf);
    678 	  uprv_free(buf); //delete[] buf;
    679   }
    680 }
    681 #endif
    682 
    683 UBool
    684 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
    685 {
    686     // try matching each rule in the rule set against the text being
    687     // parsed.  Whichever one matches the most characters is the one
    688     // that determines the value we return.
    689 
    690     result.setLong(0);
    691 
    692     // dump out if there's no text to parse
    693     if (text.length() == 0) {
    694         return 0;
    695     }
    696 
    697     ParsePosition highWaterMark;
    698     ParsePosition workingPos = pos;
    699 
    700 #ifdef RBNF_DEBUG
    701     fprintf(stderr, "<nfrs> %x '", this);
    702     dumpUS(stderr, name);
    703     fprintf(stderr, "' text '");
    704     dumpUS(stderr, text);
    705     fprintf(stderr, "'\n");
    706     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
    707 #endif
    708     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
    709     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
    710         if (nonNumericalRules[i]) {
    711             Formattable tempResult;
    712             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
    713             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
    714                 result = tempResult;
    715                 highWaterMark = workingPos;
    716             }
    717             workingPos = pos;
    718         }
    719     }
    720 #ifdef RBNF_DEBUG
    721     fprintf(stderr, "<nfrs> continue other with text '");
    722     dumpUS(stderr, text);
    723     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
    724 #endif
    725 
    726     // finally, go through the regular rules one at a time.  We start
    727     // at the end of the list because we want to try matching the most
    728     // sigificant rule first (this helps ensure that we parse
    729     // "five thousand three hundred six" as
    730     // "(five thousand) (three hundred) (six)" rather than
    731     // "((five thousand three) hundred) (six)").  Skip rules whose
    732     // base values are higher than the upper bound (again, this helps
    733     // limit ambiguity by making sure the rules that match a rule's
    734     // are less significant than the rule containing the substitutions)/
    735     {
    736         int64_t ub = util64_fromDouble(upperBound);
    737 #ifdef RBNF_DEBUG
    738         {
    739             char ubstr[64];
    740             util64_toa(ub, ubstr, 64);
    741             char ubstrhex[64];
    742             util64_toa(ub, ubstrhex, 64, 16);
    743             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
    744         }
    745 #endif
    746         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
    747             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
    748                 continue;
    749             }
    750             Formattable tempResult;
    751             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
    752             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
    753                 result = tempResult;
    754                 highWaterMark = workingPos;
    755             }
    756             workingPos = pos;
    757         }
    758     }
    759 #ifdef RBNF_DEBUG
    760     fprintf(stderr, "<nfrs> exit\n");
    761 #endif
    762     // finally, update the parse postion we were passed to point to the
    763     // first character we didn't use, and return the result that
    764     // corresponds to that string of characters
    765     pos = highWaterMark;
    766 
    767     return 1;
    768 }
    769 
    770 void
    771 NFRuleSet::appendRules(UnicodeString& result) const
    772 {
    773     uint32_t i;
    774 
    775     // the rule set name goes first...
    776     result.append(name);
    777     result.append(gColon);
    778     result.append(gLineFeed);
    779 
    780     // followed by the regular rules...
    781     for (i = 0; i < rules.size(); i++) {
    782         rules[i]->_appendRuleText(result);
    783         result.append(gLineFeed);
    784     }
    785 
    786     // followed by the special rules (if they exist)
    787     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
    788         NFRule *rule = nonNumericalRules[i];
    789         if (nonNumericalRules[i]) {
    790             if (rule->getBaseValue() == NFRule::kImproperFractionRule
    791                 || rule->getBaseValue() == NFRule::kProperFractionRule
    792                 || rule->getBaseValue() == NFRule::kMasterRule)
    793             {
    794                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
    795                     NFRule *fractionRule = fractionRules[fIdx];
    796                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
    797                         fractionRule->_appendRuleText(result);
    798                         result.append(gLineFeed);
    799                     }
    800                 }
    801             }
    802             else {
    803                 rule->_appendRuleText(result);
    804                 result.append(gLineFeed);
    805             }
    806         }
    807     }
    808 }
    809 
    810 // utility functions
    811 
    812 int64_t util64_fromDouble(double d) {
    813     int64_t result = 0;
    814     if (!uprv_isNaN(d)) {
    815         double mant = uprv_maxMantissa();
    816         if (d < -mant) {
    817             d = -mant;
    818         } else if (d > mant) {
    819             d = mant;
    820         }
    821         UBool neg = d < 0;
    822         if (neg) {
    823             d = -d;
    824         }
    825         result = (int64_t)uprv_floor(d);
    826         if (neg) {
    827             result = -result;
    828         }
    829     }
    830     return result;
    831 }
    832 
    833 uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
    834     if (base == 0) {
    835         return 0;
    836     }
    837     uint64_t result = 1;
    838     uint64_t pow = base;
    839     while (true) {
    840         if ((exponent & 1) == 1) {
    841             result *= pow;
    842         }
    843         exponent >>= 1;
    844         if (exponent == 0) {
    845             break;
    846         }
    847         pow *= pow;
    848     }
    849     return result;
    850 }
    851 
    852 static const uint8_t asciiDigits[] = {
    853     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    854     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    855     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    856     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    857     0x77u, 0x78u, 0x79u, 0x7au,
    858 };
    859 
    860 static const UChar kUMinus = (UChar)0x002d;
    861 
    862 #ifdef RBNF_DEBUG
    863 static const char kMinus = '-';
    864 
    865 static const uint8_t digitInfo[] = {
    866         0,     0,     0,     0,     0,     0,     0,     0,
    867         0,     0,     0,     0,     0,     0,     0,     0,
    868         0,     0,     0,     0,     0,     0,     0,     0,
    869         0,     0,     0,     0,     0,     0,     0,     0,
    870         0,     0,     0,     0,     0,     0,     0,     0,
    871         0,     0,     0,     0,     0,     0,     0,     0,
    872     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    873     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
    874         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    875     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    876     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    877     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    878         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    879     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    880     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    881     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    882 };
    883 
    884 int64_t util64_atoi(const char* str, uint32_t radix)
    885 {
    886     if (radix > 36) {
    887         radix = 36;
    888     } else if (radix < 2) {
    889         radix = 2;
    890     }
    891     int64_t lradix = radix;
    892 
    893     int neg = 0;
    894     if (*str == kMinus) {
    895         ++str;
    896         neg = 1;
    897     }
    898     int64_t result = 0;
    899     uint8_t b;
    900     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
    901         result *= lradix;
    902         result += (int32_t)b;
    903     }
    904     if (neg) {
    905         result = -result;
    906     }
    907     return result;
    908 }
    909 
    910 int64_t util64_utoi(const UChar* str, uint32_t radix)
    911 {
    912     if (radix > 36) {
    913         radix = 36;
    914     } else if (radix < 2) {
    915         radix = 2;
    916     }
    917     int64_t lradix = radix;
    918 
    919     int neg = 0;
    920     if (*str == kUMinus) {
    921         ++str;
    922         neg = 1;
    923     }
    924     int64_t result = 0;
    925     UChar c;
    926     uint8_t b;
    927     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
    928         result *= lradix;
    929         result += (int32_t)b;
    930     }
    931     if (neg) {
    932         result = -result;
    933     }
    934     return result;
    935 }
    936 
    937 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
    938 {
    939     if (radix > 36) {
    940         radix = 36;
    941     } else if (radix < 2) {
    942         radix = 2;
    943     }
    944     int64_t base = radix;
    945 
    946     char* p = buf;
    947     if (len && (w < 0) && (radix == 10) && !raw) {
    948         w = -w;
    949         *p++ = kMinus;
    950         --len;
    951     } else if (len && (w == 0)) {
    952         *p++ = (char)raw ? 0 : asciiDigits[0];
    953         --len;
    954     }
    955 
    956     while (len && w != 0) {
    957         int64_t n = w / base;
    958         int64_t m = n * base;
    959         int32_t d = (int32_t)(w-m);
    960         *p++ = raw ? (char)d : asciiDigits[d];
    961         w = n;
    962         --len;
    963     }
    964     if (len) {
    965         *p = 0; // null terminate if room for caller convenience
    966     }
    967 
    968     len = p - buf;
    969     if (*buf == kMinus) {
    970         ++buf;
    971     }
    972     while (--p > buf) {
    973         char c = *p;
    974         *p = *buf;
    975         *buf = c;
    976         ++buf;
    977     }
    978 
    979     return len;
    980 }
    981 #endif
    982 
    983 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
    984 {
    985     if (radix > 36) {
    986         radix = 36;
    987     } else if (radix < 2) {
    988         radix = 2;
    989     }
    990     int64_t base = radix;
    991 
    992     UChar* p = buf;
    993     if (len && (w < 0) && (radix == 10) && !raw) {
    994         w = -w;
    995         *p++ = kUMinus;
    996         --len;
    997     } else if (len && (w == 0)) {
    998         *p++ = (UChar)raw ? 0 : asciiDigits[0];
    999         --len;
   1000     }
   1001 
   1002     while (len && (w != 0)) {
   1003         int64_t n = w / base;
   1004         int64_t m = n * base;
   1005         int32_t d = (int32_t)(w-m);
   1006         *p++ = (UChar)(raw ? d : asciiDigits[d]);
   1007         w = n;
   1008         --len;
   1009     }
   1010     if (len) {
   1011         *p = 0; // null terminate if room for caller convenience
   1012     }
   1013 
   1014     len = (uint32_t)(p - buf);
   1015     if (*buf == kUMinus) {
   1016         ++buf;
   1017     }
   1018     while (--p > buf) {
   1019         UChar c = *p;
   1020         *p = *buf;
   1021         *buf = c;
   1022         ++buf;
   1023     }
   1024 
   1025     return len;
   1026 }
   1027 
   1028 
   1029 U_NAMESPACE_END
   1030 
   1031 /* U_HAVE_RBNF */
   1032 #endif
   1033