Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2016, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 
     11 package android.icu.text;
     12 
     13 import java.math.BigInteger;
     14 import java.text.FieldPosition;
     15 import java.text.ParsePosition;
     16 import java.util.Arrays;
     17 import java.util.HashMap;
     18 import java.util.Locale;
     19 import java.util.Map;
     20 import java.util.MissingResourceException;
     21 import java.util.Set;
     22 
     23 import android.icu.impl.ICUData;
     24 import android.icu.impl.ICUDebug;
     25 import android.icu.impl.ICUResourceBundle;
     26 import android.icu.impl.PatternProps;
     27 import android.icu.lang.UCharacter;
     28 import android.icu.math.BigDecimal;
     29 import android.icu.util.ULocale;
     30 import android.icu.util.ULocale.Category;
     31 import android.icu.util.UResourceBundle;
     32 import android.icu.util.UResourceBundleIterator;
     33 
     34 
     35 /**
     36  * <p>A class that formats numbers according to a set of rules. This number formatter is
     37  * typically used for spelling out numeric values in words (e.g., 25,376 as
     38  * &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
     39  * cents soixante-seize&quot; or
     40  * &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
     41  * other complicated formatting tasks, such as formatting a number of seconds as hours,
     42  * minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).</p>
     43  *
     44  * <p>The resources contain three predefined formatters for each locale: spellout, which
     45  * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
     46  * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
     47  * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
     48  * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
     49  * by supplying programmer-defined rule sets.</p>
     50  *
     51  * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
     52  * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
     53  * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
     54  * Each rule has a string of output text and a value or range of values it is applicable to.
     55  * In a typical spellout rule set, the first twenty rules are the words for the numbers from
     56  * 0 to 19:</p>
     57  *
     58  * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
     59  * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
     60  *
     61  * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
     62  * we only have to supply the words for the multiples of 10:</p>
     63  *
     64  * <pre>20: twenty[-&gt;&gt;];
     65  * 30: thirty[-&gt;&gt;];
     66  * 40: forty[-&gt;&gt;];
     67  * 50: fifty[-&gt;&gt;];
     68  * 60: sixty[-&gt;&gt;];
     69  * 70: seventy[-&gt;&gt;];
     70  * 80: eighty[-&gt;&gt;];
     71  * 90: ninety[-&gt;&gt;];</pre>
     72  *
     73  * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
     74  * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
     75  * to all numbers from its own base value to one less than the next rule's base value. The
     76  * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
     77  * isolate the number's ones digit, format it using this same set of rules, and place the
     78  * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
     79  * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
     80  * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
     81  *
     82  * <p>For even larger numbers, we can actually look up several parts of the number in the
     83  * list:</p>
     84  *
     85  * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
     86  *
     87  * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
     88  * the hundreds digit (and any digits to its left), formats it using this same rule set, and
     89  * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
     90  * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
     91  * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
     92  * which is the highest power of 10 that is less than or equal to the base value (the user
     93  * can change this). To fill in the substitutions, the formatter divides the number being
     94  * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
     95  * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
     96  * of the brackets changes similarly: text in brackets is omitted if the value being
     97  * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
     98  * if a substitution is filled in with text that includes another substitution, that
     99  * substitution is also filled in.</p>
    100  *
    101  * <p>This rule covers values up to 999, at which point we add another rule:</p>
    102  *
    103  * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
    104  *
    105  * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
    106  * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
    107  * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
    108  *
    109  * <pre>1,000,000: &lt;&lt; million[ &gt;&gt;];
    110  * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
    111  * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
    112  * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
    113  *
    114  * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
    115  * are ignored by the rule parser. The last rule in the list is customarily treated as an
    116  * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
    117  * in this example) being used to print out an error message or default representation.
    118  * Notice also that the size of the major groupings in large numbers is controlled by the
    119  * spacing of the rules: because in English we group numbers by thousand, the higher rules
    120  * are separated from each other by a factor of 1,000.</p>
    121  *
    122  * <p>To see how these rules actually work in practice, consider the following example:
    123  * Formatting 25,340 with this rule set would work like this:</p>
    124  *
    125  * <table border="0" width="630">
    126  *   <tr>
    127  *     <td style="width: 21;"></td>
    128  *     <td style="width: 257; vertical-align: top;"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
    129  *     <td style="width: 340; vertical-align: top;">[the rule whose base value is 1,000 is applicable to 25,340]</td>
    130  *   </tr>
    131  *   <tr>
    132  *     <td style="width: 21;"></td>
    133  *     <td style="width: 257; vertical-align: top;"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
    134  *     <td style="width: 340; vertical-align: top;">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
    135  *   </tr>
    136  *   <tr>
    137  *     <td style="width: 21;"></td>
    138  *     <td style="width: 257; vertical-align: top;">twenty-<strong>five</strong> thousand &gt;&gt;</td>
    139  *     <td style="width: 340; vertical-align: top;">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
    140  *   </tr>
    141  *   <tr>
    142  *     <td style="width: 21;"></td>
    143  *     <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
    144  *     <td style="width: 340; vertical-align: top;">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
    145  *   </tr>
    146  *   <tr>
    147  *     <td style="width: 21;"></td>
    148  *     <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
    149  *     <td style="width: 340; vertical-align: top;">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
    150  *   </tr>
    151  *   <tr>
    152  *     <td style="width: 21;"></td>
    153  *     <td style="width: 257; vertical-align: top;">twenty-five thousand three hundred <strong>forty</strong></td>
    154  *     <td style="width: 340; vertical-align: top;">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
    155  *     evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
    156  *   </tr>
    157  * </table>
    158  *
    159  * <p>The above syntax suffices only to format positive integers. To format negative numbers,
    160  * we add a special rule:</p>
    161  *
    162  * <pre>-x: minus &gt;&gt;;</pre>
    163  *
    164  * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
    165  * where the base value would be. This rule is used to format all negative numbers. The
    166  * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
    167  * rules, and put the result here.&quot;</p>
    168  *
    169  * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
    170  * parts:</p>
    171  *
    172  * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
    173  *
    174  * <p>This rule is used for all positive non-integers (negative non-integers pass through the
    175  * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
    176  * the number's integral part, and the &gt;&gt; to the number's fractional part. The
    177  * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
    178  * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
    179  *
    180  * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
    181  *
    182  * <p>There is actually much more flexibility built into the rule language than the
    183  * description above shows. A formatter may own multiple rule sets, which can be selected by
    184  * the caller, and which can use each other to fill in their substitutions. Substitutions can
    185  * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
    186  * used to alter a rule's divisor in various ways. And there is provision for much more
    187  * flexible fraction handling. A complete description of the rule syntax follows:</p>
    188  *
    189  * <hr>
    190  *
    191  * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
    192  * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
    193  * set name must begin with a % sign. Rule sets with names that begin with a single % sign
    194  * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
    195  * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
    196  * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
    197  *
    198  * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
    199  * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
    200  * description which is used to define equivalences for lenient parsing. For more information
    201  * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
    202  * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
    203  * in collation rules, such as '&amp;', have no particular meaning when appearing outside
    204  * of the <tt>lenient-parse</tt> rule set.</p>
    205  *
    206  * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
    207  * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
    208  * These parameters are controlled by the description syntax, which consists of a <em>rule
    209  * descriptor,</em> a colon, and a <em>rule body.</em></p>
    210  *
    211  * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
    212  * name of a token):</p>
    213  *
    214  * <table border="0" width="100%">
    215  *   <tr>
    216  *     <td style="width: 5%; vertical-align: top;"></td>
    217  *     <td style="width: 8%; vertical-align: top;"><em>bv</em>:</td>
    218  *     <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
    219  *     number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
    220  *     which are ignored. The rule's divisor is the highest power of 10 less than or equal to
    221  *     the base value.</td>
    222  *   </tr>
    223  *   <tr>
    224  *     <td style="width: 5%; vertical-align: top;"></td>
    225  *     <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>:</td>
    226  *     <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
    227  *     highest power of <em>rad</em> less than or equal to the base value.</td>
    228  *   </tr>
    229  *   <tr>
    230  *     <td style="width: 5%; vertical-align: top;"></td>
    231  *     <td style="width: 8%; vertical-align: top;"><em>bv</em>&gt;:</td>
    232  *     <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
    233  *     let the radix be 10, and the exponent be the highest exponent of the radix that yields a
    234  *     result less than or equal to the base value. Every &gt; character after the base value
    235  *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
    236  *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
    237  *   </tr>
    238  *   <tr>
    239  *     <td style="width: 5%; vertical-align: top;"></td>
    240  *     <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>&gt;:</td>
    241  *     <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
    242  *     let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
    243  *     yields a result less than or equal to the base value. Every &gt; character after the radix
    244  *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
    245  *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
    246  *   </tr>
    247  *   <tr>
    248  *     <td style="width: 5%; vertical-align: top;"></td>
    249  *     <td style="width: 8%; vertical-align: top;">-x:</td>
    250  *     <td valign="top">The rule is a negative-number rule.</td>
    251  *   </tr>
    252  *   <tr>
    253  *     <td style="width: 5%; vertical-align: top;"></td>
    254  *     <td style="width: 8%; vertical-align: top;">x.x:</td>
    255  *     <td valign="top">The rule is an <em>improper fraction rule</em>. If the full stop in
    256  *     the middle of the rule name is replaced with the decimal point
    257  *     that is used in the language or DecimalFormatSymbols, then that rule will
    258  *     have precedence when formatting and parsing this rule. For example, some
    259  *     languages use the comma, and can thus be written as x,x instead. For example,
    260  *     you can use "x.x: &lt;&lt; point &gt;&gt;;x,x: &lt;&lt; comma &gt;&gt;;" to
    261  *     handle the decimal point that matches the language's natural spelling of
    262  *     the punctuation of either the full stop or comma.</td>
    263  *   </tr>
    264  *   <tr>
    265  *     <td style="width: 5%; vertical-align: top;"></td>
    266  *     <td style="width: 8%; vertical-align: top;">0.x:</td>
    267  *     <td valign="top">The rule is a <em>proper fraction rule</em>. If the full stop in
    268  *     the middle of the rule name is replaced with the decimal point
    269  *     that is used in the language or DecimalFormatSymbols, then that rule will
    270  *     have precedence when formatting and parsing this rule. For example, some
    271  *     languages use the comma, and can thus be written as 0,x instead. For example,
    272  *     you can use "0.x: point &gt;&gt;;0,x: comma &gt;&gt;;" to
    273  *     handle the decimal point that matches the language's natural spelling of
    274  *     the punctuation of either the full stop or comma.</td>
    275  *   </tr>
    276  *   <tr>
    277  *     <td style="width: 5%; vertical-align: top;"></td>
    278  *     <td style="width: 8%; vertical-align: top;">x.0:</td>
    279  *     <td valign="top">The rule is a <em>master rule</em>. If the full stop in
    280  *     the middle of the rule name is replaced with the decimal point
    281  *     that is used in the language or DecimalFormatSymbols, then that rule will
    282  *     have precedence when formatting and parsing this rule. For example, some
    283  *     languages use the comma, and can thus be written as x,0 instead. For example,
    284  *     you can use "x.0: &lt;&lt; point;x,0: &lt;&lt; comma;" to
    285  *     handle the decimal point that matches the language's natural spelling of
    286  *     the punctuation of either the full stop or comma.</td>
    287  *   </tr>
    288  *   <tr>
    289  *     <td style="width: 5%; vertical-align: top;"></td>
    290  *     <td style="width: 8%; vertical-align: top;">Inf:</td>
    291  *     <td style="vertical-align: top;">The rule for infinity.</td>
    292  *   </tr>
    293  *   <tr>
    294  *     <td style="width: 5%; vertical-align: top;"></td>
    295  *     <td style="width: 8%; vertical-align: top;">NaN:</td>
    296  *     <td style="vertical-align: top;">The rule for an IEEE 754 NaN (not a number).</td>
    297  *   </tr>
    298  *   <tr>
    299  *     <td style="width: 5%; vertical-align: top;"></td>
    300  *     <td style="width: 8%; vertical-align: top;"><em>nothing</em></td>
    301  *     <td style="vertical-align: top;">If the rule's rule descriptor is left out, the base value is one plus the
    302  *     preceding rule's base value (or zero if this is the first rule in the list) in a normal
    303  *     rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
    304  *     base value.</td>
    305  *   </tr>
    306  * </table>
    307  *
    308  * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
    309  * on whether it is used to format a number's integral part (or the whole number) or a
    310  * number's fractional part. Using a rule set to format a number's fractional part makes it a
    311  * fraction rule set.</p>
    312  *
    313  * <p>Which rule is used to format a number is defined according to one of the following
    314  * algorithms: If the rule set is a regular rule set, do the following:
    315  *
    316  * <ul>
    317  *   <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
    318  *     use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
    319  *     the master rule is ignored.)</li>
    320  *   <li>If the number is negative, use the negative-number rule.</li>
    321  *   <li>If the number has a fractional part and is greater than 1, use the improper fraction
    322  *     rule.</li>
    323  *   <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
    324  *     rule.</li>
    325  *   <li>Binary-search the rule list for the rule with the highest base value less than or equal
    326  *     to the number. If that rule has two substitutions, its base value is not an even multiple
    327  *     of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
    328  *     rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
    329  * </ul>
    330  *
    331  * <p>If the rule set is a fraction rule set, do the following:
    332  *
    333  * <ul>
    334  *   <li>Ignore negative-number and fraction rules.</li>
    335  *   <li>For each rule in the list, multiply the number being formatted (which will always be
    336  *     between 0 and 1) by the rule's base value. Keep track of the distance between the result
    337  *     and the nearest integer.</li>
    338  *   <li>Use the rule that produced the result closest to zero in the above calculation. In the
    339  *     event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
    340  *     to try each rule's base value as a possible denominator of a fraction. Whichever
    341  *     denominator produces the fraction closest in value to the number being formatted wins.) If
    342  *     the rule following the matching rule has the same base value, use it if the numerator of
    343  *     the fraction is anything other than 1; if the numerator is 1, use the original matching
    344  *     rule. (This is to allow singular and plural forms of the rule text without a lot of extra
    345  *     hassle.)</li>
    346  * </ul>
    347  *
    348  * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
    349  * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
    350  * brackets. The brackets denote optional text (and may also include one or both
    351  * substitutions). The exact meanings of the substitution tokens, and under what conditions
    352  * optional text is omitted, depend on the syntax of the substitution token and the context.
    353  * The rest of the text in a rule body is literal text that is output when the rule matches
    354  * the number being formatted.</p>
    355  *
    356  * <p>A substitution token begins and ends with a <em>token character.</em> The token
    357  * character and the context together specify a mathematical operation to be performed on the
    358  * number being formatted. An optional <em>substitution descriptor </em>specifies how the
    359  * value resulting from that operation is used to fill in the substitution. The position of
    360  * the substitution token in the rule body specifies the location of the resultant text in
    361  * the original rule text.</p>
    362  *
    363  * <p>The meanings of the substitution token characters are as follows:</p>
    364  *
    365  * <table border="0" width="100%">
    366  *   <tr>
    367  *     <td style="width: 37;"></td>
    368  *     <td style="width: 23;">&gt;&gt;</td>
    369  *     <td style="width: 165; vertical-align: top;">in normal rule</td>
    370  *     <td>Divide the number by the rule's divisor and format the remainder</td>
    371  *   </tr>
    372  *   <tr>
    373  *     <td style="width: 37;"></td>
    374  *     <td style="width: 23;"></td>
    375  *     <td style="width: 165; vertical-align: top;">in negative-number rule</td>
    376  *     <td>Find the absolute value of the number and format the result</td>
    377  *   </tr>
    378  *   <tr>
    379  *     <td style="width: 37;"></td>
    380  *     <td style="width: 23;"></td>
    381  *     <td style="width: 165; vertical-align: top;">in fraction or master rule</td>
    382  *     <td>Isolate the number's fractional part and format it.</td>
    383  *   </tr>
    384  *   <tr>
    385  *     <td style="width: 37;"></td>
    386  *     <td style="width: 23;"></td>
    387  *     <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
    388  *     <td>Not allowed.</td>
    389  *   </tr>
    390  *   <tr>
    391  *     <td style="width: 37;"></td>
    392  *     <td style="width: 23;">&gt;&gt;&gt;</td>
    393  *     <td style="width: 165; vertical-align: top;">in normal rule</td>
    394  *     <td>Divide the number by the rule's divisor and format the remainder,
    395  *       but bypass the normal rule-selection process and just use the
    396  *       rule that precedes this one in this rule list.</td>
    397  *   </tr>
    398  *   <tr>
    399  *     <td style="width: 37;"></td>
    400  *     <td style="width: 23;"></td>
    401  *     <td style="width: 165; vertical-align: top;">in all other rules</td>
    402  *     <td>Not allowed.</td>
    403  *   </tr>
    404  *   <tr>
    405  *     <td style="width: 37;"></td>
    406  *     <td style="width: 23;">&lt;&lt;</td>
    407  *     <td style="width: 165; vertical-align: top;">in normal rule</td>
    408  *     <td>Divide the number by the rule's divisor and format the quotient</td>
    409  *   </tr>
    410  *   <tr>
    411  *     <td style="width: 37;"></td>
    412  *     <td style="width: 23;"></td>
    413  *     <td style="width: 165; vertical-align: top;">in negative-number rule</td>
    414  *     <td>Not allowed.</td>
    415  *   </tr>
    416  *   <tr>
    417  *     <td style="width: 37;"></td>
    418  *     <td style="width: 23;"></td>
    419  *     <td style="width: 165; vertical-align: top;">in fraction or master rule</td>
    420  *     <td>Isolate the number's integral part and format it.</td>
    421  *   </tr>
    422  *   <tr>
    423  *     <td style="width: 37;"></td>
    424  *     <td style="width: 23;"></td>
    425  *     <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
    426  *     <td>Multiply the number by the rule's base value and format the result.</td>
    427  *   </tr>
    428  *   <tr>
    429  *     <td style="width: 37;"></td>
    430  *     <td style="width: 23;">==</td>
    431  *     <td style="width: 165; vertical-align: top;">in all rule sets</td>
    432  *     <td>Format the number unchanged</td>
    433  *   </tr>
    434  *   <tr>
    435  *     <td style="width: 37;"></td>
    436  *     <td style="width: 23;">[]</td>
    437  *     <td style="width: 165; vertical-align: top;">in normal rule</td>
    438  *     <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
    439  *   </tr>
    440  *   <tr>
    441  *     <td style="width: 37;"></td>
    442  *     <td style="width: 23;"></td>
    443  *     <td style="width: 165; vertical-align: top;">in negative-number rule</td>
    444  *     <td>Not allowed.</td>
    445  *   </tr>
    446  *   <tr>
    447  *     <td style="width: 37;"></td>
    448  *     <td style="width: 23;"></td>
    449  *     <td style="width: 165; vertical-align: top;">in improper-fraction rule</td>
    450  *     <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
    451  *     x.x rule and a 0.x rule)</td>
    452  *   </tr>
    453  *   <tr>
    454  *     <td style="width: 37;"></td>
    455  *     <td style="width: 23;"></td>
    456  *     <td style="width: 165; vertical-align: top;">in master rule</td>
    457  *     <td>Omit the optional text if the number is an integer (same as specifying both an x.x
    458  *     rule and an x.0 rule)</td>
    459  *   </tr>
    460  *   <tr>
    461  *     <td style="width: 37;"></td>
    462  *     <td style="width: 23;"></td>
    463  *     <td style="width: 165; vertical-align: top;">in proper-fraction rule</td>
    464  *     <td>Not allowed.</td>
    465  *   </tr>
    466  *   <tr>
    467  *     <td style="width: 37;"></td>
    468  *     <td style="width: 23;"></td>
    469  *     <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
    470  *     <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
    471  *   </tr>
    472  *   <tr>
    473  *     <td style="width: 37;">$(cardinal,<i>plural syntax</i>)$</td>
    474  *     <td style="width: 23;"></td>
    475  *     <td style="width: 165; vertical-align: top;">in all rule sets</td>
    476  *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
    477  *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
    478  *     This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
    479  *     as the same base value for parsing.</td>
    480  *   </tr>
    481  *   <tr>
    482  *     <td style="width: 37;">$(ordinal,<i>plural syntax</i>)$</td>
    483  *     <td style="width: 23;"></td>
    484  *     <td style="width: 165; vertical-align: top;">in all rule sets</td>
    485  *     <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
    486  *     exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
    487  *     This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
    488  *     as the same base value for parsing.</td>
    489  *   </tr>
    490  * </table>
    491  *
    492  * <p>The substitution descriptor (i.e., the text between the token characters) may take one
    493  * of three forms:</p>
    494  *
    495  * <table border="0" width="100%">
    496  *   <tr>
    497  *     <td style="width: 42;"></td>
    498  *     <td style="width: 166; vertical-align: top;">a rule set name</td>
    499  *     <td>Perform the mathematical operation on the number, and format the result using the
    500  *     named rule set.</td>
    501  *   </tr>
    502  *   <tr>
    503  *     <td style="width: 42;"></td>
    504  *     <td style="width: 166; vertical-align: top;">a DecimalFormat pattern</td>
    505  *     <td>Perform the mathematical operation on the number, and format the result using a
    506  *     DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
    507  *   </tr>
    508  *   <tr>
    509  *     <td style="width: 42;"></td>
    510  *     <td style="width: 166; vertical-align: top;">nothing</td>
    511  *     <td>Perform the mathematical operation on the number, and format the result using the rule
    512  *     set containing the current rule, except:<ul>
    513  *       <li>You can't have an empty substitution descriptor with a == substitution.</li>
    514  *       <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
    515  *         format the result one digit at a time using the rule set containing the current rule.</li>
    516  *       <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
    517  *         fraction rule set, format the result using the default rule set for this formatter.</li>
    518  *     </ul>
    519  *     </td>
    520  *   </tr>
    521  * </table>
    522  *
    523  * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
    524  * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
    525  * the apostrophe is ignored, but all text after it becomes significant (this is how you can
    526  * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
    527  * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
    528  * names. The characters beginning a substitution token are always treated as the beginning
    529  * of a substitution token.</p>
    530  *
    531  * <p>See the resource data and the demo program for annotated examples of real rule sets
    532  * using these features.</p>
    533  *
    534  * @author Richard Gillam
    535  * @see NumberFormat
    536  * @see DecimalFormat
    537  * @see PluralFormat
    538  * @see PluralRules
    539  * @hide Only a subset of ICU is exposed in Android
    540  */
    541 public class RuleBasedNumberFormat extends NumberFormat {
    542 
    543     //-----------------------------------------------------------------------
    544     // constants
    545     //-----------------------------------------------------------------------
    546 
    // Generated by serialver from JDK 1.4.1_01
    static final long serialVersionUID = -7664252765575395068L;

    /**
     * Selector code that tells the constructor to create a spellout formatter.
     * Selector codes are 1-based; the selector-based constructor uses
     * <code>format - 1</code> as an index into its resource-name tables.
     */
    public static final int SPELLOUT = 1;

    /**
     * Selector code that tells the constructor to create an ordinal formatter.
     */
    public static final int ORDINAL = 2;

    /**
     * Selector code that tells the constructor to create a duration formatter.
     */
    public static final int DURATION = 3;

    /**
     * Selector code that tells the constructor to create a numbering system formatter.
     */
    public static final int NUMBERING_SYSTEM = 4;
    569 
    570     //-----------------------------------------------------------------------
    571     // data members
    572     //-----------------------------------------------------------------------
    573 
    /**
     * The formatter's rule sets.
     */
    private transient NFRuleSet[] ruleSets = null;

    /**
     * The formatter's rule names mapped to rule sets.
     */
    private transient Map<String, NFRuleSet> ruleSetsMap = null;

    /**
     * A pointer to the formatter's default rule set.  This is always included
     * in ruleSets.  It is the rule set used by the inherited format() methods,
     * which have no way to name a rule set.
     */
    private transient NFRuleSet defaultRuleSet = null;

    /**
     * The formatter's locale.  This is used to create DecimalFormatSymbols and
     * Collator objects.
     * @serial
     */
    private ULocale locale = null;

    /**
     * The formatter's rounding mode.  Defaults to
     * <code>BigDecimal.ROUND_UNNECESSARY</code>.
     * @serial
     */
    private int roundingMode = BigDecimal.ROUND_UNNECESSARY;

    /**
     * Provider of the scanner to be used in lenient parsing.  (Older
     * documentation describes this as a Collator.)
     * NOTE(review): presumably lazy-evaluated, i.e. created the first time the
     * client does a parse with lenient-parse mode turned on -- confirm against
     * the accessor.
     */
    private transient RbnfLenientScannerProvider scannerProvider = null;

    // flag to mark whether we've previously looked for a scanner and failed
    private transient boolean lookedForScanner;

    /**
     * The DecimalFormatSymbols object that any DecimalFormat objects this
     * formatter uses should use.  This variable is lazy-evaluated: it isn't
     * filled in if the rule set never uses a DecimalFormat pattern.
     */
    private transient DecimalFormatSymbols decimalFormatSymbols = null;

    /**
     * The NumberFormat used when lenient parsing numbers.  This needs to reflect
     * the locale.  This is lazy-evaluated, like decimalFormatSymbols.  It is
     * here so it can be shared by different NFSubstitutions.
     */
    private transient DecimalFormat decimalFormat = null;

    /**
     * The rule used when dealing with infinity. This is lazy-evaluated, and derived from decimalFormat.
     * It is here so it can be shared by different NFRuleSets.
     */
    private transient NFRule defaultInfinityRule = null;

    /**
     * The rule used when dealing with IEEE 754 NaN. This is lazy-evaluated, and derived from decimalFormat.
     * It is here so it can be shared by different NFRuleSets.
     */
    private transient NFRule defaultNaNRule = null;

    /**
     * Flag specifying whether lenient parse mode is on or off.  Off by default.
     * @serial
     */
    private boolean lenientParse = false;

    /**
     * If the description specifies lenient-parse rules, they're stored here until
     * the lenient scanner is created.
     */
    private transient String lenientParseRules;

    /**
     * If the description specifies post-process rules, they're stored here until
     * post-processing is required.
     */
    private transient String postProcessRules;

    /**
     * Post processor lazily constructed from the postProcessRules.
     */
    private transient RBNFPostProcessor postProcessor;

    /**
     * Localizations for rule set names, keyed by locale ID.  Each value holds
     * the localized names in the same order as publicRuleSetNames.
     * @serial
     */
    private Map<String, String[]> ruleSetDisplayNames;

    /**
     * The public rule set names.
     * @serial
     */
    private String[] publicRuleSetNames;

    /**
     * Data for handling context-based capitalization
     */
    private boolean capitalizationInfoIsSet = false;
    private boolean capitalizationForListOrMenu = false;
    private boolean capitalizationForStandAlone = false;
    private transient BreakIterator capitalizationBrkIter = null;


    // Debug switch for this class, read once from ICUDebug under the key "rbnf".
    private static final boolean DEBUG  =  ICUDebug.enabled("rbnf");
    684 
    685     //-----------------------------------------------------------------------
    686     // constructors
    687     //-----------------------------------------------------------------------
    688 
    689     /**
    690      * Creates a RuleBasedNumberFormat that behaves according to the description
    691      * passed in.  The formatter uses the default <code>FORMAT</code> locale.
    692      * @param description A description of the formatter's desired behavior.
    693      * See the class documentation for a complete explanation of the description
    694      * syntax.
    695      * @see Category#FORMAT
    696      */
    697     public RuleBasedNumberFormat(String description) {
    698         locale = ULocale.getDefault(Category.FORMAT);
    699         init(description, null);
    700     }
    701 
    702     /**
    703      * Creates a RuleBasedNumberFormat that behaves according to the description
    704      * passed in.  The formatter uses the default <code>FORMAT</code> locale.
    705      * <p>
    706      * The localizations data provides information about the public
    707      * rule sets and their localized display names for different
    708      * locales. The first element in the list is an array of the names
    709      * of the public rule sets.  The first element in this array is
    710      * the initial default ruleset.  The remaining elements in the
    711      * list are arrays of localizations of the names of the public
    712      * rule sets.  Each of these is one longer than the initial array,
    713      * with the first String being the ULocale ID, and the remaining
    714      * Strings being the localizations of the rule set names, in the
    715      * same order as the initial array.
    716      * @param description A description of the formatter's desired behavior.
    717      * See the class documentation for a complete explanation of the description
    718      * syntax.
    719      * @param localizations a list of localizations for the rule set
    720      * names in the description.
    721      * @see Category#FORMAT
    722      */
    723     public RuleBasedNumberFormat(String description, String[][] localizations) {
    724         locale = ULocale.getDefault(Category.FORMAT);
    725         init(description, localizations);
    726     }
    727 
    /**
     * Creates a RuleBasedNumberFormat that behaves according to the description
     * passed in.  The formatter uses the specified locale to determine the
     * characters to use when formatting in numerals, and to define equivalences
     * for lenient parsing.
     * <p>
     * Convenience overload: the <code>java.util.Locale</code> is converted with
     * <code>ULocale.forLocale</code> and handed to the
     * <code>(String, ULocale)</code> constructor.
     * @param description A description of the formatter's desired behavior.
     * See the class documentation for a complete explanation of the description
     * syntax.
     * @param locale A locale, which governs which characters are used for
     * formatting values in numerals, and which characters are equivalent in
     * lenient parsing.
     */
    public RuleBasedNumberFormat(String description, Locale locale) {
        this(description, ULocale.forLocale(locale));
    }
    743 
    /**
     * Creates a RuleBasedNumberFormat that behaves according to the description
     * passed in.  The formatter uses the specified locale to determine the
     * characters to use when formatting in numerals, and to define equivalences
     * for lenient parsing.  No rule-set-name localizations are supplied.
     * @param description A description of the formatter's desired behavior.
     * See the class documentation for a complete explanation of the description
     * syntax.
     * @param locale A locale, which governs which characters are used for
     * formatting values in numerals, and which characters are equivalent in
     * lenient parsing.
     */
    public RuleBasedNumberFormat(String description, ULocale locale) {
        // The locale must be in place before init() runs.
        // NOTE(review): presumably init() reads this.locale -- confirm.
        this.locale = locale;
        init(description, null);
    }
    760 
    /**
     * Creates a RuleBasedNumberFormat that behaves according to the description
     * passed in.  The formatter uses the specified locale to determine the
     * characters to use when formatting in numerals, and to define equivalences
     * for lenient parsing.
     * <p>
     * The localizations data provides information about the public
     * rule sets and their localized display names for different
     * locales. The first element in the list is an array of the names
     * of the public rule sets.  The first element in this array is
     * the initial default ruleset.  The remaining elements in the
     * list are arrays of localizations of the names of the public
     * rule sets.  Each of these is one longer than the initial array,
     * with the first String being the ULocale ID, and the remaining
     * Strings being the localizations of the rule set names, in the
     * same order as the initial array.
     * @param description A description of the formatter's desired behavior.
     * See the class documentation for a complete explanation of the description
     * syntax.
     * @param localizations a list of localizations for the rule set names in the description.
     * @param locale A ULocale that governs which characters are used for
     * formatting values in numerals, and determines which characters are equivalent in
     * lenient parsing.
     */
    public RuleBasedNumberFormat(String description, String[][] localizations, ULocale locale) {
        // The locale must be in place before init() runs.
        // NOTE(review): presumably init() reads this.locale -- confirm.
        this.locale = locale;
        init(description, localizations);
    }
    789 
    /**
     * Creates a RuleBasedNumberFormat from a predefined description.  The selector
     * code chooses among four possible predefined formats: spellout, ordinal,
     * duration, and numbering system.
     * <p>
     * Convenience overload: the <code>java.util.Locale</code> is converted with
     * <code>ULocale.forLocale</code> and handed to the
     * <code>(ULocale, int)</code> constructor.
     * @param locale The locale for the formatter.
     * @param format A selector code specifying which kind of formatter to create for that
     * locale.  There are four legal values: SPELLOUT, which creates a formatter that
     * spells out a value in words in the desired language, ORDINAL, which attaches
     * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
     * DURATION, which formats a duration in seconds as hours, minutes, and seconds, and
     * NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
     * systems such as the Hebrew numbering system, or for Roman numerals, etc.
     */
    public RuleBasedNumberFormat(Locale locale, int format) {
        this(ULocale.forLocale(locale), format);
    }
    804 
    805     /**
    806      * Creates a RuleBasedNumberFormat from a predefined description.  The selector
    807      * code chooses among three possible predefined formats: spellout, ordinal,
    808      * and duration.
    809      * @param locale The locale for the formatter.
    810      * @param format A selector code specifying which kind of formatter to create for that
    811      * locale.  There are four legal values: SPELLOUT, which creates a formatter that
    812      * spells out a value in words in the desired language, ORDINAL, which attaches
    813      * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
    814      * DURATION, which formats a duration in seconds as hours, minutes, and seconds, and
    815      * NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
    816      * systems such as the Hebrew numbering system, or for Roman numerals, etc..
    817      */
    818     public RuleBasedNumberFormat(ULocale locale, int format) {
    819         this.locale = locale;
    820 
    821         ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.
    822             getBundleInstance(ICUData.ICU_RBNF_BASE_NAME, locale);
    823 
    824         // TODO: determine correct actual/valid locale.  Note ambiguity
    825         // here -- do actual/valid refer to pattern, DecimalFormatSymbols,
    826         // or Collator?
    827         ULocale uloc = bundle.getULocale();
    828         setLocale(uloc, uloc);
    829 
    830         StringBuilder description = new StringBuilder();
    831         String[][] localizations = null;
    832 
    833         try {
    834             ICUResourceBundle rules = bundle.getWithFallback("RBNFRules/"+rulenames[format-1]);
    835             UResourceBundleIterator it = rules.getIterator();
    836             while (it.hasNext()) {
    837                description.append(it.nextString());
    838             }
    839         }
    840         catch (MissingResourceException e1) {
    841         }
    842 
    843         // We use findTopLevel() instead of get() because
    844         // it's faster when we know that it's usually going to fail.
    845         UResourceBundle locNamesBundle = bundle.findTopLevel(locnames[format - 1]);
    846         if (locNamesBundle != null) {
    847             localizations = new String[locNamesBundle.getSize()][];
    848             for (int i = 0; i < localizations.length; ++i) {
    849                 localizations[i] = locNamesBundle.get(i).getStringArray();
    850             }
    851         }
    852         // else there are no localized names. It's not that important.
    853 
    854         init(description.toString(), localizations);
    855     }
    856 
    // Resource names of the predefined rule descriptions, indexed by
    // (selector code - 1): SPELLOUT, ORDINAL, DURATION, NUMBERING_SYSTEM.
    private static final String[] rulenames = {
        "SpelloutRules", "OrdinalRules", "DurationRules", "NumberingSystemRules",
    };
    // Resource names of the rule-set-name localizations, in the same order
    // and with the same indexing as rulenames.
    private static final String[] locnames = {
        "SpelloutLocalizations", "OrdinalLocalizations", "DurationLocalizations", "NumberingSystemLocalizations",
    };
    863 
    /**
     * Creates a RuleBasedNumberFormat from a predefined description.  Uses the
     * default <code>FORMAT</code> locale.
     * @param format A selector code specifying which kind of formatter to create.
     * There are four legal values: SPELLOUT, which creates a formatter that spells
     * out a value in words in the default locale's language, ORDINAL, which attaches
     * an ordinal suffix from the default locale's language to a numeral,
     * DURATION, which formats a duration in seconds as hours, minutes, and seconds
     * (always rounding down), and NUMBERING_SYSTEM, which is used for alternate
     * numbering systems such as Hebrew.
     * @see Category#FORMAT
     */
    public RuleBasedNumberFormat(int format) {
        this(ULocale.getDefault(Category.FORMAT), format);
    }
    878 
    879     //-----------------------------------------------------------------------
    880     // boilerplate
    881     //-----------------------------------------------------------------------
    882 
    883     /**
    884      * Duplicates this formatter.
    885      * @return A RuleBasedNumberFormat that is equal to this one.
    886      */
    887     @Override
    888     public Object clone() {
    889         return super.clone();
    890     }
    891 
    892     /**
    893      * Tests two RuleBasedNumberFormats for equality.
    894      * @param that The formatter to compare against this one.
    895      * @return true if the two formatters have identical behavior.
    896      */
    897     @Override
    898     public boolean equals(Object that) {
    899         // if the other object isn't a RuleBasedNumberFormat, that's
    900         // all we need to know
    901         // Test for capitalization info equality is adequately handled
    902         // by the NumberFormat test for capitalizationSetting equality;
    903         // the info here is just derived from that.
    904         if (!(that instanceof RuleBasedNumberFormat)) {
    905             return false;
    906         } else {
    907             // cast the other object's pointer to a pointer to a
    908             // RuleBasedNumberFormat
    909             RuleBasedNumberFormat that2 = (RuleBasedNumberFormat)that;
    910 
    911             // compare their locales and lenient-parse modes
    912             if (!locale.equals(that2.locale) || lenientParse != that2.lenientParse) {
    913                 return false;
    914             }
    915 
    916             // if that succeeds, then compare their rule set lists
    917             if (ruleSets.length != that2.ruleSets.length) {
    918                 return false;
    919             }
    920             for (int i = 0; i < ruleSets.length; i++) {
    921                 if (!ruleSets[i].equals(that2.ruleSets[i])) {
    922                     return false;
    923                 }
    924             }
    925 
    926             return true;
    927         }
    928     }
    929 
    930     /**
    931      * Mock implementation of hashCode(). This implementation always returns a constant
    932      * value. When Java assertion is enabled, this method triggers an assertion failure.
    933      * @deprecated This API is ICU internal only.
    934      * @hide draft / provisional / internal are hidden on Android
    935      */
    936     @Override
    937     @Deprecated
    938     public int hashCode() {
    939         return super.hashCode();
    940     }
    941 
    942     /**
    943      * Generates a textual description of this formatter.
    944      * @return a String containing a rule set that will produce a RuleBasedNumberFormat
    945      * with identical behavior to this one.  This won't necessarily be identical
    946      * to the rule set description that was originally passed in, but will produce
    947      * the same result.
    948      */
    949     @Override
    950     public String toString() {
    951 
    952         // accumulate the descriptions of all the rule sets in a
    953         // StringBuffer, then cast it to a String and return it
    954         StringBuilder result = new StringBuilder();
    955         for (NFRuleSet ruleSet : ruleSets) {
    956             result.append(ruleSet.toString());
    957         }
    958         return result.toString();
    959     }
    960 
    961     /**
    962      * Writes this object to a stream.
    963      * @param out The stream to write to.
    964      */
    965     private void writeObject(java.io.ObjectOutputStream out)
    966         throws java.io.IOException {
    967         // we just write the textual description to the stream, so we
    968         // have an implementation-independent streaming format
    969         out.writeUTF(this.toString());
    970         out.writeObject(this.locale);
    971         out.writeInt(this.roundingMode);
    972     }
    973 
    974     /**
    975      * Reads this object in from a stream.
    976      * @param in The stream to read from.
    977      */
    978     private void readObject(java.io.ObjectInputStream in)
    979         throws java.io.IOException {
    980 
    981         // read the description in from the stream
    982         String description = in.readUTF();
    983         ULocale loc;
    984 
    985         try {
    986             loc = (ULocale) in.readObject();
    987         } catch (Exception e) {
    988             loc = ULocale.getDefault(Category.FORMAT);
    989         }
    990         try {
    991             roundingMode = in.readInt();
    992         } catch (Exception ignored) {
    993         }
    994 
    995         // build a brand-new RuleBasedNumberFormat from the description,
    996         // then steal its substructure.  This object's substructure and
    997         // the temporary RuleBasedNumberFormat drop on the floor and
    998         // get swept up by the garbage collector
    999         RuleBasedNumberFormat temp = new RuleBasedNumberFormat(description, loc);
   1000         ruleSets = temp.ruleSets;
   1001         ruleSetsMap = temp.ruleSetsMap;
   1002         defaultRuleSet = temp.defaultRuleSet;
   1003         publicRuleSetNames = temp.publicRuleSetNames;
   1004         decimalFormatSymbols = temp.decimalFormatSymbols;
   1005         decimalFormat = temp.decimalFormat;
   1006         locale = temp.locale;
   1007         defaultInfinityRule = temp.defaultInfinityRule;
   1008         defaultNaNRule = temp.defaultNaNRule;
   1009     }
   1010 
   1011 
   1012     //-----------------------------------------------------------------------
   1013     // public API functions
   1014     //-----------------------------------------------------------------------
   1015 
   1016     /**
   1017      * Returns a list of the names of all of this formatter's public rule sets.
   1018      * @return A list of the names of all of this formatter's public rule sets.
   1019      */
   1020     public String[] getRuleSetNames() {
   1021         return publicRuleSetNames.clone();
   1022     }
   1023 
   1024     /**
   1025      * Return a list of locales for which there are locale-specific display names
   1026      * for the rule sets in this formatter.  If there are no localized display names, return null.
   1027      * @return an array of the ULocales for which there is rule set display name information
   1028      */
   1029     public ULocale[] getRuleSetDisplayNameLocales() {
   1030         if (ruleSetDisplayNames != null) {
   1031             Set<String> s = ruleSetDisplayNames.keySet();
   1032             String[] locales = s.toArray(new String[s.size()]);
   1033             Arrays.sort(locales, String.CASE_INSENSITIVE_ORDER);
   1034             ULocale[] result = new ULocale[locales.length];
   1035             for (int i = 0; i < locales.length; ++i) {
   1036                 result[i] = new ULocale(locales[i]);
   1037             }
   1038             return result;
   1039         }
   1040         return null;
   1041     }
   1042 
   1043     private String[] getNameListForLocale(ULocale loc) {
   1044         if (loc != null && ruleSetDisplayNames != null) {
   1045             String[] localeNames = { loc.getBaseName(), ULocale.getDefault(Category.DISPLAY).getBaseName() };
   1046             for (String lname : localeNames) {
   1047                 while (lname.length() > 0) {
   1048                     String[] names = ruleSetDisplayNames.get(lname);
   1049                     if (names != null) {
   1050                         return names;
   1051                     }
   1052                     lname = ULocale.getFallback(lname);
   1053                 }
   1054             }
   1055         }
   1056         return null;
   1057     }
   1058 
   1059     /**
   1060      * Return the rule set display names for the provided locale.  These are in the same order
   1061      * as those returned by getRuleSetNames.  The locale is matched against the locales for
   1062      * which there is display name data, using normal fallback rules.  If no locale matches,
   1063      * the default display names are returned.  (These are the internal rule set names minus
   1064      * the leading '%'.)
   1065      * @return an array of the locales that have display name information
   1066      * @see #getRuleSetNames
   1067      */
   1068     public String[] getRuleSetDisplayNames(ULocale loc) {
   1069         String[] names = getNameListForLocale(loc);
   1070         if (names != null) {
   1071             return names.clone();
   1072         }
   1073         names = getRuleSetNames();
   1074         for (int i = 0; i < names.length; ++i) {
   1075             names[i] = names[i].substring(1);
   1076         }
   1077         return names;
   1078     }
   1079 
   1080     /**
   1081      * Return the rule set display names for the current default <code>DISPLAY</code> locale.
   1082      * @return an array of the display names
   1083      * @see #getRuleSetDisplayNames(ULocale)
   1084      * @see Category#DISPLAY
   1085      */
   1086     public String[] getRuleSetDisplayNames() {
   1087         return getRuleSetDisplayNames(ULocale.getDefault(Category.DISPLAY));
   1088     }
   1089 
   1090     /**
   1091      * Return the rule set display name for the provided rule set and locale.
   1092      * The locale is matched against the locales for which there is display name data, using
   1093      * normal fallback rules.  If no locale matches, the default display name is returned.
   1094      * @return the display name for the rule set
   1095      * @see #getRuleSetDisplayNames
   1096      * @throws IllegalArgumentException if ruleSetName is not a valid rule set name for this format
   1097      */
   1098     public String getRuleSetDisplayName(String ruleSetName, ULocale loc) {
   1099         String[] rsnames = publicRuleSetNames;
   1100         for (int ix = 0; ix < rsnames.length; ++ix) {
   1101             if (rsnames[ix].equals(ruleSetName)) {
   1102                 String[] names = getNameListForLocale(loc);
   1103                 if (names != null) {
   1104                     return names[ix];
   1105                 }
   1106                 return rsnames[ix].substring(1);
   1107             }
   1108         }
   1109         throw new IllegalArgumentException("unrecognized rule set name: " + ruleSetName);
   1110     }
   1111 
   1112     /**
   1113      * Return the rule set display name for the provided rule set in the current default <code>DISPLAY</code> locale.
   1114      * @return the display name for the rule set
   1115      * @see #getRuleSetDisplayName(String,ULocale)
   1116      * @see Category#DISPLAY
   1117      */
   1118     public String getRuleSetDisplayName(String ruleSetName) {
   1119         return getRuleSetDisplayName(ruleSetName, ULocale.getDefault(Category.DISPLAY));
   1120     }
   1121 
   1122     /**
   1123      * Formats the specified number according to the specified rule set.
   1124      * @param number The number to format.
   1125      * @param ruleSet The name of the rule set to format the number with.
   1126      * This must be the name of a valid public rule set for this formatter.
   1127      * @return A textual representation of the number.
   1128      */
   1129     public String format(double number, String ruleSet) throws IllegalArgumentException {
   1130         if (ruleSet.startsWith("%%")) {
   1131             throw new IllegalArgumentException("Can't use internal rule set");
   1132         }
   1133         return adjustForContext(format(number, findRuleSet(ruleSet)));
   1134     }
   1135 
   1136     /**
   1137      * Formats the specified number according to the specified rule set.
   1138      * (If the specified rule set specifies a master ["x.0"] rule, this function
   1139      * ignores it.  Convert the number to a double first if you ned it.)  This
   1140      * function preserves all the precision in the long-- it doesn't convert it
   1141      * to a double.
   1142      * @param number The number to format.
   1143      * @param ruleSet The name of the rule set to format the number with.
   1144      * This must be the name of a valid public rule set for this formatter.
   1145      * @return A textual representation of the number.
   1146      */
   1147     public String format(long number, String ruleSet) throws IllegalArgumentException {
   1148         if (ruleSet.startsWith("%%")) {
   1149             throw new IllegalArgumentException("Can't use internal rule set");
   1150         }
   1151         return adjustForContext(format(number, findRuleSet(ruleSet)));
   1152     }
   1153 
   1154     /**
   1155      * Formats the specified number using the formatter's default rule set.
   1156      * (The default rule set is the last public rule set defined in the description.)
   1157      * @param number The number to format.
   1158      * @param toAppendTo A StringBuffer that the result should be appended to.
   1159      * @param ignore This function doesn't examine or update the field position.
   1160      * @return toAppendTo
   1161      */
   1162     @Override
   1163     public StringBuffer format(double number,
   1164                                StringBuffer toAppendTo,
   1165                                FieldPosition ignore) {
   1166         // this is one of the inherited format() methods.  Since it doesn't
   1167         // have a way to select the rule set to use, it just uses the
   1168         // default one
   1169         // Note, the BigInteger/BigDecimal methods below currently go through this.
   1170         if (toAppendTo.length() == 0) {
   1171             toAppendTo.append(adjustForContext(format(number, defaultRuleSet)));
   1172         } else {
   1173             // appending to other text, don't capitalize
   1174             toAppendTo.append(format(number, defaultRuleSet));
   1175         }
   1176         return toAppendTo;
   1177     }
   1178 
   1179     /**
   1180      * Formats the specified number using the formatter's default rule set.
   1181      * (The default rule set is the last public rule set defined in the description.)
   1182      * (If the specified rule set specifies a master ["x.0"] rule, this function
   1183      * ignores it.  Convert the number to a double first if you ned it.)  This
   1184      * function preserves all the precision in the long-- it doesn't convert it
   1185      * to a double.
   1186      * @param number The number to format.
   1187      * @param toAppendTo A StringBuffer that the result should be appended to.
   1188      * @param ignore This function doesn't examine or update the field position.
   1189      * @return toAppendTo
   1190      */
   1191     @Override
   1192     public StringBuffer format(long number,
   1193                                StringBuffer toAppendTo,
   1194                                FieldPosition ignore) {
   1195         // this is one of the inherited format() methods.  Since it doesn't
   1196         // have a way to select the rule set to use, it just uses the
   1197         // default one
   1198         if (toAppendTo.length() == 0) {
   1199             toAppendTo.append(adjustForContext(format(number, defaultRuleSet)));
   1200         } else {
   1201             // appending to other text, don't capitalize
   1202             toAppendTo.append(format(number, defaultRuleSet));
   1203         }
   1204         return toAppendTo;
   1205     }
   1206 
   1207     /**
   1208      * <strong style="font-family: helvetica; color: red;">NEW</strong>
   1209      * Implement android.icu.text.NumberFormat:
   1210      * Format a BigInteger.
   1211      */
   1212     @Override
   1213     public StringBuffer format(BigInteger number,
   1214                                StringBuffer toAppendTo,
   1215                                FieldPosition pos) {
   1216         return format(new android.icu.math.BigDecimal(number), toAppendTo, pos);
   1217     }
   1218 
   1219     /**
   1220      * <strong style="font-family: helvetica; color: red;">NEW</strong>
   1221      * Implement android.icu.text.NumberFormat:
   1222      * Format a BigDecimal.
   1223      */
   1224     @Override
   1225     public StringBuffer format(java.math.BigDecimal number,
   1226                                StringBuffer toAppendTo,
   1227                                FieldPosition pos) {
   1228         return format(new android.icu.math.BigDecimal(number), toAppendTo, pos);
   1229     }
   1230 
   1231     private static final android.icu.math.BigDecimal MAX_VALUE = android.icu.math.BigDecimal.valueOf(Long.MAX_VALUE);
   1232     private static final android.icu.math.BigDecimal MIN_VALUE = android.icu.math.BigDecimal.valueOf(Long.MIN_VALUE);
   1233 
   1234     /**
   1235      * <strong style="font-family: helvetica; color: red;">NEW</strong>
   1236      * Implement android.icu.text.NumberFormat:
   1237      * Format a BigDecimal.
   1238      */
   1239     @Override
   1240     public StringBuffer format(android.icu.math.BigDecimal number,
   1241                                StringBuffer toAppendTo,
   1242                                FieldPosition pos) {
   1243         if (MIN_VALUE.compareTo(number) > 0 || MAX_VALUE.compareTo(number) < 0) {
   1244             // We're outside of our normal range that this framework can handle.
   1245             // The DecimalFormat will provide more accurate results.
   1246             return getDecimalFormat().format(number, toAppendTo, pos);
   1247         }
   1248         if (number.scale() == 0) {
   1249             return format(number.longValue(), toAppendTo, pos);
   1250         }
   1251         return format(number.doubleValue(), toAppendTo, pos);
   1252     }
   1253 
   1254     /**
   1255      * Parses the specified string, beginning at the specified position, according
   1256      * to this formatter's rules.  This will match the string against all of the
   1257      * formatter's public rule sets and return the value corresponding to the longest
   1258      * parseable substring.  This function's behavior is affected by the lenient
   1259      * parse mode.
   1260      * @param text The string to parse
   1261      * @param parsePosition On entry, contains the position of the first character
   1262      * in "text" to examine.  On exit, has been updated to contain the position
   1263      * of the first character in "text" that wasn't consumed by the parse.
   1264      * @return The number that corresponds to the parsed text.  This will be an
   1265      * instance of either Long or Double, depending on whether the result has a
   1266      * fractional part.
   1267      * @see #setLenientParseMode
   1268      */
   1269     @Override
   1270     public Number parse(String text, ParsePosition parsePosition) {
   1271 
   1272         // parsePosition tells us where to start parsing.  We copy the
   1273         // text in the string from here to the end inro a new string,
   1274         // and create a new ParsePosition and result variable to use
   1275         // for the duration of the parse operation
   1276         String workingText = text.substring(parsePosition.getIndex());
   1277         ParsePosition workingPos = new ParsePosition(0);
   1278         Number tempResult = null;
   1279 
   1280         // keep track of the largest number of characters consumed in
   1281         // the various trials, and the result that corresponds to it
   1282         Number result = NFRule.ZERO;
   1283         ParsePosition highWaterMark = new ParsePosition(workingPos.getIndex());
   1284 
   1285         // iterate over the public rule sets (beginning with the default one)
   1286         // and try parsing the text with each of them.  Keep track of which
   1287         // one consumes the most characters: that's the one that determines
   1288         // the result we return
   1289         for (int i = ruleSets.length - 1; i >= 0; i--) {
   1290             // skip private or unparseable rule sets
   1291             if (!ruleSets[i].isPublic() || !ruleSets[i].isParseable()) {
   1292                 continue;
   1293             }
   1294 
   1295             // try parsing the string with the rule set.  If it gets past the
   1296             // high-water mark, update the high-water mark and the result
   1297             tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE);
   1298             if (workingPos.getIndex() > highWaterMark.getIndex()) {
   1299                 result = tempResult;
   1300                 highWaterMark.setIndex(workingPos.getIndex());
   1301             }
   1302             // commented out because this API on ParsePosition doesn't exist in 1.1.x
   1303             //            if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) {
   1304             //                highWaterMark.setErrorIndex(workingPos.getErrorIndex());
   1305             //            }
   1306 
   1307             // if we manage to use up all the characters in the string,
   1308             // we don't have to try any more rule sets
   1309             if (highWaterMark.getIndex() == workingText.length()) {
   1310                 break;
   1311             }
   1312 
   1313             // otherwise, reset our internal parse position to the
   1314             // beginning and try again with the next rule set
   1315             workingPos.setIndex(0);
   1316         }
   1317 
   1318         // add the high water mark to our original parse position and
   1319         // return the result
   1320         parsePosition.setIndex(parsePosition.getIndex() + highWaterMark.getIndex());
   1321         // commented out because this API on ParsePosition doesn't exist in 1.1.x
   1322         //        if (highWaterMark.getIndex() == 0) {
   1323         //            parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex());
   1324         //        }
   1325         return result;
   1326     }
   1327 
   1328     /**
   1329      * Turns lenient parse mode on and off.
   1330      *
   1331      * When in lenient parse mode, the formatter uses an RbnfLenientScanner
   1332      * for parsing the text.  Lenient parsing is only in effect if a scanner
   1333      * is set.  If a provider is not set, and this is used for parsing,
   1334      * a default scanner <code>RbnfLenientScannerProviderImpl</code> will be set if
   1335      * it is available on the classpath.  Otherwise this will have no effect.
   1336      *
   1337      * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
   1338      * @see RbnfLenientScanner
   1339      * @see RbnfLenientScannerProvider
   1340      */
   1341     public void setLenientParseMode(boolean enabled) {
   1342         lenientParse = enabled;
   1343     }
   1344 
   1345     /**
   1346      * Returns true if lenient-parse mode is turned on.  Lenient parsing is off
   1347      * by default.
   1348      * @return true if lenient-parse mode is turned on.
   1349      * @see #setLenientParseMode
   1350      */
   1351     public boolean lenientParseEnabled() {
   1352         return lenientParse;
   1353     }
   1354 
   1355     /**
   1356      * Sets the provider for the lenient scanner.  If this has not been set,
   1357      * {@link #setLenientParseMode}
   1358      * has no effect.  This is necessary to decouple collation from format code.
   1359      * @param scannerProvider the provider
   1360      * @see #setLenientParseMode
   1361      * @see #getLenientScannerProvider
   1362      */
   1363     public void setLenientScannerProvider(RbnfLenientScannerProvider scannerProvider) {
   1364         this.scannerProvider = scannerProvider;
   1365     }
   1366 
   1367     /**
   1368      * Returns the lenient scanner provider.  If none was set, and lenient parse is
   1369      * enabled, this will attempt to instantiate a default scanner, setting it if
   1370      * it was successful.  Otherwise this returns false.
   1371      *
   1372      * @see #setLenientScannerProvider
   1373      */
   1374     public RbnfLenientScannerProvider getLenientScannerProvider() {
   1375         // there's a potential race condition if two threads try to set/get the scanner at
   1376         // the same time, but you get what you get, and you shouldn't be using this from
   1377         // multiple threads anyway.
   1378         if (scannerProvider == null && lenientParse && !lookedForScanner) {
   1379             try {
   1380                 lookedForScanner = true;
   1381                 Class<?> cls = Class.forName("android.icu.impl.text.RbnfScannerProviderImpl");
   1382                 RbnfLenientScannerProvider provider = (RbnfLenientScannerProvider)cls.newInstance();
   1383                 setLenientScannerProvider(provider);
   1384             }
   1385             catch (Exception e) {
   1386                 // any failure, we just ignore and return null
   1387             }
   1388         }
   1389 
   1390         return scannerProvider;
   1391     }
   1392 
   1393     /**
   1394      * Override the default rule set to use.  If ruleSetName is null, reset
   1395      * to the initial default rule set.
   1396      * @param ruleSetName the name of the rule set, or null to reset the initial default.
   1397      * @throws IllegalArgumentException if ruleSetName is not the name of a public ruleset.
   1398      */
   1399     public void setDefaultRuleSet(String ruleSetName) {
   1400         if (ruleSetName == null) {
   1401             if (publicRuleSetNames.length > 0) {
   1402                 defaultRuleSet = findRuleSet(publicRuleSetNames[0]);
   1403             } else {
   1404                 defaultRuleSet = null;
   1405                 int n = ruleSets.length;
   1406                 while (--n >= 0) {
   1407                    String currentName = ruleSets[n].getName();
   1408                    if (currentName.equals("%spellout-numbering") ||
   1409                        currentName.equals("%digits-ordinal") ||
   1410                        currentName.equals("%duration")) {
   1411 
   1412                        defaultRuleSet = ruleSets[n];
   1413                        return;
   1414                    }
   1415                 }
   1416 
   1417                 n = ruleSets.length;
   1418                 while (--n >= 0) {
   1419                     if (ruleSets[n].isPublic()) {
   1420                         defaultRuleSet = ruleSets[n];
   1421                         break;
   1422                     }
   1423                 }
   1424             }
   1425         } else if (ruleSetName.startsWith("%%")) {
   1426             throw new IllegalArgumentException("cannot use private rule set: " + ruleSetName);
   1427         } else {
   1428             defaultRuleSet = findRuleSet(ruleSetName);
   1429         }
   1430     }
   1431 
   1432     /**
   1433      * Return the name of the current default rule set.
   1434      * @return the name of the current default rule set, if it is public, else the empty string.
   1435      */
   1436     public String getDefaultRuleSetName() {
   1437         if (defaultRuleSet != null && defaultRuleSet.isPublic()) {
   1438             return defaultRuleSet.getName();
   1439         }
   1440         return "";
   1441     }
   1442 
   1443     /**
   1444      * Sets the decimal format symbols used by this formatter. The formatter uses a copy of the
   1445      * provided symbols.
   1446      *
   1447      * @param newSymbols desired DecimalFormatSymbols
   1448      * @see DecimalFormatSymbols
   1449      */
   1450     public void setDecimalFormatSymbols(DecimalFormatSymbols newSymbols) {
   1451         if (newSymbols != null) {
   1452             decimalFormatSymbols = (DecimalFormatSymbols) newSymbols.clone();
   1453             if (decimalFormat != null) {
   1454                 decimalFormat.setDecimalFormatSymbols(decimalFormatSymbols);
   1455             }
   1456             if (defaultInfinityRule != null) {
   1457                 defaultInfinityRule = null;
   1458                 getDefaultInfinityRule(); // Reset with the new DecimalFormatSymbols
   1459             }
   1460             if (defaultNaNRule != null) {
   1461                 defaultNaNRule = null;
   1462                 getDefaultNaNRule(); // Reset with the new DecimalFormatSymbols
   1463             }
   1464 
   1465             // Apply the new decimalFormatSymbols by reparsing the rulesets
   1466             for (NFRuleSet ruleSet : ruleSets) {
   1467                 ruleSet.setDecimalFormatSymbols(decimalFormatSymbols);
   1468             }
   1469         }
   1470     }
   1471 
   1472     /**
   1473      * <strong>[icu]</strong> Set a particular DisplayContext value in the formatter,
   1474      * such as CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
   1475      * NumberFormat.
   1476      *
   1477      * @param context The DisplayContext value to set.
   1478      */
   1479     // Here we override the NumberFormat implementation in order to
   1480     // lazily initialize relevant items
   1481     @Override
   1482     public void setContext(DisplayContext context) {
   1483         super.setContext(context);
   1484         if (!capitalizationInfoIsSet &&
   1485               (context==DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU || context==DisplayContext.CAPITALIZATION_FOR_STANDALONE)) {
   1486             initCapitalizationContextInfo(locale);
   1487             capitalizationInfoIsSet = true;
   1488         }
   1489         if (capitalizationBrkIter == null && (context==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1490               (context==DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) ||
   1491               (context==DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) )) {
   1492             capitalizationBrkIter = BreakIterator.getSentenceInstance(locale);
   1493         }
   1494     }
   1495 
   1496     /**
   1497      * Returns the rounding mode.
   1498      *
   1499      * @return A rounding mode, between <code>BigDecimal.ROUND_UP</code> and
   1500      * <code>BigDecimal.ROUND_UNNECESSARY</code>.
   1501      * @see #setRoundingMode
   1502      * @see java.math.BigDecimal
   1503      */
   1504     @Override
   1505     public int getRoundingMode() {
   1506         return roundingMode;
   1507     }
   1508 
   1509     /**
   1510      * Sets the rounding mode. This has no effect unless the rounding increment is greater
   1511      * than zero.
   1512      *
   1513      * @param roundingMode A rounding mode, between <code>BigDecimal.ROUND_UP</code> and
   1514      * <code>BigDecimal.ROUND_UNNECESSARY</code>.
   1515      * @exception IllegalArgumentException if <code>roundingMode</code> is unrecognized.
   1516      * @see #getRoundingMode
   1517      * @see java.math.BigDecimal
   1518      */
   1519     @Override
   1520     public void setRoundingMode(int roundingMode) {
   1521         if (roundingMode < BigDecimal.ROUND_UP || roundingMode > BigDecimal.ROUND_UNNECESSARY) {
   1522             throw new IllegalArgumentException("Invalid rounding mode: " + roundingMode);
   1523         }
   1524 
   1525         this.roundingMode = roundingMode;
   1526     }
   1527 
   1528 
   1529     //-----------------------------------------------------------------------
   1530     // package-internal API
   1531     //-----------------------------------------------------------------------
   1532 
   1533     /**
   1534      * Returns a reference to the formatter's default rule set.  The default
   1535      * rule set is the last public rule set in the description, or the one
   1536      * most recently set by setDefaultRuleSet.
   1537      * @return The formatter's default rule set.
   1538      */
   1539     NFRuleSet getDefaultRuleSet() {
   1540         return defaultRuleSet;
   1541     }
   1542 
   1543     /**
   1544      * Returns the scanner to use for lenient parsing.  The scanner is
   1545      * provided by the provider.
   1546      * @return The collator to use for lenient parsing, or null if lenient parsing
   1547      * is turned off.
   1548      */
   1549     RbnfLenientScanner getLenientScanner() {
   1550         if (lenientParse) {
   1551             RbnfLenientScannerProvider provider = getLenientScannerProvider();
   1552             if (provider != null) {
   1553                 return provider.get(locale, lenientParseRules);
   1554             }
   1555         }
   1556         return null;
   1557     }
   1558 
   1559     /**
   1560      * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
   1561      * instances owned by this formatter.  This object is lazily created: this function
   1562      * creates it the first time it's called.
   1563      * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
   1564      * instances owned by this formatter.
   1565      */
   1566     DecimalFormatSymbols getDecimalFormatSymbols() {
   1567         // lazy-evaluate the DecimalFormatSymbols object.  This object
   1568         // is shared by all DecimalFormat instances belonging to this
   1569         // formatter
   1570         if (decimalFormatSymbols == null) {
   1571             decimalFormatSymbols = new DecimalFormatSymbols(locale);
   1572         }
   1573         return decimalFormatSymbols;
   1574     }
   1575 
   1576     DecimalFormat getDecimalFormat() {
   1577         if (decimalFormat == null) {
   1578             // Don't use NumberFormat.getInstance, which can cause a recursive call
   1579             String pattern = getPattern(locale, NUMBERSTYLE);
   1580             decimalFormat = new DecimalFormat(pattern, getDecimalFormatSymbols());
   1581         }
   1582         return decimalFormat;
   1583     }
   1584 
   1585     PluralFormat createPluralFormat(PluralRules.PluralType pluralType, String pattern) {
   1586         return new PluralFormat(locale, pluralType, pattern, getDecimalFormat());
   1587     }
   1588 
   1589     /**
   1590      * Returns the default rule for infinity. This object is lazily created: this function
   1591      * creates it the first time it's called.
   1592      */
   1593     NFRule getDefaultInfinityRule() {
   1594         if (defaultInfinityRule == null) {
   1595             defaultInfinityRule = new NFRule(this, "Inf: " + getDecimalFormatSymbols().getInfinity());
   1596         }
   1597         return defaultInfinityRule;
   1598     }
   1599 
   1600     /**
   1601      * Returns the default rule for NaN. This object is lazily created: this function
   1602      * creates it the first time it's called.
   1603      */
   1604     NFRule getDefaultNaNRule() {
   1605         if (defaultNaNRule == null) {
   1606             defaultNaNRule = new NFRule(this, "NaN: " + getDecimalFormatSymbols().getNaN());
   1607         }
   1608         return defaultNaNRule;
   1609     }
   1610 
   1611     //-----------------------------------------------------------------------
   1612     // construction implementation
   1613     //-----------------------------------------------------------------------
   1614 
   1615     /**
   1616      * This extracts the special information from the rule sets before the
   1617      * main parsing starts.  Extra whitespace must have already been removed
   1618      * from the description.  If found, the special information is removed from the
   1619      * description and returned, otherwise the description is unchanged and null
   1620      * is returned.  Note: the trailing semicolon at the end of the special
   1621      * rules is stripped.
   1622      * @param description the rbnf description with extra whitespace removed
   1623      * @param specialName the name of the special rule text to extract
   1624      * @return the special rule text, or null if the rule was not found
   1625      */
   1626     private String extractSpecial(StringBuilder description, String specialName) {
   1627         String result = null;
   1628         int lp = description.indexOf(specialName);
   1629         if (lp != -1) {
   1630             // we've got to make sure we're not in the middle of a rule
   1631             // (where specialName would actually get treated as
   1632             // rule text)
   1633             if (lp == 0 || description.charAt(lp - 1) == ';') {
   1634                 // locate the beginning and end of the actual special
   1635                 // rules (there may be whitespace between the name and
   1636                 // the first token in the description)
   1637                 int lpEnd = description.indexOf(";%", lp);
   1638 
   1639                 if (lpEnd == -1) {
   1640                     lpEnd = description.length() - 1; // later we add 1 back to get the '%'
   1641                 }
   1642                 int lpStart = lp + specialName.length();
   1643                 while (lpStart < lpEnd &&
   1644                        PatternProps.isWhiteSpace(description.charAt(lpStart))) {
   1645                     ++lpStart;
   1646                 }
   1647 
   1648                 // copy out the special rules
   1649                 result = description.substring(lpStart, lpEnd);
   1650 
   1651                 // remove the special rule from the description
   1652                 description.delete(lp, lpEnd+1); // delete the semicolon but not the '%'
   1653             }
   1654         }
   1655         return result;
   1656     }
   1657 
   1658     /**
   1659      * This function parses the description and uses it to build all of
   1660      * internal data structures that the formatter uses to do formatting
   1661      * @param description The description of the formatter's desired behavior.
   1662      * This is either passed in by the caller or loaded out of a resource
   1663      * by one of the constructors, and is in the description format specified
   1664      * in the class docs.
   1665      */
   1666     private void init(String description, String[][] localizations) {
   1667         initLocalizations(localizations);
   1668 
   1669         // start by stripping the trailing whitespace from all the rules
   1670         // (this is all the whitespace follwing each semicolon in the
   1671         // description).  This allows us to look for rule-set boundaries
   1672         // by searching for ";%" without having to worry about whitespace
   1673         // between the ; and the %
   1674         StringBuilder descBuf = stripWhitespace(description);
   1675 
   1676         // check to see if there's a set of lenient-parse rules.  If there
   1677         // is, pull them out into our temporary holding place for them,
   1678         // and delete them from the description before the real description-
   1679         // parsing code sees them
   1680 
   1681         lenientParseRules = extractSpecial(descBuf, "%%lenient-parse:");
   1682         postProcessRules = extractSpecial(descBuf, "%%post-process:");
   1683 
   1684         // pre-flight parsing the description and count the number of
   1685         // rule sets (";%" marks the end of one rule set and the beginning
   1686         // of the next)
   1687         int numRuleSets = 1;
   1688         int p = 0;
   1689         while ((p = descBuf.indexOf(";%", p)) != -1) {
   1690             ++numRuleSets;
   1691             p += 2; // Skip the length of ";%"
   1692         }
   1693 
   1694         // our rule list is an array of the appropriate size
   1695         ruleSets = new NFRuleSet[numRuleSets];
   1696         ruleSetsMap = new HashMap<String, NFRuleSet>(numRuleSets * 2 + 1);
   1697         defaultRuleSet = null;
   1698 
   1699         // Used to count the number of public rule sets
   1700         // Public rule sets have names that begin with % instead of %%.
   1701         int publicRuleSetCount = 0;
   1702 
   1703         // divide up the descriptions into individual rule-set descriptions
   1704         // and store them in a temporary array.  At each step, we also
   1705         // new up a rule set, but all this does is initialize its name
   1706         // and remove it from its description.  We can't actually parse
   1707         // the rest of the descriptions and finish initializing everything
   1708         // because we have to know the names and locations of all the rule
   1709         // sets before we can actually set everything up
   1710         String[] ruleSetDescriptions = new String[numRuleSets];
   1711 
   1712         int curRuleSet = 0;
   1713         int start = 0;
   1714 
   1715         while (curRuleSet < ruleSets.length) {
   1716             p = descBuf.indexOf(";%", start);
   1717             if (p < 0) {
   1718                 p = descBuf.length() - 1;
   1719             }
   1720             ruleSetDescriptions[curRuleSet] = descBuf.substring(start, p + 1);
   1721             NFRuleSet ruleSet = new NFRuleSet(this, ruleSetDescriptions, curRuleSet);
   1722             ruleSets[curRuleSet] = ruleSet;
   1723             String currentName = ruleSet.getName();
   1724             ruleSetsMap.put(currentName, ruleSet);
   1725             if (!currentName.startsWith("%%")) {
   1726                 ++publicRuleSetCount;
   1727                 if (defaultRuleSet == null
   1728                         && currentName.equals("%spellout-numbering")
   1729                         || currentName.equals("%digits-ordinal")
   1730                         || currentName.equals("%duration"))
   1731                 {
   1732                     defaultRuleSet = ruleSet;
   1733                 }
   1734             }
   1735             ++curRuleSet;
   1736             start = p + 1;
   1737         }
   1738 
   1739         // now we can take note of the formatter's default rule set, which
   1740         // is the last public rule set in the description (it's the last
   1741         // rather than the first so that a user can create a new formatter
   1742         // from an existing formatter and change its default behavior just
   1743         // by appending more rule sets to the end)
   1744 
   1745         // {dlf} Initialization of a fraction rule set requires the default rule
   1746         // set to be known.  For purposes of initialization, this is always the
   1747         // last public rule set, no matter what the localization data says.
   1748 
   1749         // Set the default ruleset to the last public ruleset, unless one of the predefined
   1750         // ruleset names %spellout-numbering, %digits-ordinal, or %duration is found
   1751 
   1752         if (defaultRuleSet == null) {
   1753             for (int i = ruleSets.length - 1; i >= 0; --i) {
   1754                 if (!ruleSets[i].getName().startsWith("%%")) {
   1755                     defaultRuleSet = ruleSets[i];
   1756                     break;
   1757                 }
   1758             }
   1759         }
   1760         if (defaultRuleSet == null) {
   1761             defaultRuleSet = ruleSets[ruleSets.length - 1];
   1762         }
   1763 
   1764         // finally, we can go back through the temporary descriptions
   1765         // list and finish setting up the substructure
   1766         for (int i = 0; i < ruleSets.length; i++) {
   1767             ruleSets[i].parseRules(ruleSetDescriptions[i]);
   1768         }
   1769 
   1770         // Now that the rules are initialized, the 'real' default rule
   1771         // set can be adjusted by the localization data.
   1772 
   1773         // prepare an array of the proper size and copy the names into it
   1774         String[] publicRuleSetTemp = new String[publicRuleSetCount];
   1775         publicRuleSetCount = 0;
   1776         for (int i = ruleSets.length - 1; i >= 0; i--) {
   1777             if (!ruleSets[i].getName().startsWith("%%")) {
   1778                 publicRuleSetTemp[publicRuleSetCount++] = ruleSets[i].getName();
   1779             }
   1780         }
   1781 
   1782         if (publicRuleSetNames != null) {
   1783             // confirm the names, if any aren't in the rules, that's an error
   1784             // it is ok if the rules contain public rule sets that are not in this list
   1785             loop: for (int i = 0; i < publicRuleSetNames.length; ++i) {
   1786                 String name = publicRuleSetNames[i];
   1787                 for (int j = 0; j < publicRuleSetTemp.length; ++j) {
   1788                     if (name.equals(publicRuleSetTemp[j])) {
   1789                         continue loop;
   1790                     }
   1791                 }
   1792                 throw new IllegalArgumentException("did not find public rule set: " + name);
   1793             }
   1794 
   1795             defaultRuleSet = findRuleSet(publicRuleSetNames[0]); // might be different
   1796         } else {
   1797             publicRuleSetNames = publicRuleSetTemp;
   1798         }
   1799     }
   1800 
   1801     /**
   1802      * Take the localizations array and create a Map from the locale strings to
   1803      * the localization arrays.
   1804      */
   1805     private void initLocalizations(String[][] localizations) {
   1806         if (localizations != null) {
   1807             publicRuleSetNames = localizations[0].clone();
   1808 
   1809             Map<String, String[]> m = new HashMap<String, String[]>();
   1810             for (int i = 1; i < localizations.length; ++i) {
   1811                 String[] data = localizations[i];
   1812                 String loc = data[0];
   1813                 String[] names = new String[data.length-1];
   1814                 if (names.length != publicRuleSetNames.length) {
   1815                     throw new IllegalArgumentException("public name length: " + publicRuleSetNames.length +
   1816                                                        " != localized names[" + i + "] length: " + names.length);
   1817                 }
   1818                 System.arraycopy(data, 1, names, 0, names.length);
   1819                 m.put(loc, names);
   1820             }
   1821 
   1822             if (!m.isEmpty()) {
   1823                 ruleSetDisplayNames = m;
   1824             }
   1825         }
   1826     }
   1827 
   1828     /**
   1829      * Set capitalizationForListOrMenu, capitalizationForStandAlone
   1830      */
   1831     private void initCapitalizationContextInfo(ULocale theLocale) {
   1832         ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, theLocale);
   1833         try {
   1834             ICUResourceBundle rdb = rb.getWithFallback("contextTransforms/number-spellout");
   1835             int[] intVector = rdb.getIntVector();
   1836             if (intVector.length >= 2) {
   1837                 capitalizationForListOrMenu = (intVector[0] != 0);
   1838                 capitalizationForStandAlone = (intVector[1] != 0);
   1839             }
   1840         } catch (MissingResourceException e) {
   1841             // use default
   1842         }
   1843     }
   1844 
   1845     /**
   1846      * This function is used by init() to strip whitespace between rules (i.e.,
   1847      * after semicolons).
   1848      * @param description The formatter description
   1849      * @return The description with all the whitespace that follows semicolons
   1850      * taken out.
   1851      */
   1852     private StringBuilder stripWhitespace(String description) {
   1853         // since we don't have a method that deletes characters (why?!!)
   1854         // create a new StringBuffer to copy the text into
   1855         StringBuilder result = new StringBuilder();
   1856         int descriptionLength = description.length();
   1857 
   1858         // iterate through the characters...
   1859         int start = 0;
   1860         while (start < descriptionLength) {
   1861             // seek to the first non-whitespace character...
   1862             while (start < descriptionLength
   1863                    && PatternProps.isWhiteSpace(description.charAt(start)))
   1864             {
   1865                 ++start;
   1866             }
   1867 
   1868             //if the first non-whitespace character is semicolon, skip it and continue
   1869             if (start < descriptionLength && description.charAt(start) == ';') {
   1870                 start += 1;
   1871                 continue;
   1872             }
   1873 
   1874             // locate the next semicolon in the text and copy the text from
   1875             // our current position up to that semicolon into the result
   1876             int p = description.indexOf(';', start);
   1877             if (p == -1) {
   1878                 // or if we don't find a semicolon, just copy the rest of
   1879                 // the string into the result
   1880                 result.append(description.substring(start));
   1881                 break;
   1882             }
   1883             else if (p < descriptionLength) {
   1884                 result.append(description.substring(start, p + 1));
   1885                 start = p + 1;
   1886             }
   1887             else {
   1888                 // when we get here, we've seeked off the end of the string, and
   1889                 // we terminate the loop (we continue until *start* is -1 rather
   1890                 // than until *p* is -1, because otherwise we'd miss the last
   1891                 // rule in the description)
   1892                 break;
   1893             }
   1894         }
   1895         return result;
   1896     }
   1897 
   1898     //-----------------------------------------------------------------------
   1899     // formatting implementation
   1900     //-----------------------------------------------------------------------
   1901 
   1902     /**
   1903      * Bottleneck through which all the public format() methods
   1904      * that take a double pass. By the time we get here, we know
   1905      * which rule set we're using to do the formatting.
   1906      * @param number The number to format
   1907      * @param ruleSet The rule set to use to format the number
   1908      * @return The text that resulted from formatting the number
   1909      */
   1910     private String format(double number, NFRuleSet ruleSet) {
   1911         // all API format() routines that take a double vector through
   1912         // here.  Create an empty string buffer where the result will
   1913         // be built, and pass it to the rule set (along with an insertion
   1914         // position of 0 and the number being formatted) to the rule set
   1915         // for formatting
   1916         StringBuilder result = new StringBuilder();
   1917         if (getRoundingMode() != BigDecimal.ROUND_UNNECESSARY && !Double.isNaN(number) && !Double.isInfinite(number)) {
   1918             // We convert to a string because BigDecimal insists on excessive precision.
   1919             number = new BigDecimal(Double.toString(number)).setScale(getMaximumFractionDigits(), roundingMode).doubleValue();
   1920         }
   1921         ruleSet.format(number, result, 0, 0);
   1922         postProcess(result, ruleSet);
   1923         return result.toString();
   1924     }
   1925 
   1926     /**
   1927      * Bottleneck through which all the public format() methods
   1928      * that take a long pass. By the time we get here, we know
   1929      * which rule set we're using to do the formatting.
   1930      * @param number The number to format
   1931      * @param ruleSet The rule set to use to format the number
   1932      * @return The text that resulted from formatting the number
   1933      */
   1934     private String format(long number, NFRuleSet ruleSet) {
   1935         // all API format() routines that take a double vector through
   1936         // here.  We have these two identical functions-- one taking a
   1937         // double and one taking a long-- the couple digits of precision
   1938         // that long has but double doesn't (both types are 8 bytes long,
   1939         // but double has to borrow some of the mantissa bits to hold
   1940         // the exponent).
   1941         // Create an empty string buffer where the result will
   1942         // be built, and pass it to the rule set (along with an insertion
   1943         // position of 0 and the number being formatted) to the rule set
   1944         // for formatting
   1945         StringBuilder result = new StringBuilder();
   1946         if (number == Long.MIN_VALUE) {
   1947             // We can't handle this value right now. Provide an accurate default value.
   1948             result.append(getDecimalFormat().format(Long.MIN_VALUE));
   1949         }
   1950         else {
   1951             ruleSet.format(number, result, 0, 0);
   1952         }
   1953         postProcess(result, ruleSet);
   1954         return result.toString();
   1955     }
   1956 
   1957     /**
   1958      * Post-process the rules if we have a post-processor.
   1959      */
   1960     private void postProcess(StringBuilder result, NFRuleSet ruleSet) {
   1961         if (postProcessRules != null) {
   1962             if (postProcessor == null) {
   1963                 int ix = postProcessRules.indexOf(";");
   1964                 if (ix == -1) {
   1965                     ix = postProcessRules.length();
   1966                 }
   1967                 String ppClassName = postProcessRules.substring(0, ix).trim();
   1968                 try {
   1969                     Class<?> cls = Class.forName(ppClassName);
   1970                     postProcessor = (RBNFPostProcessor)cls.newInstance();
   1971                     postProcessor.init(this, postProcessRules);
   1972                 }
   1973                 catch (Exception e) {
   1974                     // if debug, print it out
   1975                     if (DEBUG) System.out.println("could not locate " + ppClassName + ", error " +
   1976                                        e.getClass().getName() + ", " + e.getMessage());
   1977                     postProcessor = null;
   1978                     postProcessRules = null; // don't try again
   1979                     return;
   1980                 }
   1981             }
   1982 
   1983             postProcessor.process(result, ruleSet);
   1984         }
   1985     }
   1986 
   1987     /**
   1988      * Adjust capitalization of formatted result for display context
   1989      */
   1990     private String adjustForContext(String result) {
   1991         DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION);
   1992         if (capitalization != DisplayContext.CAPITALIZATION_NONE && result != null && result.length() > 0
   1993             && UCharacter.isLowerCase(result.codePointAt(0)))
   1994         {
   1995             if (  capitalization==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
   1996                   (capitalization == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) ||
   1997                   (capitalization == DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) ) {
   1998                 if (capitalizationBrkIter == null) {
   1999                     // should only happen when deserializing, etc.
   2000                     capitalizationBrkIter = BreakIterator.getSentenceInstance(locale);
   2001                 }
   2002                 return UCharacter.toTitleCase(locale, result, capitalizationBrkIter,
   2003                                 UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
   2004             }
   2005         }
   2006         return result;
   2007     }
   2008 
   2009     /**
   2010      * Returns the named rule set.  Throws an IllegalArgumentException
   2011      * if this formatter doesn't have a rule set with that name.
   2012      * @param name The name of the desired rule set
   2013      * @return The rule set with that name
   2014      */
   2015     NFRuleSet findRuleSet(String name) throws IllegalArgumentException {
   2016         NFRuleSet result = ruleSetsMap.get(name);
   2017         if (result == null) {
   2018             throw new IllegalArgumentException("No rule set named " + name);
   2019         }
   2020         return result;
   2021     }
   2022 }
   2023