1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.text; 12 13 import java.math.BigInteger; 14 import java.text.FieldPosition; 15 import java.text.ParsePosition; 16 import java.util.Arrays; 17 import java.util.HashMap; 18 import java.util.Locale; 19 import java.util.Map; 20 import java.util.MissingResourceException; 21 import java.util.Set; 22 23 import android.icu.impl.ICUData; 24 import android.icu.impl.ICUDebug; 25 import android.icu.impl.ICUResourceBundle; 26 import android.icu.impl.PatternProps; 27 import android.icu.lang.UCharacter; 28 import android.icu.math.BigDecimal; 29 import android.icu.util.ULocale; 30 import android.icu.util.ULocale.Category; 31 import android.icu.util.UResourceBundle; 32 import android.icu.util.UResourceBundleIterator; 33 34 35 /** 36 * <p>A class that formats numbers according to a set of rules. 
This number formatter is 37 * typically used for spelling out numeric values in words (e.g., 25,376 as 38 * "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois 39 * cents soixante-seize" or 40 * "f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for 41 * other complicated formatting tasks, such as formatting a number of seconds as hours, 42 * minutes and seconds (e.g., 3,730 as "1:02:10").</p> 43 * 44 * <p>The resources contain three predefined formatters for each locale: spellout, which 45 * spells out a value in words (123 is "one hundred twenty-three"); ordinal, which 46 * appends an ordinal suffix to the end of a numeral (123 is "123rd"); and 47 * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is 48 * "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s 49 * by supplying programmer-defined rule sets.</p> 50 * 51 * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description 52 * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource 53 * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em> 54 * Each rule has a string of output text and a value or range of values it is applicable to. 
55 * In a typical spellout rule set, the first twenty rules are the words for the numbers from 56 * 0 to 19:</p> 57 * 58 * <pre>zero; one; two; three; four; five; six; seven; eight; nine; 59 * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre> 60 * 61 * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and 62 * we only have to supply the words for the multiples of 10:</p> 63 * 64 * <pre>20: twenty[->>]; 65 * 30: thirty[->>]; 66 * 40: forty[->>]; 67 * 50: fifty[->>]; 68 * 60: sixty[->>]; 69 * 70: seventy[->>]; 70 * 80: eighty[->>]; 71 * 90: ninety[->>];</pre> 72 * 73 * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the 74 * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable 75 * to all numbers from its own base value to one less than the next rule's base value. The 76 * ">>" token is called a <em>substitution</em> and tells the formatter to 77 * isolate the number's ones digit, format it using this same set of rules, and place the 78 * result at the position of the ">>" token. Text in brackets is omitted if 79 * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24 80 * is "twenty-four," not "twenty four").</p> 81 * 82 * <p>For even larger numbers, we can actually look up several parts of the number in the 83 * list:</p> 84 * 85 * <pre>100: << hundred[ >>];</pre> 86 * 87 * <p>The "<<" represents a new kind of substitution. The << isolates 88 * the hundreds digit (and any digits to its left), formats it using this same rule set, and 89 * places the result where the "<<" was. Notice also that the meaning of 90 * >> has changed: it now refers to both the tens and the ones digits. The meaning of 91 * both substitutions depends on the rule's base value. 
The base value determines the rule's <em>divisor,</em> 92 * which is the highest power of 10 that is less than or equal to the base value (the user 93 * can change this). To fill in the substitutions, the formatter divides the number being 94 * formatted by the divisor. The integral quotient is used to fill in the << 95 * substitution, and the remainder is used to fill in the >> substitution. The meaning 96 * of the brackets changes similarly: text in brackets is omitted if the value being 97 * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so 98 * if a substitution is filled in with text that includes another substitution, that 99 * substitution is also filled in.</p> 100 * 101 * <p>This rule covers values up to 999, at which point we add another rule:</p> 102 * 103 * <pre>1000: << thousand[ >>];</pre> 104 * 105 * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's 106 * base value is a higher power of 10, changing the rule's divisor. This rule can actually be 107 * used all the way up to 999,999. This allows us to finish out the rules as follows:</p> 108 * 109 * <pre>1,000,000: << million[ >>]; 110 * 1,000,000,000: << billion[ >>]; 111 * 1,000,000,000,000: << trillion[ >>]; 112 * 1,000,000,000,000,000: OUT OF RANGE!;</pre> 113 * 114 * <p>Commas, periods, and spaces can be used in the base values to improve legibility and 115 * are ignored by the rule parser. The last rule in the list is customarily treated as an 116 * "overflow rule," applying to everything from its base value on up, and often (as 117 * in this example) being used to print out an error message or default representation. 
118 * Notice also that the size of the major groupings in large numbers is controlled by the 119 * spacing of the rules: because in English we group numbers by thousand, the higher rules 120 * are separated from each other by a factor of 1,000.</p> 121 * 122 * <p>To see how these rules actually work in practice, consider the following example: 123 * Formatting 25,340 with this rule set would work like this:</p> 124 * 125 * <table border="0" width="630"> 126 * <tr> 127 * <td style="width: 21;"></td> 128 * <td style="width: 257; vertical-align: top;"><strong><< thousand >></strong></td> 129 * <td style="width: 340; vertical-align: top;">[the rule whose base value is 1,000 is applicable to 25,340]</td> 130 * </tr> 131 * <tr> 132 * <td style="width: 21;"></td> 133 * <td style="width: 257; vertical-align: top;"><strong>twenty->></strong> thousand >></td> 134 * <td style="width: 340; vertical-align: top;">[25,340 over 1,000 is 25. The rule for 20 applies.]</td> 135 * </tr> 136 * <tr> 137 * <td style="width: 21;"></td> 138 * <td style="width: 257; vertical-align: top;">twenty-<strong>five</strong> thousand >></td> 139 * <td style="width: 340; vertical-align: top;">[25 mod 10 is 5. The rule for 5 is "five."]</td> 140 * </tr> 141 * <tr> 142 * <td style="width: 21;"></td> 143 * <td style="width: 257; vertical-align: top;">twenty-five thousand <strong><< hundred >></strong></td> 144 * <td style="width: 340; vertical-align: top;">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td> 145 * </tr> 146 * <tr> 147 * <td style="width: 21;"></td> 148 * <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>three</strong> hundred >></td> 149 * <td style="width: 340; vertical-align: top;">[340 over 100 is 3. 
The rule for 3 is "three."]</td> 150 * </tr> 151 * <tr> 152 * <td style="width: 21;"></td> 153 * <td style="width: 257; vertical-align: top;">twenty-five thousand three hundred <strong>forty</strong></td> 154 * <td style="width: 340; vertical-align: top;">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides 155 * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td> 156 * </tr> 157 * </table> 158 * 159 * <p>The above syntax suffices only to format positive integers. To format negative numbers, 160 * we add a special rule:</p> 161 * 162 * <pre>-x: minus >>;</pre> 163 * 164 * <p>This is called a <em>negative-number rule,</em> and is identified by "-x" 165 * where the base value would be. This rule is used to format all negative numbers. The 166 * >> token here means "find the number's absolute value, format it with these 167 * rules, and put the result here."</p> 168 * 169 * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional 170 * parts:</p> 171 * 172 * <pre>x.x: << point >>;</pre> 173 * 174 * <p>This rule is used for all positive non-integers (negative non-integers pass through the 175 * negative-number rule first and then through this rule). Here, the << token refers to 176 * the number's integral part, and the >> to the number's fractional part. The 177 * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be 178 * formatted as "one hundred twenty-three point four five six").</p> 179 * 180 * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p> 181 * 182 * <p>There is actually much more flexibility built into the rule language than the 183 * description above shows. A formatter may own multiple rule sets, which can be selected by 184 * the caller, and which can use each other to fill in their substitutions. Substitutions can 185 * also be filled in with digits, using a DecimalFormat object. 
There is syntax that can be 186 * used to alter a rule's divisor in various ways. And there is provision for much more 187 * flexible fraction handling. A complete description of the rule syntax follows:</p> 188 * 189 * <hr> 190 * 191 * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule 192 * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule 193 * set name must begin with a % sign. Rule sets with names that begin with a single % sign 194 * are <em>public:</em> the caller can specify that they be used to format and parse numbers. 195 * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use 196 * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p> 197 * 198 * <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>. 199 * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt> 200 * description which is used to define equivalences for lenient parsing. For more information 201 * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing, 202 * see <tt>setLenientParse()</tt>. 
<em>Note:</em> symbols that have syntactic meaning 203 * in collation rules, such as '&', have no particular meaning when appearing outside 204 * of the <tt>lenient-parse</tt> rule set.</p> 205 * 206 * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em> 207 * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em> 208 * These parameters are controlled by the description syntax, which consists of a <em>rule 209 * descriptor,</em> a colon, and a <em>rule body.</em></p> 210 * 211 * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the 212 * name of a token):</p> 213 * 214 * <table border="0" width="100%"> 215 * <tr> 216 * <td style="width: 5%; vertical-align: top;"></td> 217 * <td style="width: 8%; vertical-align: top;"><em>bv</em>:</td> 218 * <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal 219 * number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas, 220 * which are ignored. The rule's divisor is the highest power of 10 less than or equal to 221 * the base value.</td> 222 * </tr> 223 * <tr> 224 * <td style="width: 5%; vertical-align: top;"></td> 225 * <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>:</td> 226 * <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the 227 * highest power of <em>rad</em> less than or equal to the base value.</td> 228 * </tr> 229 * <tr> 230 * <td style="width: 5%; vertical-align: top;"></td> 231 * <td style="width: 8%; vertical-align: top;"><em>bv</em>>:</td> 232 * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor, 233 * let the radix be 10, and the exponent be the highest exponent of the radix that yields a 234 * result less than or equal to the base value. Every > character after the base value 235 * decreases the exponent by 1. 
If the exponent is positive or 0, the divisor is the radix 236 * raised to the power of the exponent; otherwise, the divisor is 1.</td> 237 * </tr> 238 * <tr> 239 * <td style="width: 5%; vertical-align: top;"></td> 240 * <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>>:</td> 241 * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor, 242 * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that 243 * yields a result less than or equal to the base value. Every > character after the radix 244 * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix 245 * raised to the power of the exponent; otherwise, the divisor is 1.</td> 246 * </tr> 247 * <tr> 248 * <td style="width: 5%; vertical-align: top;"></td> 249 * <td style="width: 8%; vertical-align: top;">-x:</td> 250 * <td valign="top">The rule is a negative-number rule.</td> 251 * </tr> 252 * <tr> 253 * <td style="width: 5%; vertical-align: top;"></td> 254 * <td style="width: 8%; vertical-align: top;">x.x:</td> 255 * <td valign="top">The rule is an <em>improper fraction rule</em>. If the full stop in 256 * the middle of the rule name is replaced with the decimal point 257 * that is used in the language or DecimalFormatSymbols, then that rule will 258 * have precedence when formatting and parsing this rule. For example, some 259 * languages use the comma, and can thus be written as x,x instead. For example, 260 * you can use "x.x: << point >>;x,x: << comma >>;" to 261 * handle the decimal point that matches the language's natural spelling of 262 * the punctuation of either the full stop or comma.</td> 263 * </tr> 264 * <tr> 265 * <td style="width: 5%; vertical-align: top;"></td> 266 * <td style="width: 8%; vertical-align: top;">0.x:</td> 267 * <td valign="top">The rule is a <em>proper fraction rule</em>. 
If the full stop in 268 * the middle of the rule name is replaced with the decimal point 269 * that is used in the language or DecimalFormatSymbols, then that rule will 270 * have precedence when formatting and parsing this rule. For example, some 271 * languages use the comma, and can thus be written as 0,x instead. For example, 272 * you can use "0.x: point >>;0,x: comma >>;" to 273 * handle the decimal point that matches the language's natural spelling of 274 * the punctuation of either the full stop or comma</td> 275 * </tr> 276 * <tr> 277 * <td style="width: 5%; vertical-align: top;"></td> 278 * <td style="width: 8%; vertical-align: top;">x.0:</td> 279 * <td valign="top">The rule is a <em>master rule</em>. If the full stop in 280 * the middle of the rule name is replaced with the decimal point 281 * that is used in the language or DecimalFormatSymbols, then that rule will 282 * have precedence when formatting and parsing this rule. For example, some 283 * languages use the comma, and can thus be written as x,0 instead. 
For example, 284 * you can use "x.0: << point;x,0: << comma;" to 285 * handle the decimal point that matches the language's natural spelling of 286 * the punctuation of either the full stop or comma</td> 287 * </tr> 288 * <tr> 289 * <td style="width: 5%; vertical-align: top;"></td> 290 * <td style="width: 8%; vertical-align: top;">Inf:</td> 291 * <td style="vertical-align: top;">The rule for infinity.</td> 292 * </tr> 293 * <tr> 294 * <td style="width: 5%; vertical-align: top;"></td> 295 * <td style="width: 8%; vertical-align: top;">NaN:</td> 296 * <td style="vertical-align: top;">The rule for an IEEE 754 NaN (not a number).</td> 297 * </tr> 298 * <tr> 299 * <td style="width: 5%; vertical-align: top;"></td> 300 * <td style="width: 8%; vertical-align: top;"><em>nothing</em></td> 301 * <td style="vertical-align: top;">If the rule's rule descriptor is left out, the base value is one plus the 302 * preceding rule's base value (or zero if this is the first rule in the list) in a normal 303 * rule set. In a fraction rule set, the base value is the same as the preceding rule's 304 * base value.</td> 305 * </tr> 306 * </table> 307 * 308 * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending 309 * on whether it is used to format a number's integral part (or the whole number) or a 310 * number's fractional part. Using a rule set to format a rule's fractional part makes it a 311 * fraction rule set.</p> 312 * 313 * <p>Which rule is used to format a number is defined according to one of the following 314 * algorithms: If the rule set is a regular rule set, do the following: 315 * 316 * <ul> 317 * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>), 318 * use the master rule. 
(If the number being formatted was passed in as a <tt>long</tt>, 319 * the master rule is ignored.)</li> 320 * <li>If the number is negative, use the negative-number rule.</li> 321 * <li>If the number has a fractional part and is greater than 1, use the improper fraction 322 * rule.</li> 323 * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction 324 * rule.</li> 325 * <li>Binary-search the rule list for the rule with the highest base value less than or equal 326 * to the number. If that rule has two substitutions, its base value is not an even multiple 327 * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the 328 * rule that precedes it in the rule list. Otherwise, use the rule itself.</li> 329 * </ul> 330 * 331 * <p>If the rule set is a fraction rule set, do the following: 332 * 333 * <ul> 334 * <li>Ignore negative-number and fraction rules.</li> 335 * <li>For each rule in the list, multiply the number being formatted (which will always be 336 * between 0 and 1) by the rule's base value. Keep track of the distance between the result and 337 * the nearest integer.</li> 338 * <li>Use the rule that produced the result closest to zero in the above calculation. In the 339 * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is 340 * to try each rule's base value as a possible denominator of a fraction. Whichever 341 * denominator produces the fraction closest in value to the number being formatted wins.) If 342 * the rule following the matching rule has the same base value, use it if the numerator of 343 * the fraction is anything other than 1; if the numerator is 1, use the original matching 344 * rule. (This is to allow singular and plural forms of the rule text without a lot of extra 345 * hassle.)</li> 346 * </ul> 347 * 348 * <p>A rule's body consists of a string of characters terminated by a semicolon. 
The rule 349 * may include zero, one, or two <em>substitution tokens,</em> and a range of text in 350 * brackets. The brackets denote optional text (and may also include one or both 351 * substitutions). The exact meanings of the substitution tokens, and under what conditions 352 * optional text is omitted, depend on the syntax of the substitution token and the context. 353 * The rest of the text in a rule body is literal text that is output when the rule matches 354 * the number being formatted.</p> 355 * 356 * <p>A substitution token begins and ends with a <em>token character.</em> The token 357 * character and the context together specify a mathematical operation to be performed on the 358 * number being formatted. An optional <em>substitution descriptor </em>specifies how the 359 * value resulting from that operation is used to fill in the substitution. The position of 360 * the substitution token in the rule body specifies the location of the resultant text in 361 * the original rule text.</p> 362 * 363 * <p>The meanings of the substitution token characters are as follows:</p> 364 * 365 * <table border="0" width="100%"> 366 * <tr> 367 * <td style="width: 37;"></td> 368 * <td style="width: 23;">>></td> 369 * <td style="width: 165; vertical-align: top;">in normal rule</td> 370 * <td>Divide the number by the rule's divisor and format the remainder</td> 371 * </tr> 372 * <tr> 373 * <td style="width: 37;"></td> 374 * <td style="width: 23;"></td> 375 * <td style="width: 165; vertical-align: top;">in negative-number rule</td> 376 * <td>Find the absolute value of the number and format the result</td> 377 * </tr> 378 * <tr> 379 * <td style="width: 37;"></td> 380 * <td style="width: 23;"></td> 381 * <td style="width: 165; vertical-align: top;">in fraction or master rule</td> 382 * <td>Isolate the number's fractional part and format it.</td> 383 * </tr> 384 * <tr> 385 * <td style="width: 37;"></td> 386 * <td style="width: 23;"></td> 387 * <td style="width: 165; 
vertical-align: top;">in rule in fraction rule set</td> 388 * <td>Not allowed.</td> 389 * </tr> 390 * <tr> 391 * <td style="width: 37;"></td> 392 * <td style="width: 23;">>>></td> 393 * <td style="width: 165; vertical-align: top;">in normal rule</td> 394 * <td>Divide the number by the rule's divisor and format the remainder, 395 * but bypass the normal rule-selection process and just use the 396 * rule that precedes this one in this rule list.</td> 397 * </tr> 398 * <tr> 399 * <td style="width: 37;"></td> 400 * <td style="width: 23;"></td> 401 * <td style="width: 165; vertical-align: top;">in all other rules</td> 402 * <td>Not allowed.</td> 403 * </tr> 404 * <tr> 405 * <td style="width: 37;"></td> 406 * <td style="width: 23;"><<</td> 407 * <td style="width: 165; vertical-align: top;">in normal rule</td> 408 * <td>Divide the number by the rule's divisor and format the quotient</td> 409 * </tr> 410 * <tr> 411 * <td style="width: 37;"></td> 412 * <td style="width: 23;"></td> 413 * <td style="width: 165; vertical-align: top;">in negative-number rule</td> 414 * <td>Not allowed.</td> 415 * </tr> 416 * <tr> 417 * <td style="width: 37;"></td> 418 * <td style="width: 23;"></td> 419 * <td style="width: 165; vertical-align: top;">in fraction or master rule</td> 420 * <td>Isolate the number's integral part and format it.</td> 421 * </tr> 422 * <tr> 423 * <td style="width: 37;"></td> 424 * <td style="width: 23;"></td> 425 * <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td> 426 * <td>Multiply the number by the rule's base value and format the result.</td> 427 * </tr> 428 * <tr> 429 * <td style="width: 37;"></td> 430 * <td style="width: 23;">==</td> 431 * <td style="width: 165; vertical-align: top;">in all rule sets</td> 432 * <td>Format the number unchanged</td> 433 * </tr> 434 * <tr> 435 * <td style="width: 37;"></td> 436 * <td style="width: 23;">[]</td> 437 * <td style="width: 165; vertical-align: top;">in normal rule</td> 438 * <td>Omit the 
optional text if the number is an even multiple of the rule's divisor</td> 439 * </tr> 440 * <tr> 441 * <td style="width: 37;"></td> 442 * <td style="width: 23;"></td> 443 * <td style="width: 165; vertical-align: top;">in negative-number rule</td> 444 * <td>Not allowed.</td> 445 * </tr> 446 * <tr> 447 * <td style="width: 37;"></td> 448 * <td style="width: 23;"></td> 449 * <td style="width: 165; vertical-align: top;">in improper-fraction rule</td> 450 * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an 451 * x.x rule and a 0.x rule)</td> 452 * </tr> 453 * <tr> 454 * <td style="width: 37;"></td> 455 * <td style="width: 23;"></td> 456 * <td style="width: 165; vertical-align: top;">in master rule</td> 457 * <td>Omit the optional text if the number is an integer (same as specifying both an x.x 458 * rule and an x.0 rule)</td> 459 * </tr> 460 * <tr> 461 * <td style="width: 37;"></td> 462 * <td style="width: 23;"></td> 463 * <td style="width: 165; vertical-align: top;">in proper-fraction rule</td> 464 * <td>Not allowed.</td> 465 * </tr> 466 * <tr> 467 * <td style="width: 37;"></td> 468 * <td style="width: 23;"></td> 469 * <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td> 470 * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td> 471 * </tr> 472 * <tr> 473 * <td style="width: 37;">$(cardinal,<i>plural syntax</i>)$</td> 474 * <td style="width: 23;"></td> 475 * <td style="width: 165; vertical-align: top;">in all rule sets</td> 476 * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the 477 * exponent of the base value for the specified locale, which is normally equivalent to the << value. 478 * This uses the cardinal plural rules from PluralFormat. 
All strings used in the plural format are treated 479 * as the same base value for parsing.</td> 480 * </tr> 481 * <tr> 482 * <td style="width: 37;">$(ordinal,<i>plural syntax</i>)$</td> 483 * <td style="width: 23;"></td> 484 * <td style="width: 165; vertical-align: top;">in all rule sets</td> 485 * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the 486 * exponent of the base value for the specified locale, which is normally equivalent to the << value. 487 * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated 488 * as the same base value for parsing.</td> 489 * </tr> 490 * </table> 491 * 492 * <p>The substitution descriptor (i.e., the text between the token characters) may take one 493 * of three forms:</p> 494 * 495 * <table border="0" width="100%"> 496 * <tr> 497 * <td style="width: 42;"></td> 498 * <td style="width: 166; vertical-align: top;">a rule set name</td> 499 * <td>Perform the mathematical operation on the number, and format the result using the 500 * named rule set.</td> 501 * </tr> 502 * <tr> 503 * <td style="width: 42;"></td> 504 * <td style="width: 166; vertical-align: top;">a DecimalFormat pattern</td> 505 * <td>Perform the mathematical operation on the number, and format the result using a 506 * DecimalFormat with the specified pattern. 
The pattern must begin with 0 or #.</td> 507 * </tr> 508 * <tr> 509 * <td style="width: 42;"></td> 510 * <td style="width: 166; vertical-align: top;">nothing</td> 511 * <td>Perform the mathematical operation on the number, and format the result using the rule 512 * set containing the current rule, except:<ul> 513 * <li>You can't have an empty substitution descriptor with a == substitution.</li> 514 * <li>If you omit the substitution descriptor in a >> substitution in a fraction rule, 515 * format the result one digit at a time using the rule set containing the current rule.</li> 516 * <li>If you omit the substitution descriptor in a << substitution in a rule in a 517 * fraction rule set, format the result using the default rule set for this formatter.</li> 518 * </ul> 519 * </td> 520 * </tr> 521 * </table> 522 * 523 * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule 524 * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe, 525 * the apostrophe is ignored, but all text after it becomes significant (this is how you can 526 * have a rule's rule text begin with whitespace). There is no escape function: the semicolon 527 * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set 528 * names. 
The characters beginning a substitution token are always treated as the beginning 529 * of a substitution token.</p> 530 * 531 * <p>See the resource data and the demo program for annotated examples of real rule sets 532 * using these features.</p> 533 * 534 * @author Richard Gillam 535 * @see NumberFormat 536 * @see DecimalFormat 537 * @see PluralFormat 538 * @see PluralRules 539 * @hide Only a subset of ICU is exposed in Android 540 */ 541 public class RuleBasedNumberFormat extends NumberFormat { 542 543 //----------------------------------------------------------------------- 544 // constants 545 //----------------------------------------------------------------------- 546 547 // Generated by serialver from JDK 1.4.1_01 548 static final long serialVersionUID = -7664252765575395068L; 549 550 /** 551 * Selector code that tells the constructor to create a spellout formatter 552 */ 553 public static final int SPELLOUT = 1; 554 555 /** 556 * Selector code that tells the constructor to create an ordinal formatter 557 */ 558 public static final int ORDINAL = 2; 559 560 /** 561 * Selector code that tells the constructor to create a duration formatter 562 */ 563 public static final int DURATION = 3; 564 565 /** 566 * Selector code that tells the constructor to create a numbering system formatter 567 */ 568 public static final int NUMBERING_SYSTEM = 4; 569 570 //----------------------------------------------------------------------- 571 // data members 572 //----------------------------------------------------------------------- 573 574 /** 575 * The formatter's rule sets. 576 */ 577 private transient NFRuleSet[] ruleSets = null; 578 579 /** 580 * The formatter's rule names mapped to rule sets. 581 */ 582 private transient Map<String, NFRuleSet> ruleSetsMap = null; 583 584 /** 585 * A pointer to the formatter's default rule set. This is always included 586 * in ruleSets. 587 */ 588 private transient NFRuleSet defaultRuleSet = null; 589 590 /** 591 * The formatter's locale. 
This is used to create DecimalFormatSymbols and 592 * Collator objects. 593 * @serial 594 */ 595 private ULocale locale = null; 596 597 /** 598 * The formatter's rounding mode. 599 * @serial 600 */ 601 private int roundingMode = BigDecimal.ROUND_UNNECESSARY; 602 603 /** 604 * Collator to be used in lenient parsing. This variable is lazy-evaluated: 605 * the collator is actually created the first time the client does a parse 606 * with lenient-parse mode turned on. 607 */ 608 private transient RbnfLenientScannerProvider scannerProvider = null; 609 610 // flag to mark whether we've previously looked for a scanner and failed 611 private transient boolean lookedForScanner; 612 613 /** 614 * The DecimalFormatSymbols object that any DecimalFormat objects this 615 * formatter uses should use. This variable is lazy-evaluated: it isn't 616 * filled in if the rule set never uses a DecimalFormat pattern. 617 */ 618 private transient DecimalFormatSymbols decimalFormatSymbols = null; 619 620 /** 621 * The NumberFormat used when lenient parsing numbers. This needs to reflect 622 * the locale. This is lazy-evaluated, like decimalFormatSymbols. It is 623 * here so it can be shared by different NFSubstitutions. 624 */ 625 private transient DecimalFormat decimalFormat = null; 626 627 /** 628 * The rule used when dealing with infinity. This is lazy-evaluated, and derived from decimalFormat. 629 * It is here so it can be shared by different NFRuleSets. 630 */ 631 private transient NFRule defaultInfinityRule = null; 632 633 /** 634 * The rule used when dealing with IEEE 754 NaN. This is lazy-evaluated, and derived from decimalFormat. 635 * It is here so it can be shared by different NFRuleSets. 636 */ 637 private transient NFRule defaultNaNRule = null; 638 639 /** 640 * Flag specifying whether lenient parse mode is on or off. Off by default. 
641 * @serial 642 */ 643 private boolean lenientParse = false; 644 645 /** 646 * If the description specifies lenient-parse rules, they're stored here until 647 * the collator is created. 648 */ 649 private transient String lenientParseRules; 650 651 /** 652 * If the description specifies post-process rules, they're stored here until 653 * post-processing is required. 654 */ 655 private transient String postProcessRules; 656 657 /** 658 * Post processor lazily constructed from the postProcessRules. 659 */ 660 private transient RBNFPostProcessor postProcessor; 661 662 /** 663 * Localizations for rule set names. 664 * @serial 665 */ 666 private Map<String, String[]> ruleSetDisplayNames; 667 668 /** 669 * The public rule set names; 670 * @serial 671 */ 672 private String[] publicRuleSetNames; 673 674 /** 675 * Data for handling context-based capitalization 676 */ 677 private boolean capitalizationInfoIsSet = false; 678 private boolean capitalizationForListOrMenu = false; 679 private boolean capitalizationForStandAlone = false; 680 private transient BreakIterator capitalizationBrkIter = null; 681 682 683 private static final boolean DEBUG = ICUDebug.enabled("rbnf"); 684 685 //----------------------------------------------------------------------- 686 // constructors 687 //----------------------------------------------------------------------- 688 689 /** 690 * Creates a RuleBasedNumberFormat that behaves according to the description 691 * passed in. The formatter uses the default <code>FORMAT</code> locale. 692 * @param description A description of the formatter's desired behavior. 693 * See the class documentation for a complete explanation of the description 694 * syntax. 695 * @see Category#FORMAT 696 */ 697 public RuleBasedNumberFormat(String description) { 698 locale = ULocale.getDefault(Category.FORMAT); 699 init(description, null); 700 } 701 702 /** 703 * Creates a RuleBasedNumberFormat that behaves according to the description 704 * passed in. 
The formatter uses the default <code>FORMAT</code> locale. 705 * <p> 706 * The localizations data provides information about the public 707 * rule sets and their localized display names for different 708 * locales. The first element in the list is an array of the names 709 * of the public rule sets. The first element in this array is 710 * the initial default ruleset. The remaining elements in the 711 * list are arrays of localizations of the names of the public 712 * rule sets. Each of these is one longer than the initial array, 713 * with the first String being the ULocale ID, and the remaining 714 * Strings being the localizations of the rule set names, in the 715 * same order as the initial array. 716 * @param description A description of the formatter's desired behavior. 717 * See the class documentation for a complete explanation of the description 718 * syntax. 719 * @param localizations a list of localizations for the rule set 720 * names in the description. 721 * @see Category#FORMAT 722 */ 723 public RuleBasedNumberFormat(String description, String[][] localizations) { 724 locale = ULocale.getDefault(Category.FORMAT); 725 init(description, localizations); 726 } 727 728 /** 729 * Creates a RuleBasedNumberFormat that behaves according to the description 730 * passed in. The formatter uses the specified locale to determine the 731 * characters to use when formatting in numerals, and to define equivalences 732 * for lenient parsing. 733 * @param description A description of the formatter's desired behavior. 734 * See the class documentation for a complete explanation of the description 735 * syntax. 736 * @param locale A locale, which governs which characters are used for 737 * formatting values in numerals, and which characters are equivalent in 738 * lenient parsing. 
739 */ 740 public RuleBasedNumberFormat(String description, Locale locale) { 741 this(description, ULocale.forLocale(locale)); 742 } 743 744 /** 745 * Creates a RuleBasedNumberFormat that behaves according to the description 746 * passed in. The formatter uses the specified locale to determine the 747 * characters to use when formatting in numerals, and to define equivalences 748 * for lenient parsing. 749 * @param description A description of the formatter's desired behavior. 750 * See the class documentation for a complete explanation of the description 751 * syntax. 752 * @param locale A locale, which governs which characters are used for 753 * formatting values in numerals, and which characters are equivalent in 754 * lenient parsing. 755 */ 756 public RuleBasedNumberFormat(String description, ULocale locale) { 757 this.locale = locale; 758 init(description, null); 759 } 760 761 /** 762 * Creates a RuleBasedNumberFormat that behaves according to the description 763 * passed in. The formatter uses the specified locale to determine the 764 * characters to use when formatting in numerals, and to define equivalences 765 * for lenient parsing. 766 * <p> 767 * The localizations data provides information about the public 768 * rule sets and their localized display names for different 769 * locales. The first element in the list is an array of the names 770 * of the public rule sets. The first element in this array is 771 * the initial default ruleset. The remaining elements in the 772 * list are arrays of localizations of the names of the public 773 * rule sets. Each of these is one longer than the initial array, 774 * with the first String being the ULocale ID, and the remaining 775 * Strings being the localizations of the rule set names, in the 776 * same order as the initial array. 777 * @param description A description of the formatter's desired behavior. 778 * See the class documentation for a complete explanation of the description 779 * syntax. 
780 * @param localizations a list of localizations for the rule set names in the description. 781 * @param locale A ULocale that governs which characters are used for 782 * formatting values in numerals, and determines which characters are equivalent in 783 * lenient parsing. 784 */ 785 public RuleBasedNumberFormat(String description, String[][] localizations, ULocale locale) { 786 this.locale = locale; 787 init(description, localizations); 788 } 789 790 /** 791 * Creates a RuleBasedNumberFormat from a predefined description. The selector 792 * code chooses among three possible predefined formats: spellout, ordinal, 793 * and duration. 794 * @param locale The locale for the formatter. 795 * @param format A selector code specifying which kind of formatter to create for that 796 * locale. There are three legal values: SPELLOUT, which creates a formatter that 797 * spells out a value in words in the desired language, ORDINAL, which attaches 798 * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"), 799 * and DURATION, which formats a duration in seconds as hours, minutes, and seconds. 800 */ 801 public RuleBasedNumberFormat(Locale locale, int format) { 802 this(ULocale.forLocale(locale), format); 803 } 804 805 /** 806 * Creates a RuleBasedNumberFormat from a predefined description. The selector 807 * code chooses among three possible predefined formats: spellout, ordinal, 808 * and duration. 809 * @param locale The locale for the formatter. 810 * @param format A selector code specifying which kind of formatter to create for that 811 * locale. There are four legal values: SPELLOUT, which creates a formatter that 812 * spells out a value in words in the desired language, ORDINAL, which attaches 813 * an ordinal suffix from the desired language to the end of a number (e.g. 
"123rd"), 814 * DURATION, which formats a duration in seconds as hours, minutes, and seconds, and 815 * NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering 816 * systems such as the Hebrew numbering system, or for Roman numerals, etc.. 817 */ 818 public RuleBasedNumberFormat(ULocale locale, int format) { 819 this.locale = locale; 820 821 ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle. 822 getBundleInstance(ICUData.ICU_RBNF_BASE_NAME, locale); 823 824 // TODO: determine correct actual/valid locale. Note ambiguity 825 // here -- do actual/valid refer to pattern, DecimalFormatSymbols, 826 // or Collator? 827 ULocale uloc = bundle.getULocale(); 828 setLocale(uloc, uloc); 829 830 StringBuilder description = new StringBuilder(); 831 String[][] localizations = null; 832 833 try { 834 ICUResourceBundle rules = bundle.getWithFallback("RBNFRules/"+rulenames[format-1]); 835 UResourceBundleIterator it = rules.getIterator(); 836 while (it.hasNext()) { 837 description.append(it.nextString()); 838 } 839 } 840 catch (MissingResourceException e1) { 841 } 842 843 // We use findTopLevel() instead of get() because 844 // it's faster when we know that it's usually going to fail. 845 UResourceBundle locNamesBundle = bundle.findTopLevel(locnames[format - 1]); 846 if (locNamesBundle != null) { 847 localizations = new String[locNamesBundle.getSize()][]; 848 for (int i = 0; i < localizations.length; ++i) { 849 localizations[i] = locNamesBundle.get(i).getStringArray(); 850 } 851 } 852 // else there are no localized names. It's not that important. 
853 854 init(description.toString(), localizations); 855 } 856 857 private static final String[] rulenames = { 858 "SpelloutRules", "OrdinalRules", "DurationRules", "NumberingSystemRules", 859 }; 860 private static final String[] locnames = { 861 "SpelloutLocalizations", "OrdinalLocalizations", "DurationLocalizations", "NumberingSystemLocalizations", 862 }; 863 864 /** 865 * Creates a RuleBasedNumberFormat from a predefined description. Uses the 866 * default <code>FORMAT</code> locale. 867 * @param format A selector code specifying which kind of formatter to create. 868 * There are three legal values: SPELLOUT, which creates a formatter that spells 869 * out a value in words in the default locale's language, ORDINAL, which attaches 870 * an ordinal suffix from the default locale's language to a numeral, and 871 * DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down. 872 * or NUMBERING_SYSTEM, which is used for alternate numbering systems such as Hebrew. 873 * @see Category#FORMAT 874 */ 875 public RuleBasedNumberFormat(int format) { 876 this(ULocale.getDefault(Category.FORMAT), format); 877 } 878 879 //----------------------------------------------------------------------- 880 // boilerplate 881 //----------------------------------------------------------------------- 882 883 /** 884 * Duplicates this formatter. 885 * @return A RuleBasedNumberFormat that is equal to this one. 886 */ 887 @Override 888 public Object clone() { 889 return super.clone(); 890 } 891 892 /** 893 * Tests two RuleBasedNumberFormats for equality. 894 * @param that The formatter to compare against this one. 895 * @return true if the two formatters have identical behavior. 
896 */ 897 @Override 898 public boolean equals(Object that) { 899 // if the other object isn't a RuleBasedNumberFormat, that's 900 // all we need to know 901 // Test for capitalization info equality is adequately handled 902 // by the NumberFormat test for capitalizationSetting equality; 903 // the info here is just derived from that. 904 if (!(that instanceof RuleBasedNumberFormat)) { 905 return false; 906 } else { 907 // cast the other object's pointer to a pointer to a 908 // RuleBasedNumberFormat 909 RuleBasedNumberFormat that2 = (RuleBasedNumberFormat)that; 910 911 // compare their locales and lenient-parse modes 912 if (!locale.equals(that2.locale) || lenientParse != that2.lenientParse) { 913 return false; 914 } 915 916 // if that succeeds, then compare their rule set lists 917 if (ruleSets.length != that2.ruleSets.length) { 918 return false; 919 } 920 for (int i = 0; i < ruleSets.length; i++) { 921 if (!ruleSets[i].equals(that2.ruleSets[i])) { 922 return false; 923 } 924 } 925 926 return true; 927 } 928 } 929 930 /** 931 * Mock implementation of hashCode(). This implementation always returns a constant 932 * value. When Java assertion is enabled, this method triggers an assertion failure. 933 * @deprecated This API is ICU internal only. 934 * @hide draft / provisional / internal are hidden on Android 935 */ 936 @Override 937 @Deprecated 938 public int hashCode() { 939 return super.hashCode(); 940 } 941 942 /** 943 * Generates a textual description of this formatter. 944 * @return a String containing a rule set that will produce a RuleBasedNumberFormat 945 * with identical behavior to this one. This won't necessarily be identical 946 * to the rule set description that was originally passed in, but will produce 947 * the same result. 
948 */ 949 @Override 950 public String toString() { 951 952 // accumulate the descriptions of all the rule sets in a 953 // StringBuffer, then cast it to a String and return it 954 StringBuilder result = new StringBuilder(); 955 for (NFRuleSet ruleSet : ruleSets) { 956 result.append(ruleSet.toString()); 957 } 958 return result.toString(); 959 } 960 961 /** 962 * Writes this object to a stream. 963 * @param out The stream to write to. 964 */ 965 private void writeObject(java.io.ObjectOutputStream out) 966 throws java.io.IOException { 967 // we just write the textual description to the stream, so we 968 // have an implementation-independent streaming format 969 out.writeUTF(this.toString()); 970 out.writeObject(this.locale); 971 out.writeInt(this.roundingMode); 972 } 973 974 /** 975 * Reads this object in from a stream. 976 * @param in The stream to read from. 977 */ 978 private void readObject(java.io.ObjectInputStream in) 979 throws java.io.IOException { 980 981 // read the description in from the stream 982 String description = in.readUTF(); 983 ULocale loc; 984 985 try { 986 loc = (ULocale) in.readObject(); 987 } catch (Exception e) { 988 loc = ULocale.getDefault(Category.FORMAT); 989 } 990 try { 991 roundingMode = in.readInt(); 992 } catch (Exception ignored) { 993 } 994 995 // build a brand-new RuleBasedNumberFormat from the description, 996 // then steal its substructure. 
This object's substructure and 997 // the temporary RuleBasedNumberFormat drop on the floor and 998 // get swept up by the garbage collector 999 RuleBasedNumberFormat temp = new RuleBasedNumberFormat(description, loc); 1000 ruleSets = temp.ruleSets; 1001 ruleSetsMap = temp.ruleSetsMap; 1002 defaultRuleSet = temp.defaultRuleSet; 1003 publicRuleSetNames = temp.publicRuleSetNames; 1004 decimalFormatSymbols = temp.decimalFormatSymbols; 1005 decimalFormat = temp.decimalFormat; 1006 locale = temp.locale; 1007 defaultInfinityRule = temp.defaultInfinityRule; 1008 defaultNaNRule = temp.defaultNaNRule; 1009 } 1010 1011 1012 //----------------------------------------------------------------------- 1013 // public API functions 1014 //----------------------------------------------------------------------- 1015 1016 /** 1017 * Returns a list of the names of all of this formatter's public rule sets. 1018 * @return A list of the names of all of this formatter's public rule sets. 1019 */ 1020 public String[] getRuleSetNames() { 1021 return publicRuleSetNames.clone(); 1022 } 1023 1024 /** 1025 * Return a list of locales for which there are locale-specific display names 1026 * for the rule sets in this formatter. If there are no localized display names, return null. 
1027 * @return an array of the ULocales for which there is rule set display name information 1028 */ 1029 public ULocale[] getRuleSetDisplayNameLocales() { 1030 if (ruleSetDisplayNames != null) { 1031 Set<String> s = ruleSetDisplayNames.keySet(); 1032 String[] locales = s.toArray(new String[s.size()]); 1033 Arrays.sort(locales, String.CASE_INSENSITIVE_ORDER); 1034 ULocale[] result = new ULocale[locales.length]; 1035 for (int i = 0; i < locales.length; ++i) { 1036 result[i] = new ULocale(locales[i]); 1037 } 1038 return result; 1039 } 1040 return null; 1041 } 1042 1043 private String[] getNameListForLocale(ULocale loc) { 1044 if (loc != null && ruleSetDisplayNames != null) { 1045 String[] localeNames = { loc.getBaseName(), ULocale.getDefault(Category.DISPLAY).getBaseName() }; 1046 for (String lname : localeNames) { 1047 while (lname.length() > 0) { 1048 String[] names = ruleSetDisplayNames.get(lname); 1049 if (names != null) { 1050 return names; 1051 } 1052 lname = ULocale.getFallback(lname); 1053 } 1054 } 1055 } 1056 return null; 1057 } 1058 1059 /** 1060 * Return the rule set display names for the provided locale. These are in the same order 1061 * as those returned by getRuleSetNames. The locale is matched against the locales for 1062 * which there is display name data, using normal fallback rules. If no locale matches, 1063 * the default display names are returned. (These are the internal rule set names minus 1064 * the leading '%'.) 1065 * @return an array of the locales that have display name information 1066 * @see #getRuleSetNames 1067 */ 1068 public String[] getRuleSetDisplayNames(ULocale loc) { 1069 String[] names = getNameListForLocale(loc); 1070 if (names != null) { 1071 return names.clone(); 1072 } 1073 names = getRuleSetNames(); 1074 for (int i = 0; i < names.length; ++i) { 1075 names[i] = names[i].substring(1); 1076 } 1077 return names; 1078 } 1079 1080 /** 1081 * Return the rule set display names for the current default <code>DISPLAY</code> locale. 
1082 * @return an array of the display names 1083 * @see #getRuleSetDisplayNames(ULocale) 1084 * @see Category#DISPLAY 1085 */ 1086 public String[] getRuleSetDisplayNames() { 1087 return getRuleSetDisplayNames(ULocale.getDefault(Category.DISPLAY)); 1088 } 1089 1090 /** 1091 * Return the rule set display name for the provided rule set and locale. 1092 * The locale is matched against the locales for which there is display name data, using 1093 * normal fallback rules. If no locale matches, the default display name is returned. 1094 * @return the display name for the rule set 1095 * @see #getRuleSetDisplayNames 1096 * @throws IllegalArgumentException if ruleSetName is not a valid rule set name for this format 1097 */ 1098 public String getRuleSetDisplayName(String ruleSetName, ULocale loc) { 1099 String[] rsnames = publicRuleSetNames; 1100 for (int ix = 0; ix < rsnames.length; ++ix) { 1101 if (rsnames[ix].equals(ruleSetName)) { 1102 String[] names = getNameListForLocale(loc); 1103 if (names != null) { 1104 return names[ix]; 1105 } 1106 return rsnames[ix].substring(1); 1107 } 1108 } 1109 throw new IllegalArgumentException("unrecognized rule set name: " + ruleSetName); 1110 } 1111 1112 /** 1113 * Return the rule set display name for the provided rule set in the current default <code>DISPLAY</code> locale. 1114 * @return the display name for the rule set 1115 * @see #getRuleSetDisplayName(String,ULocale) 1116 * @see Category#DISPLAY 1117 */ 1118 public String getRuleSetDisplayName(String ruleSetName) { 1119 return getRuleSetDisplayName(ruleSetName, ULocale.getDefault(Category.DISPLAY)); 1120 } 1121 1122 /** 1123 * Formats the specified number according to the specified rule set. 1124 * @param number The number to format. 1125 * @param ruleSet The name of the rule set to format the number with. 1126 * This must be the name of a valid public rule set for this formatter. 1127 * @return A textual representation of the number. 
1128 */ 1129 public String format(double number, String ruleSet) throws IllegalArgumentException { 1130 if (ruleSet.startsWith("%%")) { 1131 throw new IllegalArgumentException("Can't use internal rule set"); 1132 } 1133 return adjustForContext(format(number, findRuleSet(ruleSet))); 1134 } 1135 1136 /** 1137 * Formats the specified number according to the specified rule set. 1138 * (If the specified rule set specifies a master ["x.0"] rule, this function 1139 * ignores it. Convert the number to a double first if you ned it.) This 1140 * function preserves all the precision in the long-- it doesn't convert it 1141 * to a double. 1142 * @param number The number to format. 1143 * @param ruleSet The name of the rule set to format the number with. 1144 * This must be the name of a valid public rule set for this formatter. 1145 * @return A textual representation of the number. 1146 */ 1147 public String format(long number, String ruleSet) throws IllegalArgumentException { 1148 if (ruleSet.startsWith("%%")) { 1149 throw new IllegalArgumentException("Can't use internal rule set"); 1150 } 1151 return adjustForContext(format(number, findRuleSet(ruleSet))); 1152 } 1153 1154 /** 1155 * Formats the specified number using the formatter's default rule set. 1156 * (The default rule set is the last public rule set defined in the description.) 1157 * @param number The number to format. 1158 * @param toAppendTo A StringBuffer that the result should be appended to. 1159 * @param ignore This function doesn't examine or update the field position. 1160 * @return toAppendTo 1161 */ 1162 @Override 1163 public StringBuffer format(double number, 1164 StringBuffer toAppendTo, 1165 FieldPosition ignore) { 1166 // this is one of the inherited format() methods. Since it doesn't 1167 // have a way to select the rule set to use, it just uses the 1168 // default one 1169 // Note, the BigInteger/BigDecimal methods below currently go through this. 
1170 if (toAppendTo.length() == 0) { 1171 toAppendTo.append(adjustForContext(format(number, defaultRuleSet))); 1172 } else { 1173 // appending to other text, don't capitalize 1174 toAppendTo.append(format(number, defaultRuleSet)); 1175 } 1176 return toAppendTo; 1177 } 1178 1179 /** 1180 * Formats the specified number using the formatter's default rule set. 1181 * (The default rule set is the last public rule set defined in the description.) 1182 * (If the specified rule set specifies a master ["x.0"] rule, this function 1183 * ignores it. Convert the number to a double first if you ned it.) This 1184 * function preserves all the precision in the long-- it doesn't convert it 1185 * to a double. 1186 * @param number The number to format. 1187 * @param toAppendTo A StringBuffer that the result should be appended to. 1188 * @param ignore This function doesn't examine or update the field position. 1189 * @return toAppendTo 1190 */ 1191 @Override 1192 public StringBuffer format(long number, 1193 StringBuffer toAppendTo, 1194 FieldPosition ignore) { 1195 // this is one of the inherited format() methods. Since it doesn't 1196 // have a way to select the rule set to use, it just uses the 1197 // default one 1198 if (toAppendTo.length() == 0) { 1199 toAppendTo.append(adjustForContext(format(number, defaultRuleSet))); 1200 } else { 1201 // appending to other text, don't capitalize 1202 toAppendTo.append(format(number, defaultRuleSet)); 1203 } 1204 return toAppendTo; 1205 } 1206 1207 /** 1208 * <strong style="font-family: helvetica; color: red;">NEW</strong> 1209 * Implement android.icu.text.NumberFormat: 1210 * Format a BigInteger. 
1211 */ 1212 @Override 1213 public StringBuffer format(BigInteger number, 1214 StringBuffer toAppendTo, 1215 FieldPosition pos) { 1216 return format(new android.icu.math.BigDecimal(number), toAppendTo, pos); 1217 } 1218 1219 /** 1220 * <strong style="font-family: helvetica; color: red;">NEW</strong> 1221 * Implement android.icu.text.NumberFormat: 1222 * Format a BigDecimal. 1223 */ 1224 @Override 1225 public StringBuffer format(java.math.BigDecimal number, 1226 StringBuffer toAppendTo, 1227 FieldPosition pos) { 1228 return format(new android.icu.math.BigDecimal(number), toAppendTo, pos); 1229 } 1230 1231 private static final android.icu.math.BigDecimal MAX_VALUE = android.icu.math.BigDecimal.valueOf(Long.MAX_VALUE); 1232 private static final android.icu.math.BigDecimal MIN_VALUE = android.icu.math.BigDecimal.valueOf(Long.MIN_VALUE); 1233 1234 /** 1235 * <strong style="font-family: helvetica; color: red;">NEW</strong> 1236 * Implement android.icu.text.NumberFormat: 1237 * Format a BigDecimal. 1238 */ 1239 @Override 1240 public StringBuffer format(android.icu.math.BigDecimal number, 1241 StringBuffer toAppendTo, 1242 FieldPosition pos) { 1243 if (MIN_VALUE.compareTo(number) > 0 || MAX_VALUE.compareTo(number) < 0) { 1244 // We're outside of our normal range that this framework can handle. 1245 // The DecimalFormat will provide more accurate results. 1246 return getDecimalFormat().format(number, toAppendTo, pos); 1247 } 1248 if (number.scale() == 0) { 1249 return format(number.longValue(), toAppendTo, pos); 1250 } 1251 return format(number.doubleValue(), toAppendTo, pos); 1252 } 1253 1254 /** 1255 * Parses the specified string, beginning at the specified position, according 1256 * to this formatter's rules. This will match the string against all of the 1257 * formatter's public rule sets and return the value corresponding to the longest 1258 * parseable substring. This function's behavior is affected by the lenient 1259 * parse mode. 
1260 * @param text The string to parse 1261 * @param parsePosition On entry, contains the position of the first character 1262 * in "text" to examine. On exit, has been updated to contain the position 1263 * of the first character in "text" that wasn't consumed by the parse. 1264 * @return The number that corresponds to the parsed text. This will be an 1265 * instance of either Long or Double, depending on whether the result has a 1266 * fractional part. 1267 * @see #setLenientParseMode 1268 */ 1269 @Override 1270 public Number parse(String text, ParsePosition parsePosition) { 1271 1272 // parsePosition tells us where to start parsing. We copy the 1273 // text in the string from here to the end inro a new string, 1274 // and create a new ParsePosition and result variable to use 1275 // for the duration of the parse operation 1276 String workingText = text.substring(parsePosition.getIndex()); 1277 ParsePosition workingPos = new ParsePosition(0); 1278 Number tempResult = null; 1279 1280 // keep track of the largest number of characters consumed in 1281 // the various trials, and the result that corresponds to it 1282 Number result = NFRule.ZERO; 1283 ParsePosition highWaterMark = new ParsePosition(workingPos.getIndex()); 1284 1285 // iterate over the public rule sets (beginning with the default one) 1286 // and try parsing the text with each of them. Keep track of which 1287 // one consumes the most characters: that's the one that determines 1288 // the result we return 1289 for (int i = ruleSets.length - 1; i >= 0; i--) { 1290 // skip private or unparseable rule sets 1291 if (!ruleSets[i].isPublic() || !ruleSets[i].isParseable()) { 1292 continue; 1293 } 1294 1295 // try parsing the string with the rule set. 
If it gets past the 1296 // high-water mark, update the high-water mark and the result 1297 tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE); 1298 if (workingPos.getIndex() > highWaterMark.getIndex()) { 1299 result = tempResult; 1300 highWaterMark.setIndex(workingPos.getIndex()); 1301 } 1302 // commented out because this API on ParsePosition doesn't exist in 1.1.x 1303 // if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) { 1304 // highWaterMark.setErrorIndex(workingPos.getErrorIndex()); 1305 // } 1306 1307 // if we manage to use up all the characters in the string, 1308 // we don't have to try any more rule sets 1309 if (highWaterMark.getIndex() == workingText.length()) { 1310 break; 1311 } 1312 1313 // otherwise, reset our internal parse position to the 1314 // beginning and try again with the next rule set 1315 workingPos.setIndex(0); 1316 } 1317 1318 // add the high water mark to our original parse position and 1319 // return the result 1320 parsePosition.setIndex(parsePosition.getIndex() + highWaterMark.getIndex()); 1321 // commented out because this API on ParsePosition doesn't exist in 1.1.x 1322 // if (highWaterMark.getIndex() == 0) { 1323 // parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex()); 1324 // } 1325 return result; 1326 } 1327 1328 /** 1329 * Turns lenient parse mode on and off. 1330 * 1331 * When in lenient parse mode, the formatter uses an RbnfLenientScanner 1332 * for parsing the text. Lenient parsing is only in effect if a scanner 1333 * is set. If a provider is not set, and this is used for parsing, 1334 * a default scanner <code>RbnfLenientScannerProviderImpl</code> will be set if 1335 * it is available on the classpath. Otherwise this will have no effect. 1336 * 1337 * @param enabled If true, turns lenient-parse mode on; if false, turns it off. 
1338 * @see RbnfLenientScanner 1339 * @see RbnfLenientScannerProvider 1340 */ 1341 public void setLenientParseMode(boolean enabled) { 1342 lenientParse = enabled; 1343 } 1344 1345 /** 1346 * Returns true if lenient-parse mode is turned on. Lenient parsing is off 1347 * by default. 1348 * @return true if lenient-parse mode is turned on. 1349 * @see #setLenientParseMode 1350 */ 1351 public boolean lenientParseEnabled() { 1352 return lenientParse; 1353 } 1354 1355 /** 1356 * Sets the provider for the lenient scanner. If this has not been set, 1357 * {@link #setLenientParseMode} 1358 * has no effect. This is necessary to decouple collation from format code. 1359 * @param scannerProvider the provider 1360 * @see #setLenientParseMode 1361 * @see #getLenientScannerProvider 1362 */ 1363 public void setLenientScannerProvider(RbnfLenientScannerProvider scannerProvider) { 1364 this.scannerProvider = scannerProvider; 1365 } 1366 1367 /** 1368 * Returns the lenient scanner provider. If none was set, and lenient parse is 1369 * enabled, this will attempt to instantiate a default scanner, setting it if 1370 * it was successful. Otherwise this returns false. 1371 * 1372 * @see #setLenientScannerProvider 1373 */ 1374 public RbnfLenientScannerProvider getLenientScannerProvider() { 1375 // there's a potential race condition if two threads try to set/get the scanner at 1376 // the same time, but you get what you get, and you shouldn't be using this from 1377 // multiple threads anyway. 
1378 if (scannerProvider == null && lenientParse && !lookedForScanner) { 1379 try { 1380 lookedForScanner = true; 1381 Class<?> cls = Class.forName("android.icu.impl.text.RbnfScannerProviderImpl"); 1382 RbnfLenientScannerProvider provider = (RbnfLenientScannerProvider)cls.newInstance(); 1383 setLenientScannerProvider(provider); 1384 } 1385 catch (Exception e) { 1386 // any failure, we just ignore and return null 1387 } 1388 } 1389 1390 return scannerProvider; 1391 } 1392 1393 /** 1394 * Override the default rule set to use. If ruleSetName is null, reset 1395 * to the initial default rule set. 1396 * @param ruleSetName the name of the rule set, or null to reset the initial default. 1397 * @throws IllegalArgumentException if ruleSetName is not the name of a public ruleset. 1398 */ 1399 public void setDefaultRuleSet(String ruleSetName) { 1400 if (ruleSetName == null) { 1401 if (publicRuleSetNames.length > 0) { 1402 defaultRuleSet = findRuleSet(publicRuleSetNames[0]); 1403 } else { 1404 defaultRuleSet = null; 1405 int n = ruleSets.length; 1406 while (--n >= 0) { 1407 String currentName = ruleSets[n].getName(); 1408 if (currentName.equals("%spellout-numbering") || 1409 currentName.equals("%digits-ordinal") || 1410 currentName.equals("%duration")) { 1411 1412 defaultRuleSet = ruleSets[n]; 1413 return; 1414 } 1415 } 1416 1417 n = ruleSets.length; 1418 while (--n >= 0) { 1419 if (ruleSets[n].isPublic()) { 1420 defaultRuleSet = ruleSets[n]; 1421 break; 1422 } 1423 } 1424 } 1425 } else if (ruleSetName.startsWith("%%")) { 1426 throw new IllegalArgumentException("cannot use private rule set: " + ruleSetName); 1427 } else { 1428 defaultRuleSet = findRuleSet(ruleSetName); 1429 } 1430 } 1431 1432 /** 1433 * Return the name of the current default rule set. 1434 * @return the name of the current default rule set, if it is public, else the empty string. 
1435 */ 1436 public String getDefaultRuleSetName() { 1437 if (defaultRuleSet != null && defaultRuleSet.isPublic()) { 1438 return defaultRuleSet.getName(); 1439 } 1440 return ""; 1441 } 1442 1443 /** 1444 * Sets the decimal format symbols used by this formatter. The formatter uses a copy of the 1445 * provided symbols. 1446 * 1447 * @param newSymbols desired DecimalFormatSymbols 1448 * @see DecimalFormatSymbols 1449 */ 1450 public void setDecimalFormatSymbols(DecimalFormatSymbols newSymbols) { 1451 if (newSymbols != null) { 1452 decimalFormatSymbols = (DecimalFormatSymbols) newSymbols.clone(); 1453 if (decimalFormat != null) { 1454 decimalFormat.setDecimalFormatSymbols(decimalFormatSymbols); 1455 } 1456 if (defaultInfinityRule != null) { 1457 defaultInfinityRule = null; 1458 getDefaultInfinityRule(); // Reset with the new DecimalFormatSymbols 1459 } 1460 if (defaultNaNRule != null) { 1461 defaultNaNRule = null; 1462 getDefaultNaNRule(); // Reset with the new DecimalFormatSymbols 1463 } 1464 1465 // Apply the new decimalFormatSymbols by reparsing the rulesets 1466 for (NFRuleSet ruleSet : ruleSets) { 1467 ruleSet.setDecimalFormatSymbols(decimalFormatSymbols); 1468 } 1469 } 1470 } 1471 1472 /** 1473 * <strong>[icu]</strong> Set a particular DisplayContext value in the formatter, 1474 * such as CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see 1475 * NumberFormat. 1476 * 1477 * @param context The DisplayContext value to set. 
1478 */ 1479 // Here we override the NumberFormat implementation in order to 1480 // lazily initialize relevant items 1481 @Override 1482 public void setContext(DisplayContext context) { 1483 super.setContext(context); 1484 if (!capitalizationInfoIsSet && 1485 (context==DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU || context==DisplayContext.CAPITALIZATION_FOR_STANDALONE)) { 1486 initCapitalizationContextInfo(locale); 1487 capitalizationInfoIsSet = true; 1488 } 1489 if (capitalizationBrkIter == null && (context==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1490 (context==DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) || 1491 (context==DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) )) { 1492 capitalizationBrkIter = BreakIterator.getSentenceInstance(locale); 1493 } 1494 } 1495 1496 /** 1497 * Returns the rounding mode. 1498 * 1499 * @return A rounding mode, between <code>BigDecimal.ROUND_UP</code> and 1500 * <code>BigDecimal.ROUND_UNNECESSARY</code>. 1501 * @see #setRoundingMode 1502 * @see java.math.BigDecimal 1503 */ 1504 @Override 1505 public int getRoundingMode() { 1506 return roundingMode; 1507 } 1508 1509 /** 1510 * Sets the rounding mode. This has no effect unless the rounding increment is greater 1511 * than zero. 1512 * 1513 * @param roundingMode A rounding mode, between <code>BigDecimal.ROUND_UP</code> and 1514 * <code>BigDecimal.ROUND_UNNECESSARY</code>. 1515 * @exception IllegalArgumentException if <code>roundingMode</code> is unrecognized. 
1516 * @see #getRoundingMode 1517 * @see java.math.BigDecimal 1518 */ 1519 @Override 1520 public void setRoundingMode(int roundingMode) { 1521 if (roundingMode < BigDecimal.ROUND_UP || roundingMode > BigDecimal.ROUND_UNNECESSARY) { 1522 throw new IllegalArgumentException("Invalid rounding mode: " + roundingMode); 1523 } 1524 1525 this.roundingMode = roundingMode; 1526 } 1527 1528 1529 //----------------------------------------------------------------------- 1530 // package-internal API 1531 //----------------------------------------------------------------------- 1532 1533 /** 1534 * Returns a reference to the formatter's default rule set. The default 1535 * rule set is the last public rule set in the description, or the one 1536 * most recently set by setDefaultRuleSet. 1537 * @return The formatter's default rule set. 1538 */ 1539 NFRuleSet getDefaultRuleSet() { 1540 return defaultRuleSet; 1541 } 1542 1543 /** 1544 * Returns the scanner to use for lenient parsing. The scanner is 1545 * provided by the provider. 1546 * @return The collator to use for lenient parsing, or null if lenient parsing 1547 * is turned off. 1548 */ 1549 RbnfLenientScanner getLenientScanner() { 1550 if (lenientParse) { 1551 RbnfLenientScannerProvider provider = getLenientScannerProvider(); 1552 if (provider != null) { 1553 return provider.get(locale, lenientParseRules); 1554 } 1555 } 1556 return null; 1557 } 1558 1559 /** 1560 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1561 * instances owned by this formatter. This object is lazily created: this function 1562 * creates it the first time it's called. 1563 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat 1564 * instances owned by this formatter. 1565 */ 1566 DecimalFormatSymbols getDecimalFormatSymbols() { 1567 // lazy-evaluate the DecimalFormatSymbols object. 
This object 1568 // is shared by all DecimalFormat instances belonging to this 1569 // formatter 1570 if (decimalFormatSymbols == null) { 1571 decimalFormatSymbols = new DecimalFormatSymbols(locale); 1572 } 1573 return decimalFormatSymbols; 1574 } 1575 1576 DecimalFormat getDecimalFormat() { 1577 if (decimalFormat == null) { 1578 // Don't use NumberFormat.getInstance, which can cause a recursive call 1579 String pattern = getPattern(locale, NUMBERSTYLE); 1580 decimalFormat = new DecimalFormat(pattern, getDecimalFormatSymbols()); 1581 } 1582 return decimalFormat; 1583 } 1584 1585 PluralFormat createPluralFormat(PluralRules.PluralType pluralType, String pattern) { 1586 return new PluralFormat(locale, pluralType, pattern, getDecimalFormat()); 1587 } 1588 1589 /** 1590 * Returns the default rule for infinity. This object is lazily created: this function 1591 * creates it the first time it's called. 1592 */ 1593 NFRule getDefaultInfinityRule() { 1594 if (defaultInfinityRule == null) { 1595 defaultInfinityRule = new NFRule(this, "Inf: " + getDecimalFormatSymbols().getInfinity()); 1596 } 1597 return defaultInfinityRule; 1598 } 1599 1600 /** 1601 * Returns the default rule for NaN. This object is lazily created: this function 1602 * creates it the first time it's called. 1603 */ 1604 NFRule getDefaultNaNRule() { 1605 if (defaultNaNRule == null) { 1606 defaultNaNRule = new NFRule(this, "NaN: " + getDecimalFormatSymbols().getNaN()); 1607 } 1608 return defaultNaNRule; 1609 } 1610 1611 //----------------------------------------------------------------------- 1612 // construction implementation 1613 //----------------------------------------------------------------------- 1614 1615 /** 1616 * This extracts the special information from the rule sets before the 1617 * main parsing starts. Extra whitespace must have already been removed 1618 * from the description. 
If found, the special information is removed from the 1619 * description and returned, otherwise the description is unchanged and null 1620 * is returned. Note: the trailing semicolon at the end of the special 1621 * rules is stripped. 1622 * @param description the rbnf description with extra whitespace removed 1623 * @param specialName the name of the special rule text to extract 1624 * @return the special rule text, or null if the rule was not found 1625 */ 1626 private String extractSpecial(StringBuilder description, String specialName) { 1627 String result = null; 1628 int lp = description.indexOf(specialName); 1629 if (lp != -1) { 1630 // we've got to make sure we're not in the middle of a rule 1631 // (where specialName would actually get treated as 1632 // rule text) 1633 if (lp == 0 || description.charAt(lp - 1) == ';') { 1634 // locate the beginning and end of the actual special 1635 // rules (there may be whitespace between the name and 1636 // the first token in the description) 1637 int lpEnd = description.indexOf(";%", lp); 1638 1639 if (lpEnd == -1) { 1640 lpEnd = description.length() - 1; // later we add 1 back to get the '%' 1641 } 1642 int lpStart = lp + specialName.length(); 1643 while (lpStart < lpEnd && 1644 PatternProps.isWhiteSpace(description.charAt(lpStart))) { 1645 ++lpStart; 1646 } 1647 1648 // copy out the special rules 1649 result = description.substring(lpStart, lpEnd); 1650 1651 // remove the special rule from the description 1652 description.delete(lp, lpEnd+1); // delete the semicolon but not the '%' 1653 } 1654 } 1655 return result; 1656 } 1657 1658 /** 1659 * This function parses the description and uses it to build all of 1660 * internal data structures that the formatter uses to do formatting 1661 * @param description The description of the formatter's desired behavior. 
1662 * This is either passed in by the caller or loaded out of a resource 1663 * by one of the constructors, and is in the description format specified 1664 * in the class docs. 1665 */ 1666 private void init(String description, String[][] localizations) { 1667 initLocalizations(localizations); 1668 1669 // start by stripping the trailing whitespace from all the rules 1670 // (this is all the whitespace follwing each semicolon in the 1671 // description). This allows us to look for rule-set boundaries 1672 // by searching for ";%" without having to worry about whitespace 1673 // between the ; and the % 1674 StringBuilder descBuf = stripWhitespace(description); 1675 1676 // check to see if there's a set of lenient-parse rules. If there 1677 // is, pull them out into our temporary holding place for them, 1678 // and delete them from the description before the real description- 1679 // parsing code sees them 1680 1681 lenientParseRules = extractSpecial(descBuf, "%%lenient-parse:"); 1682 postProcessRules = extractSpecial(descBuf, "%%post-process:"); 1683 1684 // pre-flight parsing the description and count the number of 1685 // rule sets (";%" marks the end of one rule set and the beginning 1686 // of the next) 1687 int numRuleSets = 1; 1688 int p = 0; 1689 while ((p = descBuf.indexOf(";%", p)) != -1) { 1690 ++numRuleSets; 1691 p += 2; // Skip the length of ";%" 1692 } 1693 1694 // our rule list is an array of the appropriate size 1695 ruleSets = new NFRuleSet[numRuleSets]; 1696 ruleSetsMap = new HashMap<String, NFRuleSet>(numRuleSets * 2 + 1); 1697 defaultRuleSet = null; 1698 1699 // Used to count the number of public rule sets 1700 // Public rule sets have names that begin with % instead of %%. 1701 int publicRuleSetCount = 0; 1702 1703 // divide up the descriptions into individual rule-set descriptions 1704 // and store them in a temporary array. 
At each step, we also 1705 // new up a rule set, but all this does is initialize its name 1706 // and remove it from its description. We can't actually parse 1707 // the rest of the descriptions and finish initializing everything 1708 // because we have to know the names and locations of all the rule 1709 // sets before we can actually set everything up 1710 String[] ruleSetDescriptions = new String[numRuleSets]; 1711 1712 int curRuleSet = 0; 1713 int start = 0; 1714 1715 while (curRuleSet < ruleSets.length) { 1716 p = descBuf.indexOf(";%", start); 1717 if (p < 0) { 1718 p = descBuf.length() - 1; 1719 } 1720 ruleSetDescriptions[curRuleSet] = descBuf.substring(start, p + 1); 1721 NFRuleSet ruleSet = new NFRuleSet(this, ruleSetDescriptions, curRuleSet); 1722 ruleSets[curRuleSet] = ruleSet; 1723 String currentName = ruleSet.getName(); 1724 ruleSetsMap.put(currentName, ruleSet); 1725 if (!currentName.startsWith("%%")) { 1726 ++publicRuleSetCount; 1727 if (defaultRuleSet == null 1728 && currentName.equals("%spellout-numbering") 1729 || currentName.equals("%digits-ordinal") 1730 || currentName.equals("%duration")) 1731 { 1732 defaultRuleSet = ruleSet; 1733 } 1734 } 1735 ++curRuleSet; 1736 start = p + 1; 1737 } 1738 1739 // now we can take note of the formatter's default rule set, which 1740 // is the last public rule set in the description (it's the last 1741 // rather than the first so that a user can create a new formatter 1742 // from an existing formatter and change its default behavior just 1743 // by appending more rule sets to the end) 1744 1745 // {dlf} Initialization of a fraction rule set requires the default rule 1746 // set to be known. For purposes of initialization, this is always the 1747 // last public rule set, no matter what the localization data says. 
1748 1749 // Set the default ruleset to the last public ruleset, unless one of the predefined 1750 // ruleset names %spellout-numbering, %digits-ordinal, or %duration is found 1751 1752 if (defaultRuleSet == null) { 1753 for (int i = ruleSets.length - 1; i >= 0; --i) { 1754 if (!ruleSets[i].getName().startsWith("%%")) { 1755 defaultRuleSet = ruleSets[i]; 1756 break; 1757 } 1758 } 1759 } 1760 if (defaultRuleSet == null) { 1761 defaultRuleSet = ruleSets[ruleSets.length - 1]; 1762 } 1763 1764 // finally, we can go back through the temporary descriptions 1765 // list and finish setting up the substructure 1766 for (int i = 0; i < ruleSets.length; i++) { 1767 ruleSets[i].parseRules(ruleSetDescriptions[i]); 1768 } 1769 1770 // Now that the rules are initialized, the 'real' default rule 1771 // set can be adjusted by the localization data. 1772 1773 // prepare an array of the proper size and copy the names into it 1774 String[] publicRuleSetTemp = new String[publicRuleSetCount]; 1775 publicRuleSetCount = 0; 1776 for (int i = ruleSets.length - 1; i >= 0; i--) { 1777 if (!ruleSets[i].getName().startsWith("%%")) { 1778 publicRuleSetTemp[publicRuleSetCount++] = ruleSets[i].getName(); 1779 } 1780 } 1781 1782 if (publicRuleSetNames != null) { 1783 // confirm the names, if any aren't in the rules, that's an error 1784 // it is ok if the rules contain public rule sets that are not in this list 1785 loop: for (int i = 0; i < publicRuleSetNames.length; ++i) { 1786 String name = publicRuleSetNames[i]; 1787 for (int j = 0; j < publicRuleSetTemp.length; ++j) { 1788 if (name.equals(publicRuleSetTemp[j])) { 1789 continue loop; 1790 } 1791 } 1792 throw new IllegalArgumentException("did not find public rule set: " + name); 1793 } 1794 1795 defaultRuleSet = findRuleSet(publicRuleSetNames[0]); // might be different 1796 } else { 1797 publicRuleSetNames = publicRuleSetTemp; 1798 } 1799 } 1800 1801 /** 1802 * Take the localizations array and create a Map from the locale strings to 1803 * the 
localization arrays. 1804 */ 1805 private void initLocalizations(String[][] localizations) { 1806 if (localizations != null) { 1807 publicRuleSetNames = localizations[0].clone(); 1808 1809 Map<String, String[]> m = new HashMap<String, String[]>(); 1810 for (int i = 1; i < localizations.length; ++i) { 1811 String[] data = localizations[i]; 1812 String loc = data[0]; 1813 String[] names = new String[data.length-1]; 1814 if (names.length != publicRuleSetNames.length) { 1815 throw new IllegalArgumentException("public name length: " + publicRuleSetNames.length + 1816 " != localized names[" + i + "] length: " + names.length); 1817 } 1818 System.arraycopy(data, 1, names, 0, names.length); 1819 m.put(loc, names); 1820 } 1821 1822 if (!m.isEmpty()) { 1823 ruleSetDisplayNames = m; 1824 } 1825 } 1826 } 1827 1828 /** 1829 * Set capitalizationForListOrMenu, capitalizationForStandAlone 1830 */ 1831 private void initCapitalizationContextInfo(ULocale theLocale) { 1832 ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, theLocale); 1833 try { 1834 ICUResourceBundle rdb = rb.getWithFallback("contextTransforms/number-spellout"); 1835 int[] intVector = rdb.getIntVector(); 1836 if (intVector.length >= 2) { 1837 capitalizationForListOrMenu = (intVector[0] != 0); 1838 capitalizationForStandAlone = (intVector[1] != 0); 1839 } 1840 } catch (MissingResourceException e) { 1841 // use default 1842 } 1843 } 1844 1845 /** 1846 * This function is used by init() to strip whitespace between rules (i.e., 1847 * after semicolons). 1848 * @param description The formatter description 1849 * @return The description with all the whitespace that follows semicolons 1850 * taken out. 1851 */ 1852 private StringBuilder stripWhitespace(String description) { 1853 // since we don't have a method that deletes characters (why?!!) 
1854 // create a new StringBuffer to copy the text into 1855 StringBuilder result = new StringBuilder(); 1856 int descriptionLength = description.length(); 1857 1858 // iterate through the characters... 1859 int start = 0; 1860 while (start < descriptionLength) { 1861 // seek to the first non-whitespace character... 1862 while (start < descriptionLength 1863 && PatternProps.isWhiteSpace(description.charAt(start))) 1864 { 1865 ++start; 1866 } 1867 1868 //if the first non-whitespace character is semicolon, skip it and continue 1869 if (start < descriptionLength && description.charAt(start) == ';') { 1870 start += 1; 1871 continue; 1872 } 1873 1874 // locate the next semicolon in the text and copy the text from 1875 // our current position up to that semicolon into the result 1876 int p = description.indexOf(';', start); 1877 if (p == -1) { 1878 // or if we don't find a semicolon, just copy the rest of 1879 // the string into the result 1880 result.append(description.substring(start)); 1881 break; 1882 } 1883 else if (p < descriptionLength) { 1884 result.append(description.substring(start, p + 1)); 1885 start = p + 1; 1886 } 1887 else { 1888 // when we get here, we've seeked off the end of the string, and 1889 // we terminate the loop (we continue until *start* is -1 rather 1890 // than until *p* is -1, because otherwise we'd miss the last 1891 // rule in the description) 1892 break; 1893 } 1894 } 1895 return result; 1896 } 1897 1898 //----------------------------------------------------------------------- 1899 // formatting implementation 1900 //----------------------------------------------------------------------- 1901 1902 /** 1903 * Bottleneck through which all the public format() methods 1904 * that take a double pass. By the time we get here, we know 1905 * which rule set we're using to do the formatting. 
1906 * @param number The number to format 1907 * @param ruleSet The rule set to use to format the number 1908 * @return The text that resulted from formatting the number 1909 */ 1910 private String format(double number, NFRuleSet ruleSet) { 1911 // all API format() routines that take a double vector through 1912 // here. Create an empty string buffer where the result will 1913 // be built, and pass it to the rule set (along with an insertion 1914 // position of 0 and the number being formatted) to the rule set 1915 // for formatting 1916 StringBuilder result = new StringBuilder(); 1917 if (getRoundingMode() != BigDecimal.ROUND_UNNECESSARY && !Double.isNaN(number) && !Double.isInfinite(number)) { 1918 // We convert to a string because BigDecimal insists on excessive precision. 1919 number = new BigDecimal(Double.toString(number)).setScale(getMaximumFractionDigits(), roundingMode).doubleValue(); 1920 } 1921 ruleSet.format(number, result, 0, 0); 1922 postProcess(result, ruleSet); 1923 return result.toString(); 1924 } 1925 1926 /** 1927 * Bottleneck through which all the public format() methods 1928 * that take a long pass. By the time we get here, we know 1929 * which rule set we're using to do the formatting. 1930 * @param number The number to format 1931 * @param ruleSet The rule set to use to format the number 1932 * @return The text that resulted from formatting the number 1933 */ 1934 private String format(long number, NFRuleSet ruleSet) { 1935 // all API format() routines that take a double vector through 1936 // here. We have these two identical functions-- one taking a 1937 // double and one taking a long-- the couple digits of precision 1938 // that long has but double doesn't (both types are 8 bytes long, 1939 // but double has to borrow some of the mantissa bits to hold 1940 // the exponent). 
1941 // Create an empty string buffer where the result will 1942 // be built, and pass it to the rule set (along with an insertion 1943 // position of 0 and the number being formatted) to the rule set 1944 // for formatting 1945 StringBuilder result = new StringBuilder(); 1946 if (number == Long.MIN_VALUE) { 1947 // We can't handle this value right now. Provide an accurate default value. 1948 result.append(getDecimalFormat().format(Long.MIN_VALUE)); 1949 } 1950 else { 1951 ruleSet.format(number, result, 0, 0); 1952 } 1953 postProcess(result, ruleSet); 1954 return result.toString(); 1955 } 1956 1957 /** 1958 * Post-process the rules if we have a post-processor. 1959 */ 1960 private void postProcess(StringBuilder result, NFRuleSet ruleSet) { 1961 if (postProcessRules != null) { 1962 if (postProcessor == null) { 1963 int ix = postProcessRules.indexOf(";"); 1964 if (ix == -1) { 1965 ix = postProcessRules.length(); 1966 } 1967 String ppClassName = postProcessRules.substring(0, ix).trim(); 1968 try { 1969 Class<?> cls = Class.forName(ppClassName); 1970 postProcessor = (RBNFPostProcessor)cls.newInstance(); 1971 postProcessor.init(this, postProcessRules); 1972 } 1973 catch (Exception e) { 1974 // if debug, print it out 1975 if (DEBUG) System.out.println("could not locate " + ppClassName + ", error " + 1976 e.getClass().getName() + ", " + e.getMessage()); 1977 postProcessor = null; 1978 postProcessRules = null; // don't try again 1979 return; 1980 } 1981 } 1982 1983 postProcessor.process(result, ruleSet); 1984 } 1985 } 1986 1987 /** 1988 * Adjust capitalization of formatted result for display context 1989 */ 1990 private String adjustForContext(String result) { 1991 DisplayContext capitalization = getContext(DisplayContext.Type.CAPITALIZATION); 1992 if (capitalization != DisplayContext.CAPITALIZATION_NONE && result != null && result.length() > 0 1993 && UCharacter.isLowerCase(result.codePointAt(0))) 1994 { 1995 if ( 
capitalization==DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1996 (capitalization == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu) || 1997 (capitalization == DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone) ) { 1998 if (capitalizationBrkIter == null) { 1999 // should only happen when deserializing, etc. 2000 capitalizationBrkIter = BreakIterator.getSentenceInstance(locale); 2001 } 2002 return UCharacter.toTitleCase(locale, result, capitalizationBrkIter, 2003 UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT); 2004 } 2005 } 2006 return result; 2007 } 2008 2009 /** 2010 * Returns the named rule set. Throws an IllegalArgumentException 2011 * if this formatter doesn't have a rule set with that name. 2012 * @param name The name of the desired rule set 2013 * @return The rule set with that name 2014 */ 2015 NFRuleSet findRuleSet(String name) throws IllegalArgumentException { 2016 NFRuleSet result = ruleSetsMap.get(name); 2017 if (result == null) { 2018 throw new IllegalArgumentException("No rule set named " + name); 2019 } 2020 return result; 2021 } 2022 } 2023