Home | History | Annotate | Download | only in simple
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2007-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  */
      7 
      8 package com.ibm.icu.simple;
      9 
     10 import java.io.IOException;
     11 import java.io.ObjectInputStream;
     12 import java.text.NumberFormat;
     13 import java.text.ParsePosition;
     14 import java.util.Locale;
     15 import java.util.Map;
     16 
     17 import com.ibm.icu.simple.PluralRules.FixedDecimal;
     18 import com.ibm.icu.simple.PluralRules.PluralType;
     19 import com.ibm.icu.text.MessagePattern;
     20 
     21 /**
     22  * <p>
     23  * <code>PluralFormat</code> supports the creation of internationalized
     24  * messages with plural inflection. It is based on <i>plural
     25  * selection</i>, i.e. the caller specifies messages for each
     26  * plural case that can appear in the user's language and the
     27  * <code>PluralFormat</code> selects the appropriate message based on
     28  * the number.
     29  * </p>
     30  * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
     31  * <p>
     32  * Different languages have different ways to inflect
     33  * plurals. Creating internationalized messages that include plural
     34  * forms is only feasible when the framework is able to handle plural
     35  * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
     36  * doesn't handle this well, because it attaches a number interval to
     37  * each message and selects the message whose interval contains a
     38  * given number. This can only handle a finite number of
     39  * intervals. But in some languages, like Polish, one plural case
     40  * applies to infinitely many intervals (e.g., the paucal case applies to
     41  * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
     42  * 14). Thus <code>ChoiceFormat</code> is not adequate.
     43  * </p><p>
     44  * <code>PluralFormat</code> deals with this by breaking the problem
     45  * into two parts:
     46  * <ul>
     47  * <li>It uses <code>PluralRules</code> that can define more complex
     48  *     conditions for a plural case than just a single interval. These plural
     49  *     rules define both what plural cases exist in a language, and to
     50  *     which numbers these cases apply.
     51  * <li>It provides predefined plural rules for many languages. Thus, the programmer
     52  *     need not worry about the plural cases of a language and
     53  *     does not have to define the plural cases; they can simply
     54  *     use the predefined keywords. The whole plural formatting of messages can
     55  *     be done using localized patterns from resource bundles. For predefined plural
     56  *     rules, see the CLDR <i>Language Plural Rules</i> page at
     57  *    http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
     58  * </ul>
     59  * </p>
     60  * <h4>Usage of <code>PluralFormat</code></h4>
     61  * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
     62  * with a <code>plural</code> argument type,
     63  * rather than using a stand-alone <code>PluralFormat</code>.
     64  * </p><p>
     65  * This discussion assumes that you use <code>PluralFormat</code> with
     66  * a predefined set of plural rules. You can create one using one of
     67  * the constructors that takes a <code>Locale</code> object. To
     68  * specify the message pattern, you can either pass it to the
     69  * constructor or set it explicitly using the
     70  * <code>applyPattern()</code> method. The <code>format()</code>
     71  * method takes a number object and selects the message of the
     72  * matching plural case. This message will be returned.
     73  * </p>
     74  * <h5>Patterns and Their Interpretation</h5>
     75  * <p>
     76  * The pattern text defines the message output for each plural case of the
     77  * specified locale. Syntax:
     78  * <blockquote><pre>
     79  * pluralStyle = [offsetValue] (selector '{' message '}')+
     80  * offsetValue = "offset:" number
     81  * selector = explicitValue | keyword
     82  * explicitValue = '=' number  // adjacent, no white space in between
     83  * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
     84  * message: see {@link MessageFormat}
     85  * </pre></blockquote>
     86  * Pattern_White_Space between syntax elements is ignored, except
     87  * between the {curly braces} and their sub-message,
     88  * and between the '=' and the number of an explicitValue.
     89  *
     90  * </p><p>
     91  * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
     92  * 'other'. You always have to define a message text for the default plural case
     93  * "<code>other</code>" which is contained in every rule set.
     94  * If you do not specify a message text for a particular plural case, the
     95  * message text of the plural case "<code>other</code>" gets assigned to this
     96  * plural case.
     97  * </p><p>
     98  * When formatting, the input number is first matched against the explicitValue clauses.
     99  * If there is no exact-number match, then a keyword is selected by calling
    100  * the <code>PluralRules</code> with the input number <em>minus the offset</em>.
    101  * (The offset defaults to 0 if it is omitted from the pattern string.)
    102  * If there is no clause with that keyword, then the "other" clause is returned.
    103  * </p><p>
    104  * An unquoted pound sign (<code>#</code>) in the selected sub-message
    105  * itself (i.e., outside of arguments nested in the sub-message)
    106  * is replaced by the input number minus the offset.
    107  * The number-minus-offset value is formatted using a
    108  * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
    109  * need special number formatting, you have to use a <code>MessageFormat</code>
    110  * and explicitly specify a <code>NumberFormat</code> argument.
    111  * <strong>Note:</strong> That argument is formatted without subtracting the offset!
    112  * If you need a custom format and have a non-zero offset, then you need to pass the
    113  * number-minus-offset value as a separate parameter.
    114  * </p>
    115  * For a usage example, see the {@link MessageFormat} class documentation.
    116  *
    117  * <h4>Defining Custom Plural Rules</h4>
    118  * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
    119  * create a <code>PluralRules</code> object and pass it to
    120  * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
    121  * constructor, this locale will be used to format the number in the message
    122  * texts.
    123  * </p><p>
    124  * For more information about <code>PluralRules</code>, see
    125  * {@link PluralRules}.
    126  * </p>
    127  *
    128  * @author tschumann (Tim Schumann)
    129  * @stable ICU 3.8
    130  */
    131 public class PluralFormat /* extends UFormat */ {
    132     private static final long serialVersionUID = 1L;
    133 
    134     /**
    135      * The locale used for standard number formatting and getting the predefined
    136      * plural rules (if they were not defined explicitely).
    137      * @serial
    138      */
    139     private Locale locale_ = null;
    140 
    141     /**
    142      * The plural rules used for plural selection.
    143      * @serial
    144      */
    145     private PluralRules pluralRules = null;
    146 
    147     /**
    148      * The applied pattern string.
    149      * @serial
    150      */
    151     private String pattern = null;
    152 
    153     /**
    154      * The MessagePattern which contains the parsed structure of the pattern string.
    155      */
    156     transient private MessagePattern msgPattern;
    157 
    158     /**
    159      * Obsolete with use of MessagePattern since ICU 4.8. Used to be:
    160      * The format messages for each plural case. It is a mapping:
    161      *  <code>String</code>(plural case keyword) --&gt; <code>String</code>
    162      *  (message for this plural case).
    163      * @serial
    164      */
    165     private Map<String, String> parsedValues = null;
    166 
    167     /**
    168      * This <code>NumberFormat</code> is used for the standard formatting of
    169      * the number inserted into the message.
    170      * @serial
    171      */
    172     private NumberFormat numberFormat = null;
    173 
    174     /**
    175      * The offset to subtract before invoking plural rules.
    176      */
    177     transient private double offset = 0;
    178 
    179     /**
    180      * Creates a new cardinal-number <code>PluralFormat</code> for the default <code>FORMAT</code> locale.
    181      * This locale will be used to get the set of plural rules and for standard
    182      * number formatting.
    183      * @see Category#FORMAT
    184      * @stable ICU 3.8
    185      */
    186     public PluralFormat() {
    187         init(null, PluralType.CARDINAL, Locale.getDefault());  // Category.FORMAT
    188     }
    189 
    190     /**
    191      * Creates a new cardinal-number <code>PluralFormat</code> for a given locale.
    192      * @param locale the <code>PluralFormat</code> will be configured with
    193      *        rules for this locale. This locale will also be used for standard
    194      *        number formatting.
    195      * @stable ICU 3.8
    196      */
    197     public PluralFormat(Locale locale) {
    198         init(null, PluralType.CARDINAL, locale);
    199     }
    200 
    201     /**
    202      * Creates a new <code>PluralFormat</code> for the plural type.
    203      * The standard number formatting will be done using the given locale.
    204      * @param locale the default number formatting will be done using this
    205      *        locale.
    206      * @param type The plural type (e.g., cardinal or ordinal).
    207      * @stable ICU 50
    208      */
    209     public PluralFormat(Locale locale, PluralType type) {
    210         init(null, type, locale);
    211     }
    212 
    213     /*
    214      * Initializes the <code>PluralRules</code> object.
    215      * Postcondition:<br/>
    216      *   <code>ulocale</code>    :  is <code>locale</code><br/>
    217      *   <code>pluralRules</code>:  if <code>rules</code> != <code>null</code>
    218      *                              it's set to rules, otherwise it is the
    219      *                              predefined plural rule set for the locale
    220      *                              <code>ulocale</code>.<br/>
    221      *   <code>parsedValues</code>: is <code>null</code><br/>
    222      *   <code>pattern</code>:      is <code>null</code><br/>
    223      *   <code>numberFormat</code>: a <code>NumberFormat</code> for the locale
    224      *                              <code>ulocale</code>.
    225      */
    226     private void init(PluralRules rules, PluralType type, Locale locale) {
    227         locale_ = locale;
    228         pluralRules = (rules == null) ? PluralRules.forLocale(locale, type)
    229                                       : rules;
    230         resetPattern();
    231         numberFormat = NumberFormat.getInstance(locale);
    232     }
    233 
    234     private void resetPattern() {
    235         pattern = null;
    236         if(msgPattern != null) {
    237             msgPattern.clear();
    238         }
    239         offset = 0;
    240     }
    241 
    242     /**
    243      * Sets the pattern used by this plural format.
    244      * The method parses the pattern and creates a map of format strings
    245      * for the plural rules.
    246      * Patterns and their interpretation are specified in the class description.
    247      *
    248      * @param pattern the pattern for this plural format.
    249      * @throws IllegalArgumentException if the pattern is invalid.
    250      * @stable ICU 3.8
    251      */
    252     public void applyPattern(String pattern) {
    253         this.pattern = pattern;
    254         if (msgPattern == null) {
    255             msgPattern = new MessagePattern();
    256         }
    257         try {
    258             msgPattern.parsePluralStyle(pattern);
    259             offset = msgPattern.getPluralOffset(0);
    260         } catch(RuntimeException e) {
    261             resetPattern();
    262             throw e;
    263         }
    264     }
    265 
    266     /**
    267      * Returns the pattern for this PluralFormat.
    268      *
    269      * @return the pattern string
    270      * @stable ICU 4.2
    271      */
    272     public String toPattern() {
    273         return pattern;
    274     }
    275 
    276     /**
    277      * Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
    278      * @param pattern A MessagePattern.
    279      * @param partIndex the index of the first PluralFormat argument style part.
    280      * @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
    281      * @param context worker object for the selector.
    282      * @param number a number to be matched to one of the PluralFormat argument's explicit values,
    283      *        or mapped via the PluralSelector.
    284      * @return the sub-message start part index.
    285      */
    286     /*package*/ static int findSubMessage(
    287             MessagePattern pattern, int partIndex,
    288             PluralSelector selector, Object context, double number) {
    289         int count=pattern.countParts();
    290         double offset;
    291         MessagePattern.Part part=pattern.getPart(partIndex);
    292         if(part.getType().hasNumericValue()) {
    293             offset=pattern.getNumericValue(part);
    294             ++partIndex;
    295         } else {
    296             offset=0;
    297         }
    298         // The keyword is null until we need to match against a non-explicit, not-"other" value.
    299         // Then we get the keyword from the selector.
    300         // (In other words, we never call the selector if we match against an explicit value,
    301         // or if the only non-explicit keyword is "other".)
    302         String keyword=null;
    303         // When we find a match, we set msgStart>0 and also set this boolean to true
    304         // to avoid matching the keyword again (duplicates are allowed)
    305         // while we continue to look for an explicit-value match.
    306         boolean haveKeywordMatch=false;
    307         // msgStart is 0 until we find any appropriate sub-message.
    308         // We remember the first "other" sub-message if we have not seen any
    309         // appropriate sub-message before.
    310         // We remember the first matching-keyword sub-message if we have not seen
    311         // one of those before.
    312         // (The parser allows [does not check for] duplicate keywords.
    313         // We just have to make sure to take the first one.)
    314         // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    315         // at the first keyword match.
    316         // We keep going until we find an explicit-value match or reach the end of the plural style.
    317         int msgStart=0;
    318         // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    319         // until ARG_LIMIT or end of plural-only pattern.
    320         do {
    321             part=pattern.getPart(partIndex++);
    322             MessagePattern.Part.Type type=part.getType();
    323             if(type==MessagePattern.Part.Type.ARG_LIMIT) {
    324                 break;
    325             }
    326             assert type==MessagePattern.Part.Type.ARG_SELECTOR;
    327             // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    328             if(pattern.getPartType(partIndex).hasNumericValue()) {
    329                 // explicit value like "=2"
    330                 part=pattern.getPart(partIndex++);
    331                 if(number==pattern.getNumericValue(part)) {
    332                     // matches explicit value
    333                     return partIndex;
    334                 }
    335             } else if(!haveKeywordMatch) {
    336                 // plural keyword like "few" or "other"
    337                 // Compare "other" first and call the selector if this is not "other".
    338                 if(pattern.partSubstringMatches(part, "other")) {
    339                     if(msgStart==0) {
    340                         msgStart=partIndex;
    341                         if(keyword!=null && keyword.equals("other")) {
    342                             // This is the first "other" sub-message,
    343                             // and the selected keyword is also "other".
    344                             // Do not match "other" again.
    345                             haveKeywordMatch=true;
    346                         }
    347                     }
    348                 } else {
    349                     if(keyword==null) {
    350                         keyword=selector.select(context, number-offset);
    351                         if(msgStart!=0 && keyword.equals("other")) {
    352                             // We have already seen an "other" sub-message.
    353                             // Do not match "other" again.
    354                             haveKeywordMatch=true;
    355                             // Skip keyword matching but do getLimitPartIndex().
    356                         }
    357                     }
    358                     if(!haveKeywordMatch && pattern.partSubstringMatches(part, keyword)) {
    359                         // keyword matches
    360                         msgStart=partIndex;
    361                         // Do not match this keyword again.
    362                         haveKeywordMatch=true;
    363                     }
    364                 }
    365             }
    366             partIndex=pattern.getLimitPartIndex(partIndex);
    367         } while(++partIndex<count);
    368         return msgStart;
    369     }
    370 
    371     /**
    372      * Interface for selecting PluralFormat keywords for numbers.
    373      * The PluralRules class was intended to implement this interface,
    374      * but there is no public API that uses a PluralSelector,
    375      * only MessageFormat and PluralFormat have PluralSelector implementations.
    376      * Therefore, PluralRules is not marked to implement this non-public interface,
    377      * to avoid confusing users.
    378      * @internal
    379      */
    380     /*package*/ interface PluralSelector {
    381         /**
    382          * Given a number, returns the appropriate PluralFormat keyword.
    383          *
    384          * @param context worker object for the selector.
    385          * @param number The number to be plural-formatted.
    386          * @return The selected PluralFormat keyword.
    387          */
    388         public String select(Object context, double number);
    389     }
    390 
    391     // See PluralSelector:
    392     // We could avoid this adapter class if we made PluralSelector public
    393     // (or at least publicly visible) and had PluralRules implement PluralSelector.
    394     private final class PluralSelectorAdapter implements PluralSelector {
    395         public String select(Object context, double number) {
    396             FixedDecimal dec = (FixedDecimal) context;
    397             assert dec.source == number;
    398             return pluralRules.select(dec);
    399         }
    400     }
    401     transient private PluralSelectorAdapter pluralRulesWrapper = new PluralSelectorAdapter();
    402 
    403     /**
    404      * This method is not yet supported by <code>PluralFormat</code>.
    405      * @param text the string to be parsed.
    406      * @param parsePosition defines the position where parsing is to begin,
    407      * and upon return, the position where parsing left off.  If the position
    408      * has not changed upon return, then parsing failed.
    409      * @return nothing because this method is not yet implemented.
    410      * @throws UnsupportedOperationException will always be thrown by this method.
    411      * @stable ICU 3.8
    412      */
    413     public Number parse(String text, ParsePosition parsePosition) {
    414         throw new UnsupportedOperationException();
    415     }
    416 
    417     /**
    418      * This method is not yet supported by <code>PluralFormat</code>.
    419      * @param source the string to be parsed.
    420      * @param pos defines the position where parsing is to begin,
    421      * and upon return, the position where parsing left off.  If the position
    422      * has not changed upon return, then parsing failed.
    423      * @return nothing because this method is not yet implemented.
    424      * @throws UnsupportedOperationException will always be thrown by this method.
    425      * @stable ICU 3.8
    426      */
    427     public Object parseObject(String source, ParsePosition pos) {
    428         throw new UnsupportedOperationException();
    429     }
    430 
    431     /**
    432      * Returns true if this equals the provided PluralFormat.
    433      * @param rhs the PluralFormat to compare against
    434      * @return true if this equals rhs
    435      * @stable ICU 3.8
    436      */
    437     public boolean equals(PluralFormat rhs) {
    438         return equals((Object)rhs);
    439     }
    440 
    441     /**
    442      * {@inheritDoc}
    443      * @stable ICU 3.8
    444      */
    445     @Override
    446     public int hashCode() {
    447         return pluralRules.hashCode() ^ parsedValues.hashCode();
    448     }
    449 
    450     /**
    451      * {@inheritDoc}
    452      * @stable ICU 3.8
    453      */
    454     @Override
    455     public String toString() {
    456         StringBuilder buf = new StringBuilder();
    457         buf.append("locale=" + locale_);
    458         buf.append(", rules='" + pluralRules + "'");
    459         buf.append(", pattern='" + pattern + "'");
    460         buf.append(", format='" + numberFormat + "'");
    461         return buf.toString();
    462     }
    463 
    464     private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    465         in.defaultReadObject();
    466         pluralRulesWrapper = new PluralSelectorAdapter();
    467         // Ignore the parsedValues from an earlier class version (before ICU 4.8)
    468         // and rebuild the msgPattern.
    469         parsedValues = null;
    470         if (pattern != null) {
    471             applyPattern(pattern);
    472         }
    473     }
    474 }
    475