Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2010, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 
      8 * File PLURFMT.H
      9 *
     10 * Modification History:*
     11 *   Date        Name        Description
     12 *
     13 ********************************************************************************
     14 */
     15 
     16 #ifndef PLURFMT
     17 #define PLURFMT
     18 
     19 #include "unicode/utypes.h"
     20 
     21 /**
     22  * \file
     23  * \brief C++ API: PluralFormat object
     24  */
     25 
     26 #if !UCONFIG_NO_FORMATTING
     27 
     28 #include "unicode/numfmt.h"
     29 #include "unicode/plurrule.h"
     30 
     31 U_NAMESPACE_BEGIN
     32 
     33 class Hashtable;
     34 
     35 /**
     36  * <p>
     37  * <code>PluralFormat</code> supports the creation of internationalized
     38  * messages with plural inflection. It is based on <i>plural
     39  * selection</i>, i.e. the caller specifies messages for each
      40  * plural case that can appear in the user's language and the
     41  * <code>PluralFormat</code> selects the appropriate message based on
     42  * the number.
     43  * </p>
     44  * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
     45  * <p>
     46  * Different languages have different ways to inflect
     47  * plurals. Creating internationalized messages that include plural
     48  * forms is only feasible when the framework is able to handle plural
     49  * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
     50  * doesn't handle this well, because it attaches a number interval to
     51  * each message and selects the message whose interval contains a
     52  * given number. This can only handle a finite number of
     53  * intervals. But in some languages, like Polish, one plural case
     54  * applies to infinitely many intervals (e.g., paucal applies to
     55  * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
     56  * 14). Thus <code>ChoiceFormat</code> is not adequate.
     57  * </p><p>
     58  * <code>PluralFormat</code> deals with this by breaking the problem
     59  * into two parts:
     60  * <ul>
     61  * <li>It uses <code>PluralRules</code> that can define more complex
     62  *     conditions for a plural case than just a single interval. These plural
     63  *     rules define both what plural cases exist in a language, and to
     64  *     which numbers these cases apply.
     65  * <li>It provides predefined plural rules for many locales. Thus, the programmer
     66  *     need not worry about the plural cases of a language. On the flip side,
     67  *     the localizer does not have to specify the plural cases; he can simply
     68  *     use the predefined keywords. The whole plural formatting of messages can
     69  *     be done using localized patterns from resource bundles. For predefined plural
     70  *     rules, see CLDR <i>Language Plural Rules</i> page at
     71  *    http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
     72  * </ul>
     73  * </p>
     74  * <h4>Usage of <code>PluralFormat</code></h4>
     75  * <p>
     76  * This discussion assumes that you use <code>PluralFormat</code> with
     77  * a predefined set of plural rules. You can create one using one of
     78  * the constructors that takes a <code>locale</code> object. To
     79  * specify the message pattern, you can either pass it to the
     80  * constructor or set it explicitly using the
     81  * <code>applyPattern()</code> method. The <code>format()</code>
     82  * method takes a number object and selects the message of the
     83  * matching plural case. This message will be returned.
     84  * </p>
     85  * <h5>Patterns and Their Interpretation</h5>
     86  * <p>
     87  * The pattern text defines the message output for each plural case of the
     88  * used locale. The pattern is a sequence of
     89  * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
     90  * space characters. Each clause assigns the message <code><i>message</i></code>
     91  * to the plural case identified by <code><i>caseKeyword</i></code>.
     92  * </p><p>
      93  * There are 6 predefined case keywords in ICU - 'zero', 'one', 'two', 'few', 'many' and
     94  * 'other'. You always have to define a message text for the default plural case
     95  * "<code>other</code>" which is contained in every rule set. If the plural
     96  * rules of the <code>PluralFormat</code> object do not contain a plural case
     97  * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
     98  * will be set to status.
     99  * If you do not specify a message text for a particular plural case, the
    100  * message text of the plural case "<code>other</code>" gets assigned to this
    101  * plural case. If you specify more than one message for the same plural case,
    102  * U_DUPLICATE_KEYWORD will be set to status.
    103  * <br>
    104  * Spaces between <code><i>caseKeyword</i></code> and
    105  * <code><i>message</i></code>  will be ignored; spaces within
    106  * <code><i>message</i></code> will be preserved.
    107  * </p><p>
    108  * The message text for a particular plural case may contain other message
    109  * format patterns. <code>PluralFormat</code> preserves these so that you
    110  * can use the strings produced by <code>PluralFormat</code> with other
    111  * formatters. If you are using <code>PluralFormat</code> inside a
    112  * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
    113  * automatically evaluate the resulting format pattern.<br>
    114  * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
    115  * in message texts to define a nested format pattern.<br>
    116  * The pound sign (<code>#</code>) will be interpreted as the number placeholder
    117  * in the message text, if it is not contained in curly braces (to preserve
    118  * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
    119  * replace each of those pound signs by the number passed to the
    120  * <code>format()</code> method. It will be formatted using a
    121  * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
    122  * need special number formatting, you have to explicitly specify a
    123  * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
    124  * </p>
    125  * Example
    126  * <pre>
    127  * \code
    128  * UErrorCode status = U_ZERO_ERROR;
    129  * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
    130  *   one{{0, number, C''est #,##0.0#  fichier}} other {Ce sont # fichiers}} dans la liste."),
    131  *   Locale("fr"), status);
    132  * if (U_FAILURE(status)) {
    133  *     return;
    134  * }
    135  * Formattable args1[] = {(int32_t)0};
    136  * Formattable args2[] = {(int32_t)3};
    137  * FieldPosition ignore(FieldPosition::DONT_CARE);
    138  * UnicodeString result;
    139  * msgFmt->format(args1, 1, result, ignore, status);
    140  * cout << result << endl;
    141  * result.remove();
    142  * msgFmt->format(args2, 1, result, ignore, status);
    143  * cout << result << endl;
    144  * \endcode
    145  * </pre>
    146  * Produces the output:<br>
    147  * <code>C'est 0,0 fichier dans la liste.</code><br>
    148  * <code>Ce sont 3 fichiers dans la liste.</code>
    149  * <p>
    150  * <strong>Note:</strong><br>
    151  *   Currently <code>PluralFormat</code>
    152  *   does not make use of quotes like <code>MessageFormat</code>.
    153  *   If you use plural format strings with <code>MessageFormat</code> and want
    154  *   to use a quote sign <code>'</code>, you have to write <code>''</code>.
    155  *   <code>MessageFormat</code> unquotes this pattern and  passes the unquoted
    156  *   pattern to <code>PluralFormat</code>. It's a bit trickier if you use
    157  *   nested formats that do quoting. In the example above, we wanted to insert
    158  *   <code>'</code> in the number format pattern. Since
    159  *   <code>NumberFormat</code> supports quotes, we had to insert
    160  *   <code>''</code>. But since <code>MessageFormat</code> unquotes the
    161  *   pattern before it gets passed to <code>PluralFormat</code>, we have to
    162  *   double these quotes, i.e. write <code>''''</code>.
    163  * </p>
    164  * <h4>Defining Custom Plural Rules</h4>
    165  * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
    166  * create a <code>PluralRules</code> object and pass it to
    167  * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
    168  * constructor, this locale will be used to format the number in the message
    169  * texts.
    170  * </p><p>
    171  * For more information about <code>PluralRules</code>, see
    172  * {@link PluralRules}.
    173  * </p>
    174  *
    175  * ported from Java
    176  * @stable ICU 4.0
    177  */
    178 
     179 class U_I18N_API PluralFormat : public Format {
     180 public:
     181 
     182     /**
     183      * Creates a new <code>PluralFormat</code> for the default locale.
     184      * This locale will be used to get the set of plural rules and for standard
     185      * number formatting.
     186      * @param status  output param set to success/failure code on exit, which
     187      *                must not indicate a failure before the function call.
     188      * @stable ICU 4.0
     189      */
     190     PluralFormat(UErrorCode& status);
     191 
     192     /**
     193      * Creates a new <code>PluralFormat</code> for a given locale.
     194      * @param locale the <code>PluralFormat</code> will be configured with
     195      *               rules for this locale. This locale will also be used for
     196      *               standard number formatting.
     197      * @param status output param set to success/failure code on exit, which
     198      *               must not indicate a failure before the function call.
     199      * @stable ICU 4.0
     200      */
     201     PluralFormat(const Locale& locale, UErrorCode& status);
     202 
     203     /**
     204      * Creates a new <code>PluralFormat</code> for a given set of rules.
     205      * The standard number formatting will be done using the default locale.
     206      * @param rules   defines the behavior of the <code>PluralFormat</code>
     207      *                object.
     208      * @param status  output param set to success/failure code on exit, which
     209      *                must not indicate a failure before the function call.
     210      * @stable ICU 4.0
     211      */
     212     PluralFormat(const PluralRules& rules, UErrorCode& status);
     213 
     214     /**
     215      * Creates a new <code>PluralFormat</code> for a given set of rules.
     216      * The standard number formatting will be done using the given locale.
     217      * @param locale  the default number formatting will be done using this
     218      *                locale.
     219      * @param rules   defines the behavior of the <code>PluralFormat</code>
     220      *                object.
     221      * @param status  output param set to success/failure code on exit, which
     222      *                must not indicate a failure before the function call.
     223      * @stable ICU 4.0
     224      */
     225     PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
     226 
     227     /**
     228      * Creates a new <code>PluralFormat</code> for a given pattern string.
     229      * The default locale will be used to get the set of plural rules and for
     230      * standard number formatting.
     231      * @param pattern  the pattern for this <code>PluralFormat</code>.
     232      *                 Errors are returned to status if the pattern is invalid.
     233      * @param status   output param set to success/failure code on exit, which
     234      *                 must not indicate a failure before the function call.
     235      * @stable ICU 4.0
     236      */
     237     PluralFormat(const UnicodeString& pattern, UErrorCode& status);
     238 
     239     /**
     240      * Creates a new <code>PluralFormat</code> for a given pattern string and
     241      * locale.
     242      * The locale will be used to get the set of plural rules and for
     243      * standard number formatting.
     244      * @param locale   the <code>PluralFormat</code> will be configured with
     245      *                 rules for this locale. This locale will also be used for
     246      *                 standard number formatting.
     247      * @param pattern  the pattern for this <code>PluralFormat</code>.
     248      *                 Errors are returned to status if the pattern is invalid.
     249      * @param status   output param set to success/failure code on exit, which
     250      *                 must not indicate a failure before the function call.
     251      * @stable ICU 4.0
     252      */
     253     PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
     254 
     255     /**
     256      * Creates a new <code>PluralFormat</code> for a given set of rules and
     257      * a pattern. This constructor takes no locale argument.
     258      * @param rules    defines the behavior of the <code>PluralFormat</code>
     259      *                 object.
     260      * @param pattern  the pattern for this <code>PluralFormat</code>.
     261      *                 Errors are returned to status if the pattern is invalid.
     262      * @param status   output param set to success/failure code on exit, which
     263      *                 must not indicate a failure before the function call.
     264      * @stable ICU 4.0
     265      */
     266     PluralFormat(const PluralRules& rules,
     267                  const UnicodeString& pattern,
     268                  UErrorCode& status);
     269 
     270     /**
     271      * Creates a new <code>PluralFormat</code> for a given set of rules, a
     272      * pattern and a locale.
     273      * @param locale  the default number formatting will be done using this
     274      *                locale (the plural cases are defined by
     275      *                <code>rules</code>, not by this locale).
     276      * @param rules   defines the behavior of the <code>PluralFormat</code>
     277      *                object.
     278      * @param pattern the pattern for this <code>PluralFormat</code>.
     279      *                Errors are returned to status if the pattern is invalid.
     280      * @param status  output param set to success/failure code on exit, which
     281      *                must not indicate a failure before the function call.
     282      * @stable ICU 4.0
     283      */
     284     PluralFormat(const Locale& locale,
     285                  const PluralRules& rules,
     286                  const UnicodeString& pattern,
     287                  UErrorCode& status);
     288 
     289     /**
     290      * Copy constructor.
              * @param other  the <code>PluralFormat</code> object to copy from.
     291      * @stable ICU 4.0
     292      */
     293     PluralFormat(const PluralFormat& other);
     294 
     295     /**
     296      * Destructor.
     297      * @stable ICU 4.0
     298      */
     299     virtual ~PluralFormat();
     300 
     301     /**
     302      * Sets the pattern used by this plural format.
     303      * The method parses the pattern and creates a map of format strings
     304      * for the plural rules.
     305      * Patterns and their interpretation are specified in the class description.
     306      *
     307      * @param pattern the pattern for this plural format.
     308      *                Errors are returned to status if the pattern is invalid.
     309      * @param status  output param set to success/failure code on exit, which
     310      *                must not indicate a failure before the function call.
     311      * @stable ICU 4.0
     312      */
     313     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
     314 
     315 
     316     using Format::format;
     317 
     318     /**
     319      * Formats a plural message for a given number.
     320      *
     321      * @param number  the number for which the plural message should be
     322      *                formatted. If no pattern has been applied to this
     323      *                <code>PluralFormat</code> object yet, the formatted number
     324      *                will be returned.
     325      * @param status  output param set to success/failure code on exit, which
     326      *                must not indicate a failure before the function call.
     327      * @return        the string containing the formatted plural message.
     328      * @stable ICU 4.0
     329      */
     330     UnicodeString format(int32_t number, UErrorCode& status) const;
     331 
     332     /**
     333      * Formats a plural message for a given number.
     334      *
     335      * @param number  the number for which the plural message should be
     336      *                formatted. If no pattern has been applied to this
     337      *                <code>PluralFormat</code> object yet, the formatted number
     338      *                will be returned.
     339      * @param status  output param set to success/failure code on exit, which
     340      *                must not indicate a failure before the function call.
     341      * @return        the string containing the formatted plural message.
     342      * @stable ICU 4.0
     343      */
     344     UnicodeString format(double number, UErrorCode& status) const;
     345 
     346     /**
     347      * Formats a plural message for a given number.
     348      *
     349      * @param number   the number for which the plural message should be
     350      *                 formatted. If no pattern has been applied to this
     351      *                 <code>PluralFormat</code> object yet, the formatted number
     352      *                 will be returned.
     353      * @param appendTo output parameter to receive result.
     354      *                 Result is appended to existing contents.
     355      * @param pos      On input: an alignment field, if desired.
     356      *                 On output: the offsets of the alignment field.
     357      * @param status   output param set to success/failure code on exit, which
     358      *                 must not indicate a failure before the function call.
     359      * @return         the string containing the formatted plural message.
     360      * @stable ICU 4.0
     361      */
     362     UnicodeString& format(int32_t number,
     363                           UnicodeString& appendTo,
     364                           FieldPosition& pos,
     365                           UErrorCode& status) const;
     366 
     367     /**
     368      * Formats a plural message for a given number.
     369      *
     370      * @param number   the number for which the plural message should be
     371      *                 formatted. If no pattern has been applied to this
     372      *                 <code>PluralFormat</code> object yet, the formatted number
     373      *                 will be returned.
     374      * @param appendTo output parameter to receive result.
     375      *                 Result is appended to existing contents.
     376      * @param pos      On input: an alignment field, if desired.
     377      *                 On output: the offsets of the alignment field.
     378      * @param status   output param set to success/failure code on exit, which
     379      *                 must not indicate a failure before the function call.
     380      * @return         the string containing the formatted plural message.
     381      * @stable ICU 4.0
     382      */
     383     UnicodeString& format(double number,
     384                           UnicodeString& appendTo,
     385                           FieldPosition& pos,
     386                           UErrorCode& status) const;
     387 
     388     /**
     389      * Sets the locale used by this <code>PluralFormat</code> object.
     390      * Note: Calling this method resets this <code>PluralFormat</code> object,
     391      *     i.e., a pattern that was applied previously will be removed,
     392      *     and the NumberFormat is set to the default number format for
     393      *     the locale.  The resulting format behaves the same as one
     394      *     constructed from {@link #PluralFormat(const Locale& locale, UErrorCode& status)}.
     395      * @param locale  the <code>locale</code> to use to configure the formatter.
     396      * @param status  output param set to success/failure code on exit, which
     397      *                must not indicate a failure before the function call.
     398      * @stable ICU 4.0
     399      */
     400     void setLocale(const Locale& locale, UErrorCode& status);
     401 
     402     /**
     403      * Sets the number format used by this formatter.  You only need to
     404      * call this if you want a different number format than the default
     405      * formatter for the locale.
     406      * @param format  the number format to use (passed as pointer to const).
     407      * @param status  output param set to success/failure code on exit, which
     408      *                must not indicate a failure before the function call.
     409      * @stable ICU 4.0
     410      */
     411     void setNumberFormat(const NumberFormat* format, UErrorCode& status);
     412 
     413     /**
     414      * Assignment operator.
     415      *
     416      * @param other    the PluralFormat object to copy from.
              * @return         a reference to a <code>PluralFormat</code>
              *                 (NOTE(review): presumably <code>*this</code> - confirm).
     417      * @stable ICU 4.0
     418      */
     419     PluralFormat& operator=(const PluralFormat& other);
     420 
     421     /**
     422      * Return true if another object is semantically equal to this one.
     423      *
     424      * @param other    the Format object to be compared with.
     425      * @return         true if other is semantically equal to this.
     426      * @stable ICU 4.0
     427      */
     428     virtual UBool operator==(const Format& other) const;
     429 
     430     /**
     431      * Return true if another object is semantically unequal to this one.
     432      *
     433      * @param other    the Format object to be compared with.
     434      * @return         true if other is semantically unequal to this.
     435      * @stable ICU 4.0
     436      */
     437     virtual UBool operator!=(const Format& other) const;
     438 
     439     /**
     440      * Clones this Format object polymorphically.  The caller owns the
     441      * result and should delete it when done.
              * @return  pointer to the newly created Format object, owned by the caller.
     442      * @stable ICU 4.0
     443      */
     444     virtual Format* clone(void) const;
     445 
     446     /**
     447     * Redeclared Format method.
     448     *
     449     * @param obj       The object to be formatted into a string.
     450     * @param appendTo  output parameter to receive result.
     451     *                  Result is appended to existing contents.
     452     * @param pos       On input: an alignment field, if desired.
     453     *                  On output: the offsets of the alignment field.
     454     * @param status    output param filled with success/failure status.
     455     * @return          Reference to 'appendTo' parameter.
     456     * @stable ICU 4.0
     457     */
     458    UnicodeString& format(const Formattable& obj,
     459                          UnicodeString& appendTo,
     460                          FieldPosition& pos,
     461                          UErrorCode& status) const;
     462 
     463    /**
     464     * Returns the pattern set through applyPattern() or a constructor.
     465     * Note that this method is declared non-const.
     466     * @param  appendTo  output parameter to receive result.
     467     *                   Result is appended to existing contents.
     468     * @return the UnicodeString with inserted pattern.
     469     * @stable ICU 4.0
     470     */
     471    UnicodeString& toPattern(UnicodeString& appendTo);
     472 
     473    /**
     474     * This method is not yet supported by <code>PluralFormat</code>.
     475     * <P>
     476     * Before calling, set parse_pos.index to the offset you want to start
     477     * parsing at in the source. After calling, parse_pos.index is the end of
     478     * the text you parsed. If an error occurs, index is unchanged.
     479     * <P>
     480     * When parsing, leading whitespace is discarded (with a successful parse),
     481     * while trailing whitespace is left as is.
     482     * <P>
     483     * See Format::parseObject() for more.
     484     *
     485     * @param source    The string to be parsed into an object.
     486     * @param result    Formattable to be set to the parse result.
     487     *                  If parse fails, return contents are undefined.
     488     * @param parse_pos The position to start parsing at. Upon return
     489     *                  this param is set to the position after the
     490     *                  last character successfully parsed. If the
     491     *                  source is not parsed successfully, this param
     492     *                  will remain unchanged.
     493     * @stable ICU 4.0
     494     */
     495    virtual void parseObject(const UnicodeString& source,
     496                             Formattable& result,
     497                             ParsePosition& parse_pos) const;
     498 
     499     /**
     500      * ICU "poor man's RTTI", returns a UClassID for this class.
     501      *
     502      * @stable ICU 4.0
     503      *
     504      */
     505     static UClassID U_EXPORT2 getStaticClassID(void);
     506 
     507     /**
     508      * ICU "poor man's RTTI", returns a UClassID for the actual class.
     509      *
     510      * @stable ICU 4.0
     511      */
     512      virtual UClassID getDynamicClassID() const;
     513 
     514 private:
     515     typedef enum fmtToken {  // token kinds; the type taken by inRange()'s second parameter
     516         none,
     517         tLetter,
     518         tNumber,
     519         tSpace,
     520         tNumberSign,
     521         tLeftBrace,
     522         tRightBrace
     523     }fmtToken;
     524 
     525     Locale  locale;  // locale associated with this formatter (see setLocale())
     526     PluralRules* pluralRules;  // plural rules of this formatter; NOTE(review): ownership is not visible in this header
     527     UnicodeString pattern;  // pattern text (see applyPattern() / toPattern())
     528     Hashtable  *fParsedValuesHash;  // NOTE(review): presumably maps case keywords to their parsed messages - confirm in the implementation
     529     NumberFormat*  numberFormat;  // NOTE(review): presumably the default number format for the locale - confirm
     530     NumberFormat*  replacedNumberFormat;  // NOTE(review): presumably the format supplied via setNumberFormat() - confirm
     531 
     532     PluralFormat();   // default constructor not implemented
     533     void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);  // NOTE(review): presumably shared constructor setup - confirm
     534     UBool inRange(UChar ch, fmtToken& type);  // NOTE(review): presumably classifies ch and reports its token kind through type - confirm
     535     UBool checkSufficientDefinition();  // NOTE(review): presumably checks that the required "other" case is defined - confirm
     536     void parsingFailure();  // NOTE(review): presumably cleans up state after a pattern parse error - confirm
     537     UnicodeString insertFormattedNumber(double number,  // NOTE(review): presumably substitutes the formatted number for '#' placeholders in message - confirm
     538                                         UnicodeString& message,
     539                                         UnicodeString& appendTo,
     540                                         FieldPosition& pos) const;
     541     void copyHashtable(Hashtable *other, UErrorCode& status);  // NOTE(review): presumably copies the entries of other into this object's table - confirm
     542 };
    543 
    544 U_NAMESPACE_END
    545 
    546 #endif /* #if !UCONFIG_NO_FORMATTING */
    547 
     548 #endif // PLURFMT
    549 //eof
    550