Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2009, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 
      8 * File PLURFMT.H
      9 *
     10 * Modification History:*
     11 *   Date        Name        Description
     12 *
     13 ********************************************************************************
     14 */
     15 
     16 #ifndef PLURFMT
     17 #define PLURFMT
     18 
     19 #include "unicode/utypes.h"
     20 
     21 /**
     22  * \file
     23  * \brief C++ API: PluralFormat object
     24  */
     25 
     26 #if !UCONFIG_NO_FORMATTING
     27 
     28 #include "unicode/numfmt.h"
     29 #include "unicode/plurrule.h"
     30 
     31 U_NAMESPACE_BEGIN
     32 
     33 class Hashtable;
     34 
     35 /**
     36  * <p>
     37  * <code>PluralFormat</code> supports the creation of internationalized
     38  * messages with plural inflection. It is based on <i>plural
     39  * selection</i>, i.e. the caller specifies messages for each
     40  * plural case that can appear in the user's language and the
     41  * <code>PluralFormat</code> selects the appropriate message based on
     42  * the number.
     43  * </p>
     44  * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
     45  * <p>
     46  * Different languages have different ways to inflect
     47  * plurals. Creating internationalized messages that include plural
     48  * forms is only feasible when the framework is able to handle plural
     49  * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
     50  * doesn't handle this well, because it attaches a number interval to
     51  * each message and selects the message whose interval contains a
     52  * given number. This can only handle a finite number of
     53  * intervals. But in some languages, like Polish, one plural case
     54  * applies to infinitely many intervals (e.g., paucal applies to
     55  * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
     56  * 14). Thus <code>ChoiceFormat</code> is not adequate.
     57  * </p><p>
     58  * <code>PluralFormat</code> deals with this by breaking the problem
     59  * into two parts:
     60  * <ul>
     61  * <li>It uses <code>PluralRules</code> that can define more complex
     62  *     conditions for a plural case than just a single interval. These plural
     63  *     rules define both what plural cases exist in a language, and to
     64  *     which numbers these cases apply.
     65  * <li>It provides predefined plural rules for many locales. Thus, the programmer
     66  *     need not worry about the plural cases of a language. On the flip side,
     67  *     the localizer does not have to specify the plural cases; he can simply
     68  *     use the predefined keywords. The whole plural formatting of messages can
     69  *     be done using localized patterns from resource bundles.
     70  * </ul>
     71  * </p>
     72  * <h4>Usage of <code>PluralFormat</code></h4>
     73  * <p>
     74  * This discussion assumes that you use <code>PluralFormat</code> with
     75  * a predefined set of plural rules. You can create one using one of
     76  * the constructors that takes a <code>locale</code> object. To
     77  * specify the message pattern, you can either pass it to the
     78  * constructor or set it explicitly using the
     79  * <code>applyPattern()</code> method. The <code>format()</code>
     80  * method takes a number object and selects the message of the
     81  * matching plural case. This message will be returned.
     82  * </p>
     83  * <h5>Patterns and Their Interpretation</h5>
     84  * <p>
     85  * The pattern text defines the message output for each plural case of the
     86  * used locale. The pattern is a sequence of
     87  * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
     88  * space characters. Each clause assigns the message <code><i>message</i></code>
     89  * to the plural case identified by <code><i>caseKeyword</i></code>.
     90  * </p><p>
     91  * You always have to define a message text for the default plural case
     92  * "<code>other</code>" which is contained in every rule set; if it is missing,
     93  * U_DEFAULT_KEYWORD_MISSING will be set to status. If the plural rules of the
     94  * <code>PluralFormat</code> object do not contain a plural case identified by
     95  * <code><i>caseKeyword</i></code>, U_UNDEFINED_KEYWORD will be set to status.
     96  * If you do not specify a message text for a particular plural case, the
     97  * message text of the plural case "<code>other</code>" gets assigned to this
     98  * plural case. If you specify more than one message for the same plural case,
     99  * U_DUPLICATE_KEYWORD will be set to status.
    100  * <br/>
    101  * Spaces between <code><i>caseKeyword</i></code> and
    102  * <code><i>message</i></code>  will be ignored; spaces within
    103  * <code><i>message</i></code> will be preserved.
    104  * </p><p>
    105  * The message text for a particular plural case may contain other message
    106  * format patterns. <code>PluralFormat</code> preserves these so that you
    107  * can use the strings produced by <code>PluralFormat</code> with other
    108  * formatters. If you are using <code>PluralFormat</code> inside a
    109  * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
    110  * automatically evaluate the resulting format pattern.<br/>
    111  * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
    112  * in message texts to define a nested format pattern.<br/>
    113  * The pound sign (<code>#</code>) will be interpreted as the number placeholder
    114  * in the message text, if it is not contained in curly braces (to preserve
    115  * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
    116  * replace each of those pound signs by the number passed to the
    117  * <code>format()</code> method. It will be formatted using a
    118  * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
    119  * need special number formatting, you have to explicitly specify a
    120  * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
    121  * </p>
    122  * Example
    123  * <pre>
    124  * UErrorCode status = U_ZERO_ERROR;
    125  * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
    126  *   one{{0, number, C''est #,##0.0#  fichier}} other {Ce sont # fichiers}} dans la liste."),
    127  *   Locale("fr"), status);
    128  * if (U_FAILURE(status)) {
    129  *     return;
    130  * }
    131  * Formattable args1[] = {(int32_t)0};
    132  * Formattable args2[] = {(int32_t)3};
    133  * FieldPosition ignore(FieldPosition::DONT_CARE);
    134  * UnicodeString result;
    135  * msgFmt->format(args1, 1, result, ignore, status);
    136  * cout << result << endl;
    137  * result.remove();
    138  * msgFmt->format(args2, 1, result, ignore, status);
    139  * cout << result << endl;
    140  * </pre>
    141  * Produces the output:<br/>
    142  * <code>C'est 0,0 fichier dans la liste.</code><br/>
    143  * <code>Ce sont 3 fichiers dans la liste.</code>
    144  * <p>
    145  * <strong>Note:</strong><br/>
    146  *   Currently <code>PluralFormat</code>
    147  *   does not make use of quotes like <code>MessageFormat</code>.
    148  *   If you use plural format strings with <code>MessageFormat</code> and want
    149  *   to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
    150  *   <code>MessageFormat</code> unquotes this pattern and  passes the unquoted
    151  *   pattern to <code>PluralFormat</code>. It's a bit trickier if you use
    152  *   nested formats that do quoting. In the example above, we wanted to insert
    153  *   "<code>'</code>" in the number format pattern. Since
    154  *   <code>NumberFormat</code> supports quotes, we had to insert
    155  *   "<code>''</code>". But since <code>MessageFormat</code> unquotes the
    156  *   pattern before it gets passed to <code>PluralFormat</code>, we have to
    157  *   double these quotes, i.e. write "<code>''''</code>".
    158  * </p>
    159  * <h4>Defining Custom Plural Rules</h4>
    160  * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
    161  * create a <code>PluralRules</code> object and pass it to
    162  * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
    163  * constructor, this locale will be used to format the number in the message
    164  * texts.
    165  * </p><p>
    166  * For more information about <code>PluralRules</code>, see
    167  * {@link PluralRules}.
    168  * </p>
    169  *
    170  * ported from Java
    171  * @stable ICU 4.0
    172  */
    173 
    174 class U_I18N_API PluralFormat : public Format {
    175 public:
    176
    177     /**
    178      * Creates a new <code>PluralFormat</code> for the default locale.
    179      * This locale will be used to get the set of plural rules and for standard
    180      * number formatting.
    181      * @param status  output param set to success/failure code on exit, which
    182      *                must not indicate a failure before the function call.
    183      * @stable ICU 4.0
    184      */
    185     PluralFormat(UErrorCode& status);
    186
    187     /**
    188      * Creates a new <code>PluralFormat</code> for a given locale.
    189      * @param locale the <code>PluralFormat</code> will be configured with
    190      *               rules for this locale. This locale will also be used for
    191      *               standard number formatting.
    192      * @param status output param set to success/failure code on exit, which
    193      *               must not indicate a failure before the function call.
    194      * @stable ICU 4.0
    195      */
    196     PluralFormat(const Locale& locale, UErrorCode& status);
    197
    198     /**
    199      * Creates a new <code>PluralFormat</code> for a given set of rules.
    200      * The standard number formatting will be done using the default locale.
    201      * @param rules   defines the behavior of the <code>PluralFormat</code>
    202      *                object.
    203      * @param status  output param set to success/failure code on exit, which
    204      *                must not indicate a failure before the function call.
    205      * @stable ICU 4.0
    206      */
    207     PluralFormat(const PluralRules& rules, UErrorCode& status);
    208
    209     /**
    210      * Creates a new <code>PluralFormat</code> for a given set of rules.
    211      * The standard number formatting will be done using the given locale.
    212      * @param locale  the default number formatting will be done using this
    213      *                locale.
    214      * @param rules   defines the behavior of the <code>PluralFormat</code>
    215      *                object.
    216      * @param status  output param set to success/failure code on exit, which
    217      *                must not indicate a failure before the function call.
    218      * @stable ICU 4.0
    219      */
    220     PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
    221
    222     /**
    223      * Creates a new <code>PluralFormat</code> for a given pattern string.
    224      * The default locale will be used to get the set of plural rules and for
    225      * standard number formatting.
    226      * @param  pattern the pattern for this <code>PluralFormat</code>.
    227      *                 errors are returned to status if the pattern is invalid.
    228      * @param status   output param set to success/failure code on exit, which
    229      *                 must not indicate a failure before the function call.
    230      * @stable ICU 4.0
    231      */
    232     PluralFormat(const UnicodeString& pattern, UErrorCode& status);
    233
    234     /**
    235      * Creates a new <code>PluralFormat</code> for a given pattern string and
    236      * locale.
    237      * The locale will be used to get the set of plural rules and for
    238      * standard number formatting.
    239      * @param locale   the <code>PluralFormat</code> will be configured with
    240      *                 rules for this locale. This locale will also be used for
    241      *                 standard number formatting.
    242      * @param pattern  the pattern for this <code>PluralFormat</code>.
    243      *                 errors are returned to status if the pattern is invalid.
    244      * @param status   output param set to success/failure code on exit, which
    245      *                 must not indicate a failure before the function call.
    246      * @stable ICU 4.0
    247      */
    248     PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
    249
    250     /**
    251      * Creates a new <code>PluralFormat</code> for a given set of rules and
    252      * a pattern.
    253      * @param rules    defines the behavior of the <code>PluralFormat</code>
    254      *                 object.
    255      * @param pattern  the pattern for this <code>PluralFormat</code>.
    256      *                 errors are returned to status if the pattern is invalid.
    257      * @param status   output param set to success/failure code on exit, which
    258      *                 must not indicate a failure before the function call.
    259      * @stable ICU 4.0
    260      */
    261     PluralFormat(const PluralRules& rules,
    262                  const UnicodeString& pattern,
    263                  UErrorCode& status);
    264
    265     /**
    266      * Creates a new <code>PluralFormat</code> for a given set of rules, a
    267      * pattern and a locale.
    268      * @param locale  the default number formatting will be done using this
    269      *                locale; the plural rules are taken from
    270      *                <code>rules</code>.
    271      * @param rules   defines the behavior of the <code>PluralFormat</code>
    272      *                object.
    273      * @param pattern the pattern for this <code>PluralFormat</code>.
    274      *                errors are returned to status if the pattern is invalid.
    275      * @param status  output param set to success/failure code on exit, which
    276      *                must not indicate a failure before the function call.
    277      * @stable ICU 4.0
    278      */
    279     PluralFormat(const Locale& locale,
    280                  const PluralRules& rules,
    281                  const UnicodeString& pattern,
    282                  UErrorCode& status);
    283
    284     /**
    285       * Copy constructor.
    286       * @stable ICU 4.0
    287       */
    288     PluralFormat(const PluralFormat& other);
    289
    290     /**
    291      * Destructor.
    292      * @stable ICU 4.0
    293      */
    294     virtual ~PluralFormat();
    295
    296     /**
    297      * Sets the pattern used by this plural format.
    298      * The method parses the pattern and creates a map of format strings
    299      * for the plural rules.
    300      * Patterns and their interpretation are specified in the class description.
    301      *
    302      * @param pattern the pattern for this plural format
    303      *                errors are returned to status if the pattern is invalid.
    304      * @param status  output param set to success/failure code on exit, which
    305      *                must not indicate a failure before the function call.
    306      * @stable ICU 4.0
    307      */
    308     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
    309
    310     /**
    311      * Formats a plural message for a given number.
    312      *
    313      * @param number  a number for which the plural message should be formatted
    314      *                for. If no pattern has been applied to this
    315      *                <code>PluralFormat</code> object yet, the formatted number
    316      *                will be returned.
    317      * @param status  output param set to success/failure code on exit, which
    318      *                must not indicate a failure before the function call.
    319      * @return        the string containing the formatted plural message.
    320      * @stable ICU 4.0
    321      */
    322     UnicodeString format(int32_t number, UErrorCode& status) const;
    323
    324     /**
    325      * Formats a plural message for a given number.
    326      *
    327      * @param number  a number for which the plural message should be formatted
    328      *                for. If no pattern has been applied to this
    329      *                <code>PluralFormat</code> object yet, the formatted number
    330      *                will be returned.
    331      * @param status  output param set to success/failure code on exit, which
    332      *                must not indicate a failure before the function call.
    333      * @return        the string containing the formatted plural message.
    334      * @stable ICU 4.0
    335      */
    336     UnicodeString format(double number, UErrorCode& status) const;
    337
    338     /**
    339      * Formats a plural message for a given number.
    340      *
    341      * @param number   a number for which the plural message should be formatted
    342      *                 for. If no pattern has been applied to this
    343      *                 <code>PluralFormat</code> object yet, the formatted number
    344      *                 will be returned.
    345      * @param appendTo output parameter to receive result.
    346      *                 result is appended to existing contents.
    347      * @param pos      On input: an alignment field, if desired.
    348      *                 On output: the offsets of the alignment field.
    349      * @param status   output param set to success/failure code on exit, which
    350      *                 must not indicate a failure before the function call.
    351      * @return         the string containing the formatted plural message.
    352      * @stable ICU 4.0
    353      */
    354     UnicodeString& format(int32_t number,
    355                           UnicodeString& appendTo,
    356                           FieldPosition& pos,
    357                           UErrorCode& status) const;
    358
    359     /**
    360      * Formats a plural message for a given number.
    361      *
    362      * @param number   a number for which the plural message should be formatted
    363      *                 for. If no pattern has been applied to this
    364      *                 <code>PluralFormat</code> object yet, the formatted number
    365      *                 will be returned.
    366      * @param appendTo output parameter to receive result.
    367      *                 result is appended to existing contents.
    368      * @param pos      On input: an alignment field, if desired.
    369      *                 On output: the offsets of the alignment field.
    370      * @param status   output param set to success/failure code on exit, which
    371      *                 must not indicate a failure before the function call.
    372      * @return         the string containing the formatted plural message.
    373      * @stable ICU 4.0
    374      */
    375     UnicodeString& format(double number,
    376                           UnicodeString& appendTo,
    377                           FieldPosition& pos,
    378                           UErrorCode& status) const;
    379
    380     /**
    381      * Sets the locale used by this <code>PluralFormat</code> object.
    382      * Note: Calling this method resets this <code>PluralFormat</code> object,
    383      *     i.e., a pattern that was applied previously will be removed,
    384      *     and the NumberFormat is set to the default number format for
    385      *     the locale.  The resulting format behaves the same as one
    386      *     constructed from {@link #PluralFormat(const Locale& locale, UErrorCode& status)}.
    387      * @param locale  the <code>locale</code> to use to configure the formatter.
    388      * @param status  output param set to success/failure code on exit, which
    389      *                must not indicate a failure before the function call.
    390      * @stable ICU 4.0
    391      */
    392     void setLocale(const Locale& locale, UErrorCode& status);
    393
    394     /**
    395       * Sets the number format used by this formatter.  You only need to
    396       * call this if you want a different number format than the default
    397       * formatter for the locale.
    398       * @param format  the number format to use.
    399       * @param status  output param set to success/failure code on exit, which
    400       *                must not indicate a failure before the function call.
    401       * @stable ICU 4.0
    402       */
    403     void setNumberFormat(const NumberFormat* format, UErrorCode& status);
    404
    405     /**
    406        * Assignment operator
    407        *
    408        * @param other    the PluralFormat object to copy from.
    409        * @stable ICU 4.0
    410        */
    411     PluralFormat& operator=(const PluralFormat& other);
    412
    413     /**
    414       * Return true if another object is semantically equal to this one.
    415       *
    416       * @param other    the PluralFormat object to be compared with.
    417       * @return         true if other is semantically equal to this.
    418       * @stable ICU 4.0
    419       */
    420     virtual UBool operator==(const Format& other) const;
    421
    422     /**
    423      * Return true if another object is semantically unequal to this one.
    424      *
    425      * @param other    the PluralFormat object to be compared with.
    426      * @return         true if other is semantically unequal to this.
    427      * @stable ICU 4.0
    428      */
    429     virtual UBool operator!=(const Format& other) const;
    430
    431     /**
    432      * Clones this Format object polymorphically.  The caller owns the
    433      * result and should delete it when done.
    434      * @stable ICU 4.0
    435      */
    436     virtual Format* clone(void) const;
    437
    438     /**
    439     * Redeclared Format method.
    440     *
    441     * @param obj       The object to be formatted into a string.
    442     * @param appendTo  output parameter to receive result.
    443     *                  Result is appended to existing contents.
    444     * @param pos       On input: an alignment field, if desired.
    445     *                  On output: the offsets of the alignment field.
    446     * @param status    output param filled with success/failure status.
    447     * @return          Reference to 'appendTo' parameter.
    448     * @stable ICU 4.0
    449     */
    450    UnicodeString& format(const Formattable& obj,
    451                          UnicodeString& appendTo,
    452                          FieldPosition& pos,
    453                          UErrorCode& status) const;
    454
    455    /**
    456     * Returns the pattern from applyPattern() or constructor().
    457     *
    458     * @param  appendTo  output parameter to receive result.
    459     *                  Result is appended to existing contents.
    460     * @return the UnicodeString with inserted pattern.
    461     * @stable ICU 4.0
    462     */
    463    UnicodeString& toPattern(UnicodeString& appendTo);
    464
    465    /**
    466     * This method is not yet supported by <code>PluralFormat</code>.
    467     * <P>
    468     * Before calling, set parse_pos.index to the offset you want to start
    469     * parsing at in the source. After calling, parse_pos.index is the end of
    470     * the text you parsed. If error occurs, index is unchanged.
    471     * <P>
    472     * When parsing, leading whitespace is discarded (with a successful parse),
    473     * while trailing whitespace is left as is.
    474     * <P>
    475     * See Format::parseObject() for more.
    476     *
    477     * @param source    The string to be parsed into an object.
    478     * @param result    Formattable to be set to the parse result.
    479     *                  If parse fails, return contents are undefined.
    480     * @param parse_pos The position to start parsing at. Upon return
    481     *                  this param is set to the position after the
    482     *                  last character successfully parsed. If the
    483     *                  source is not parsed successfully, this param
    484     *                  will remain unchanged.
    485     * @stable ICU 4.0
    486     */
    487    virtual void parseObject(const UnicodeString& source,
    488                             Formattable& result,
    489                             ParsePosition& parse_pos) const;
    490
    491     /**
    492      * ICU "poor man's RTTI", returns a UClassID for this class.
    493      *
    494      * @stable ICU 4.0
    495      *
    496      */
    497     static UClassID U_EXPORT2 getStaticClassID(void);
    498
    499     /**
    500      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    501      *
    502      * @stable ICU 4.0
    503      */
    504      virtual UClassID getDynamicClassID() const;
    505
    506 private:
    507     typedef enum fmtToken {   // token classes; presumably what inRange() reports via 'type' -- TODO confirm
    508         none,
    509         tLetter,
    510         tNumber,
    511         tSpace,
    512         tNumberSign,
    513         tLeftBrace,
    514         tRightBrace
    515     }fmtToken;
    516
    517     Locale  locale;   // presumably the locale given at construction or via setLocale() -- TODO confirm
    518     PluralRules* pluralRules;   // NOTE(review): raw pointer; ownership is not visible in this header
    519     UnicodeString pattern;   // presumably the pattern text that toPattern() reports -- TODO confirm
    520     Hashtable  *fParsedValuesHash;   // presumably the map of format strings built by applyPattern() -- TODO confirm
    521     NumberFormat*  numberFormat;   // NOTE(review): presumably the default number format for 'locale' -- confirm
    522     NumberFormat*  replacedNumberFormat;   // NOTE(review): presumably the format supplied via setNumberFormat() -- confirm
    523
    524     PluralFormat();   // default constructor not implemented
    525     void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);   // presumably shared constructor setup -- TODO confirm
    526     UBool inRange(UChar ch, fmtToken& type);
    527     UBool checkSufficientDefinition();
    528     void parsingFailure();
    529     UnicodeString insertFormattedNumber(double number,   // presumably substitutes the formatted number into 'message' -- TODO confirm
    530                                         UnicodeString& message,
    531                                         UnicodeString& appendTo,
    532                                         FieldPosition& pos) const;
    533     void copyHashtable(Hashtable *other, UErrorCode& status);
    534 };
    535 
    536 U_NAMESPACE_END
    537 
    538 #endif /* #if !UCONFIG_NO_FORMATTING */
    539 
    540 #endif // PLURFMT
    541 //eof
    542