Home | History | Annotate | Download | only in unicode
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 1997-2011, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  * Copyright (C) 2010 , Yahoo! Inc.
      8  ********************************************************************
      9  *
     10  * File SELFMT.H
     11  *
     12  * Modification History:
     13  *
     14  *   Date        Name        Description
     15  *   11/11/09    kirtig      Finished first cut of implementation.
     16  ********************************************************************/
     17 
     18 #ifndef SELFMT
     19 #define SELFMT
     20 
     21 #include "unicode/messagepattern.h"
     22 #include "unicode/numfmt.h"
     23 #include "unicode/utypes.h"
     24 
     25 /**
     26  * \file
     27  * \brief C++ API: SelectFormat object
     28  */
     29 
     30 #if !UCONFIG_NO_FORMATTING
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class MessageFormat;
     35 
     36 /**
     37   * <p><code>SelectFormat</code> supports the creation of  internationalized
     38   * messages by selecting phrases based on keywords. The pattern  specifies
     39   * how to map keywords to phrases and provides a default phrase. The
     40   * object provided to the format method is a string that's matched
     41   * against the keywords. If there is a match, the corresponding phrase
     42   * is selected; otherwise, the default phrase is used.</p>
     43   *
     44   * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
     45   *
     46   * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
     47   * with a <code>select</code> argument type,
     48   * rather than using a stand-alone <code>SelectFormat</code>.</p>
     49   *
     50   * <p>The main use case for the select format is gender based  inflection.
     51   * When names or nouns are inserted into sentences, their gender can  affect pronouns,
     52   * verb forms, articles, and adjectives. Special care needs to be
     53   * taken for the case where the gender cannot be determined.
     54   * The impact varies between languages:</p>
     55   * \htmlonly
     56   * <ul>
     57   * <li>English has three genders, and unknown gender is handled as a  special
     58   * case. Names use the gender of the named person (if known), nouns  referring
     59   * to people use natural gender, and inanimate objects are usually  neutral.
     60   * The gender only affects pronouns: "he", "she", "it", "they".
     61   *
     62   * <li>German differs from English in that the gender of nouns is  rather
     63   * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
     64   * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
     65   * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
     66   *
     67   * <li>French has only two genders; as in German the gender of nouns
     68   * is rather arbitrary - for sun and moon, the genders
     69   * are the opposite of those in German. The gender affects
     70   * pronouns ("il", "elle"), articles ("le", "la"),
     71   * adjective forms ("bon", "bonne"), and sometimes
     72   * verb forms ("all&#x00E9;", "all&#x00E9;e").
     73   *
     74   * <li>Polish distinguishes five genders (or noun classes),
     75   * human masculine, animate non-human masculine, inanimate masculine,
     76   * feminine, and neuter.
     77   * </ul>
     78   * \endhtmlonly
     79   * <p>Some other languages have noun classes that are not related to  gender,
     80   * but similar in grammatical use.
     81   * Some African languages have around 20 noun classes.</p>
     82   *
     83   * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
     84   * we usually need to distinguish only between female, male and other/unknown.</p>
     85   *
     86   * <p>To enable localizers to create sentence patterns that take their
     87   * language's gender dependencies into consideration, software has to  provide
     88   * information about the gender associated with a noun or name to
     89   * <code>MessageFormat</code>.
     90   * Two main cases can be distinguished:</p>
     91   *
     92   * <ul>
     93   * <li>For people, natural gender information should be maintained  for each person.
     94   * Keywords like "male", "female", "mixed" (for groups of people)
     95   * and "unknown" could be used.
     96   *
     97   * <li>For nouns, grammatical gender information should be maintained  for
     98   * each noun and per language, e.g., in resource bundles.
     99   * The keywords "masculine", "feminine", and "neuter" are commonly  used,
    100   * but some languages may require other keywords.
    101   * </ul>
    102   *
    103   * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
    104   * parameter separate from the name or noun it's associated with. For  example,
    105   * to generate a message such as "Jean went to Paris", three separate  arguments
    106   * would be provided: The name of the person as argument 0, the  gender of
    107   * the person as argument 1, and the name of the city as argument 2.
    108   * The sentence pattern for English, where the gender of the person has
    109   * no impact on this simple sentence, would not refer to argument 1  at all:</p>
    110   *
    111   * <pre>{0} went to {2}.</pre>
    112   *
    113   * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
    114   * inside each phrase. Otherwise translators would have to be trained on how to
    115   * move bits of the sentence in and out of the select argument of a message.
    116   * (The examples below do not follow this recommendation!)</p>
    117   *
    118   * <p>The sentence pattern for French, where the gender of the person affects
    119   * the form of the participle, uses a select format based on argument 1:</p>
    120   *
    121   * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
    122   *
    123   * <p>Patterns can be nested, so that it's possible to handle  interactions of
    124   * number and gender where necessary. For example, if the above  sentence should
    125   * allow for the names of several people to be inserted, the  following sentence
    126   * pattern can be used (with argument 0 the list of people's names,
    127   * argument 1 the number of people, argument 2 their combined gender, and
    128   * argument 3 the city name):</p>
    129   *
    130   * \htmlonly
    131   * <pre>{0} {1, plural,
    132   *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
    133   *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
    134   *          }&#x00E0; {3}.</pre>
    135   * \endhtmlonly
    136   *
    137   * <h4>Patterns and Their Interpretation</h4>
    138   *
    139   * <p>The <code>SelectFormat</code> pattern string defines the phrase output
    140   * for each user-defined keyword.
    141   * The pattern is a sequence of (keyword, message) pairs.
    142   * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
    143   *
    144   * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
    145   *
    146   * <p>You always have to define a phrase for the default keyword
    147   * <code>other</code>; this phrase is returned when the keyword
    148   * provided to
    149   * the <code>format</code> method matches no other keyword.
    150   * If a pattern does not provide a phrase for <code>other</code>, the method
    151   * it is provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
    152   * <br>
    153   * Pattern_White_Space between keywords and messages is ignored.
    154   * Pattern_White_Space within a message is preserved and output.</p>
    155   *
    156   * <p><pre>Example:
    157   * \htmlonly
    158   *
    159   * UErrorCode status = U_ZERO_ERROR;
    160   * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
    161   * if (U_FAILURE(status)) {
    162   *       return;
    163   * }
    164   * FieldPosition ignore(FieldPosition::DONT_CARE);
    165   * UnicodeString result;
    166   *
    167   * const char* str1 = "Kirti,female";
    168   * Formattable args1[] = {"Kirti","female"};
    169   * msgFmt->format(args1, 2, result, ignore, status);
    170   * cout << "Input is " << str1 << " and result is: " << result << endl;
    171   * delete msgFmt;
    172   *
    173   * \endhtmlonly
    174   * </pre>
    175   * </p>
    176   *
    177   * Produces the output:<br>
    178   * \htmlonly
    179   * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
    180   * \endhtmlonly
    181   *
    182   * @stable ICU 4.4
    183   */
    184 
    185 class U_I18N_API SelectFormat : public Format {
    186 public:
    187 
    188     /**
    189      * Creates a new <code>SelectFormat</code> for a given pattern string.
    190      * @param pattern  the pattern for this <code>SelectFormat</code>.
    191      *                 Errors are reported through status if the pattern is invalid.
    192      * @param status   output param set to success/failure code on exit, which
    193      *                 must not indicate a failure before the function call.
    194      * @stable ICU 4.4
    195      */
    196     SelectFormat(const UnicodeString& pattern, UErrorCode& status);
    197 
    198     /**
    199      * Copy constructor.
    200      * @stable ICU 4.4
    201      */
    202     SelectFormat(const SelectFormat& other);
    203 
    204     /**
    205      * Destructor.
    206      * @stable ICU 4.4
    207      */
    208     virtual ~SelectFormat();
    209 
    210     /**
    211      * Sets the pattern used by this select format,
    212      * i.e. the sequence of (keyword, message) pairs to select from.
    213      * Patterns and their interpretation are specified in the class description.
    214      *
    215      * @param pattern the pattern for this select format.
    216      *                Errors are reported through status if the pattern is invalid.
    217      * @param status  output param set to success/failure code on exit, which
    218      *                must not indicate a failure before the function call.
    219      * @stable ICU 4.4
    220      */
    221     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
    222 
    223 
    224     using Format::format;
    225 
    226     /**
    227      * Selects the phrase for the given keyword.
    228      *
    229      * @param keyword  The keyword that is used to select an alternative.
    230      * @param appendTo output parameter to receive result.
    231      *                 Result is appended to existing contents.
    232      * @param pos      On input: an alignment field, if desired.
    233      *                 On output: the offsets of the alignment field.
    234      * @param status   output param set to success/failure code on exit, which
    235      *                 must not indicate a failure before the function call.
    236      * @return         Reference to 'appendTo' parameter.
    237      * @stable ICU 4.4
    238      */
    239     UnicodeString& format(const UnicodeString& keyword,
    240                             UnicodeString& appendTo,
    241                             FieldPosition& pos,
    242                             UErrorCode& status) const;
    243 
    244     /**
    245      * Assignment operator.
    246      *
    247      * @param other    the SelectFormat object to copy from.
    248      * @stable ICU 4.4
    249      */
    250     SelectFormat& operator=(const SelectFormat& other);
    251 
    252     /**
    253      * Return true if another object is semantically equal to this one.
    254      *
    255      * @param other    the Format object to be compared with.
    256      * @return         true if other is semantically equal to this.
    257      * @stable ICU 4.4
    258      */
    259     virtual UBool operator==(const Format& other) const;
    260 
    261     /**
    262      * Return true if another object is semantically unequal to this one.
    263      *
    264      * @param other    the Format object to be compared with.
    265      * @return         true if other is semantically unequal to this.
    266      * @stable ICU 4.4
    267      */
    268     virtual UBool operator!=(const Format& other) const;
    269 
    270     /**
    271      * Clones this Format object polymorphically.  The caller owns the
    272      * result and should delete it when done.
    273      * @stable ICU 4.4
    274      */
    275     virtual Format* clone(void) const;
    276 
    277     /**
    278      * Format an object to produce a string.
    279      * This method handles keyword strings.
    280      * If the Formattable object is not a <code>UnicodeString</code>,
    281      * then status is set to a failing UErrorCode.
    282      *
    283      * @param obj       A keyword string that is used to select an alternative.
    284      * @param appendTo  output parameter to receive result.
    285      *                  Result is appended to existing contents.
    286      * @param pos       On input: an alignment field, if desired.
    287      *                  On output: the offsets of the alignment field.
    288      * @param status    output param filled with success/failure status.
    289      * @return          Reference to 'appendTo' parameter.
    290      * @stable ICU 4.4
    291      */
    292     UnicodeString& format(const Formattable& obj,
    293                          UnicodeString& appendTo,
    294                          FieldPosition& pos,
    295                          UErrorCode& status) const;
    296 
    297     /**
    298      * Returns the pattern from applyPattern() or constructor.
    299      *
    300      * @param appendTo  output parameter to receive result.
    301      *                  Result is appended to existing contents.
    302      * @return the UnicodeString with the pattern appended.
    303      * @stable ICU 4.4
    304      */
    305     UnicodeString& toPattern(UnicodeString& appendTo);
    306 
    307     /**
    308      * This method is not yet supported by <code>SelectFormat</code>.
    309      * <P>
    310      * Before calling, set parse_pos.index to the offset you want to start
    311      * parsing at in the source. After calling, parse_pos.index is the end of
    312      * the text you parsed. If an error occurs, index is unchanged.
    313      * <P>
    314      * When parsing, leading whitespace is discarded (with a successful parse),
    315      * while trailing whitespace is left as is.
    316      * <P>
    317      * See Format::parseObject() for more.
    318      *
    319      * @param source     The string to be parsed into an object.
    320      * @param result     Formattable to be set to the parse result.
    321      *     If parse fails, return contents are undefined.
    322      * @param parse_pos  The position to start parsing at. Upon return
    323      *     this param is set to the position after the
    324      *     last character successfully parsed. If the
    325      *     source is not parsed successfully, this param
    326      *     will remain unchanged.
    327      * @stable ICU 4.4
    328      */
    329     virtual void parseObject(const UnicodeString& source,
    330                             Formattable& result,
    331                             ParsePosition& parse_pos) const;
    332 
    333     /**
    334      * ICU "poor man's RTTI", returns a UClassID for this class.
    335      * @stable ICU 4.4
    336      */
    337     static UClassID U_EXPORT2 getStaticClassID(void);
    338 
    339     /**
    340      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    341      * @stable ICU 4.4
    342      */
    343     virtual UClassID getDynamicClassID() const;
    344 
    345 private:
    346     friend class MessageFormat;
    347 
    348     SelectFormat();   // Default constructor: declared private and not implemented.
    349 
    350     /**
    351      * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
    352      * @param pattern A MessagePattern.
    353      * @param partIndex the index of the first SelectFormat argument style part.
    354      * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
    355      * @param ec Error code.
    356      * @return the sub-message start part index.
    357      */
    358     static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    359                                   const UnicodeString& keyword, UErrorCode& ec);
    360 
    361     MessagePattern msgPattern;  // NOTE(review): presumably holds the parsed form of the applied pattern; confirm in the implementation file.
    362 };
    363 
    364 U_NAMESPACE_END
    365 
    366 #endif /* #if !UCONFIG_NO_FORMATTING */
    367 
    368 #endif // SELFMT
    369 //eof
    370