1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 8 * File PLURFMT.H 9 * 10 * Modification History:* 11 * Date Name Description 12 * 13 ******************************************************************************** 14 */ 15 16 #ifndef PLURFMT 17 #define PLURFMT 18 19 #include "unicode/utypes.h" 20 21 /** 22 * \file 23 * \brief C++ API: PluralFormat object 24 */ 25 26 #if !UCONFIG_NO_FORMATTING 27 28 #include "unicode/numfmt.h" 29 #include "unicode/plurrule.h" 30 31 U_NAMESPACE_BEGIN 32 33 class Hashtable; 34 35 /** 36 * <p> 37 * <code>PluralFormat</code> supports the creation of internationalized 38 * messages with plural inflection. It is based on <i>plural 39 * selection</i>, i.e. the caller specifies messages for each 40 * plural case that can appear in the users language and the 41 * <code>PluralFormat</code> selects the appropriate message based on 42 * the number. 43 * </p> 44 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 45 * <p> 46 * Different languages have different ways to inflect 47 * plurals. Creating internationalized messages that include plural 48 * forms is only feasible when the framework is able to handle plural 49 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 50 * doesn't handle this well, because it attaches a number interval to 51 * each message and selects the message whose interval contains a 52 * given number. This can only handle a finite number of 53 * intervals. But in some languages, like Polish, one plural case 54 * applies to infinitely many intervals (e.g., paucal applies to 55 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 56 * 14). Thus <code>ChoiceFormat</code> is not adequate. 
 * </p><p>
 * <code>PluralFormat</code> deals with this by breaking the problem
 * into two parts:
 * <ul>
 * <li>It uses <code>PluralRules</code> that can define more complex
 *     conditions for a plural case than just a single interval. These plural
 *     rules define both what plural cases exist in a language, and to
 *     which numbers these cases apply.
 * <li>It provides predefined plural rules for many locales. Thus, the programmer
 *     need not worry about the plural cases of a language. On the flip side,
 *     the localizer does not have to specify the plural cases; they can simply
 *     use the predefined keywords. The whole plural formatting of messages can
 *     be done using localized patterns from resource bundles. For predefined plural
 *     rules, see CLDR <i>Language Plural Rules</i> page at
 *     http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
 * </ul>
 * </p>
 * <h4>Usage of <code>PluralFormat</code></h4>
 * <p>
 * This discussion assumes that you use <code>PluralFormat</code> with
 * a predefined set of plural rules. You can create one using one of
 * the constructors that takes a <code>locale</code> object. To
 * specify the message pattern, you can either pass it to the
 * constructor or set it explicitly using the
 * <code>applyPattern()</code> method. The <code>format()</code>
 * method takes a number object and selects the message of the
 * matching plural case. This message will be returned.
 * </p>
 * <h5>Patterns and Their Interpretation</h5>
 * <p>
 * The pattern text defines the message output for each plural case of the
 * used locale. The pattern is a sequence of
 * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
 * space characters. Each clause assigns the message <code><i>message</i></code>
 * to the plural case identified by <code><i>caseKeyword</i></code>.
 * </p><p>
 * There are six predefined case keywords in ICU - 'zero', 'one', 'two', 'few', 'many' and
 * 'other'. You always have to define a message text for the default plural case
 * "<code>other</code>" which is contained in every rule set. If the plural
 * rules of the <code>PluralFormat</code> object do not contain a plural case
 * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
 * will be set to status.
 * If you do not specify a message text for a particular plural case, the
 * message text of the plural case "<code>other</code>" gets assigned to this
 * plural case. If you specify more than one message for the same plural case,
 * U_DUPLICATE_KEYWORD will be set to status.
 * <br>
 * Spaces between <code><i>caseKeyword</i></code> and
 * <code><i>message</i></code> will be ignored; spaces within
 * <code><i>message</i></code> will be preserved.
 * </p><p>
 * The message text for a particular plural case may contain other message
 * format patterns. <code>PluralFormat</code> preserves these so that you
 * can use the strings produced by <code>PluralFormat</code> with other
 * formatters. If you are using <code>PluralFormat</code> inside a
 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
 * automatically evaluate the resulting format pattern.<br>
 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
 * in message texts to define a nested format pattern.<br>
 * The pound sign (<code>#</code>) will be interpreted as the number placeholder
 * in the message text, if it is not contained in curly braces (to preserve
 * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
 * replace each of those pound signs by the number passed to the
 * <code>format()</code> method. It will be formatted using a
 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale.
 * If you need special number formatting, you have to explicitly specify a
 * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
 * </p>
 * Example
 * <pre>
 * \code
 *     UErrorCode status = U_ZERO_ERROR;
 *     MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
 *       one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste."),
 *       Locale("fr"), status);
 *     if (U_FAILURE(status)) {
 *         return;
 *     }
 *     Formattable args1[] = {(int32_t)0};
 *     Formattable args2[] = {(int32_t)3};
 *     FieldPosition ignore(FieldPosition::DONT_CARE);
 *     UnicodeString result;
 *     msgFmt->format(args1, 1, result, ignore, status);
 *     cout << result << endl;
 *     result.remove();
 *     msgFmt->format(args2, 1, result, ignore, status);
 *     cout << result << endl;
 * \endcode
 * </pre>
 * Produces the output:<br>
 * <code>C'est 0,0 fichier dans la liste.</code><br>
 * <code>Ce sont 3 fichiers dans la liste.</code>
 * <p>
 * <strong>Note:</strong><br>
 *   Currently <code>PluralFormat</code>
 *   does not make use of quotes like <code>MessageFormat</code>.
 *   If you use plural format strings with <code>MessageFormat</code> and want
 *   to use a quote sign <code>'</code>, you have to write <code>''</code>.
 *   <code>MessageFormat</code> unquotes this pattern and passes the unquoted
 *   pattern to <code>PluralFormat</code>. It's a bit trickier if you use
 *   nested formats that do quoting. In the example above, we wanted to insert
 *   <code>'</code> in the number format pattern. Since
 *   <code>NumberFormat</code> supports quotes, we had to insert
 *   <code>''</code>. But since <code>MessageFormat</code> unquotes the
 *   pattern before it gets passed to <code>PluralFormat</code>, we have to
 *   double these quotes, i.e. write <code>''''</code>.
163 * </p> 164 * <h4>Defining Custom Plural Rules</h4> 165 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 166 * create a <code>PluralRules</code> object and pass it to 167 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 168 * constructor, this locale will be used to format the number in the message 169 * texts. 170 * </p><p> 171 * For more information about <code>PluralRules</code>, see 172 * {@link PluralRules}. 173 * </p> 174 * 175 * ported from Java 176 * @stable ICU 4.0 177 */ 178 179 class U_I18N_API PluralFormat : public Format { 180 public: 181 182 /** 183 * Creates a new <code>PluralFormat</code> for the default locale. 184 * This locale will be used to get the set of plural rules and for standard 185 * number formatting. 186 * @param status output param set to success/failure code on exit, which 187 * must not indicate a failure before the function call. 188 * @stable ICU 4.0 189 */ 190 PluralFormat(UErrorCode& status); 191 192 /** 193 * Creates a new <code>PluralFormat</code> for a given locale. 194 * @param locale the <code>PluralFormat</code> will be configured with 195 * rules for this locale. This locale will also be used for 196 * standard number formatting. 197 * @param status output param set to success/failure code on exit, which 198 * must not indicate a failure before the function call. 199 * @stable ICU 4.0 200 */ 201 PluralFormat(const Locale& locale, UErrorCode& status); 202 203 /** 204 * Creates a new <code>PluralFormat</code> for a given set of rules. 205 * The standard number formatting will be done using the default locale. 206 * @param rules defines the behavior of the <code>PluralFormat</code> 207 * object. 208 * @param status output param set to success/failure code on exit, which 209 * must not indicate a failure before the function call. 
210 * @stable ICU 4.0 211 */ 212 PluralFormat(const PluralRules& rules, UErrorCode& status); 213 214 /** 215 * Creates a new <code>PluralFormat</code> for a given set of rules. 216 * The standard number formatting will be done using the given locale. 217 * @param locale the default number formatting will be done using this 218 * locale. 219 * @param rules defines the behavior of the <code>PluralFormat</code> 220 * object. 221 * @param status output param set to success/failure code on exit, which 222 * must not indicate a failure before the function call. 223 * @stable ICU 4.0 224 */ 225 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); 226 227 /** 228 * Creates a new <code>PluralFormat</code> for a given pattern string. 229 * The default locale will be used to get the set of plural rules and for 230 * standard number formatting. 231 * @param pattern the pattern for this <code>PluralFormat</code>. 232 * errors are returned to status if the pattern is invalid. 233 * @param status output param set to success/failure code on exit, which 234 * must not indicate a failure before the function call. 235 * @stable ICU 4.0 236 */ 237 PluralFormat(const UnicodeString& pattern, UErrorCode& status); 238 239 /** 240 * Creates a new <code>PluralFormat</code> for a given pattern string and 241 * locale. 242 * The locale will be used to get the set of plural rules and for 243 * standard number formatting. 244 * @param locale the <code>PluralFormat</code> will be configured with 245 * rules for this locale. This locale will also be used for 246 * standard number formatting. 247 * @param pattern the pattern for this <code>PluralFormat</code>. 248 * errors are returned to status if the pattern is invalid. 249 * @param status output param set to success/failure code on exit, which 250 * must not indicate a failure before the function call. 
251 * @stable ICU 4.0 252 */ 253 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); 254 255 /** 256 * Creates a new <code>PluralFormat</code> for a given set of rules, a 257 * pattern and a locale. 258 * @param rules defines the behavior of the <code>PluralFormat</code> 259 * object. 260 * @param pattern the pattern for this <code>PluralFormat</code>. 261 * errors are returned to status if the pattern is invalid. 262 * @param status output param set to success/failure code on exit, which 263 * must not indicate a failure before the function call. 264 * @stable ICU 4.0 265 */ 266 PluralFormat(const PluralRules& rules, 267 const UnicodeString& pattern, 268 UErrorCode& status); 269 270 /** 271 * Creates a new <code>PluralFormat</code> for a given set of rules, a 272 * pattern and a locale. 273 * @param locale the <code>PluralFormat</code> will be configured with 274 * rules for this locale. This locale will also be used for 275 * standard number formatting. 276 * @param rules defines the behavior of the <code>PluralFormat</code> 277 * object. 278 * @param pattern the pattern for this <code>PluralFormat</code>. 279 * errors are returned to status if the pattern is invalid. 280 * @param status output param set to success/failure code on exit, which 281 * must not indicate a failure before the function call. 282 * @stable ICU 4.0 283 */ 284 PluralFormat(const Locale& locale, 285 const PluralRules& rules, 286 const UnicodeString& pattern, 287 UErrorCode& status); 288 289 /** 290 * copy constructor. 291 * @stable ICU 4.0 292 */ 293 PluralFormat(const PluralFormat& other); 294 295 /** 296 * Destructor. 297 * @stable ICU 4.0 298 */ 299 virtual ~PluralFormat(); 300 301 /** 302 * Sets the pattern used by this plural format. 303 * The method parses the pattern and creates a map of format strings 304 * for the plural rules. 305 * Patterns and their interpretation are specified in the class description. 
306 * 307 * @param pattern the pattern for this plural format 308 * errors are returned to status if the pattern is invalid. 309 * @param status output param set to success/failure code on exit, which 310 * must not indicate a failure before the function call. 311 * @stable ICU 4.0 312 */ 313 void applyPattern(const UnicodeString& pattern, UErrorCode& status); 314 315 316 using Format::format; 317 318 /** 319 * Formats a plural message for a given number. 320 * 321 * @param number a number for which the plural message should be formatted 322 * for. If no pattern has been applied to this 323 * <code>PluralFormat</code> object yet, the formatted number 324 * will be returned. 325 * @param status output param set to success/failure code on exit, which 326 * must not indicate a failure before the function call. 327 * @return the string containing the formatted plural message. 328 * @stable ICU 4.0 329 */ 330 UnicodeString format(int32_t number, UErrorCode& status) const; 331 332 /** 333 * Formats a plural message for a given number. 334 * 335 * @param number a number for which the plural message should be formatted 336 * for. If no pattern has been applied to this 337 * PluralFormat object yet, the formatted number 338 * will be returned. 339 * @param status output param set to success or failure code on exit, which 340 * must not indicate a failure before the function call. 341 * @return the string containing the formatted plural message. 342 * @stable ICU 4.0 343 */ 344 UnicodeString format(double number, UErrorCode& status) const; 345 346 /** 347 * Formats a plural message for a given number. 348 * 349 * @param number a number for which the plural message should be formatted 350 * for. If no pattern has been applied to this 351 * <code>PluralFormat</code> object yet, the formatted number 352 * will be returned. 353 * @param appendTo output parameter to receive result. 354 * result is appended to existing contents. 
355 * @param pos On input: an alignment field, if desired. 356 * On output: the offsets of the alignment field. 357 * @param status output param set to success/failure code on exit, which 358 * must not indicate a failure before the function call. 359 * @return the string containing the formatted plural message. 360 * @stable ICU 4.0 361 */ 362 UnicodeString& format(int32_t number, 363 UnicodeString& appendTo, 364 FieldPosition& pos, 365 UErrorCode& status) const; 366 367 /** 368 * Formats a plural message for a given number. 369 * 370 * @param number a number for which the plural message should be formatted 371 * for. If no pattern has been applied to this 372 * PluralFormat object yet, the formatted number 373 * will be returned. 374 * @param appendTo output parameter to receive result. 375 * result is appended to existing contents. 376 * @param pos On input: an alignment field, if desired. 377 * On output: the offsets of the alignment field. 378 * @param status output param set to success/failure code on exit, which 379 * must not indicate a failure before the function call. 380 * @return the string containing the formatted plural message. 381 * @stable ICU 4.0 382 */ 383 UnicodeString& format(double number, 384 UnicodeString& appendTo, 385 FieldPosition& pos, 386 UErrorCode& status) const; 387 388 /** 389 * Sets the locale used by this <code>PluraFormat</code> object. 390 * Note: Calling this method resets this <code>PluraFormat</code> object, 391 * i.e., a pattern that was applied previously will be removed, 392 * and the NumberFormat is set to the default number format for 393 * the locale. The resulting format behaves the same as one 394 * constructed from {@link #PluralFormat(const Locale& locale, UErrorCode& status)}. 395 * @param locale the <code>locale</code> to use to configure the formatter. 396 * @param status output param set to success/failure code on exit, which 397 * must not indicate a failure before the function call. 
398 * @stable ICU 4.0 399 */ 400 void setLocale(const Locale& locale, UErrorCode& status); 401 402 /** 403 * Sets the number format used by this formatter. You only need to 404 * call this if you want a different number format than the default 405 * formatter for the locale. 406 * @param format the number format to use. 407 * @param status output param set to success/failure code on exit, which 408 * must not indicate a failure before the function call. 409 * @stable ICU 4.0 410 */ 411 void setNumberFormat(const NumberFormat* format, UErrorCode& status); 412 413 /** 414 * Assignment operator 415 * 416 * @param other the PluralFormat object to copy from. 417 * @stable ICU 4.0 418 */ 419 PluralFormat& operator=(const PluralFormat& other); 420 421 /** 422 * Return true if another object is semantically equal to this one. 423 * 424 * @param other the PluralFormat object to be compared with. 425 * @return true if other is semantically equal to this. 426 * @stable ICU 4.0 427 */ 428 virtual UBool operator==(const Format& other) const; 429 430 /** 431 * Return true if another object is semantically unequal to this one. 432 * 433 * @param other the PluralFormat object to be compared with. 434 * @return true if other is semantically unequal to this. 435 * @stable ICU 4.0 436 */ 437 virtual UBool operator!=(const Format& other) const; 438 439 /** 440 * Clones this Format object polymorphically. The caller owns the 441 * result and should delete it when done. 442 * @stable ICU 4.0 443 */ 444 virtual Format* clone(void) const; 445 446 /** 447 * Redeclared Format method. 448 * 449 * @param obj The object to be formatted into a string. 450 * @param appendTo output parameter to receive result. 451 * Result is appended to existing contents. 452 * @param pos On input: an alignment field, if desired. 453 * On output: the offsets of the alignment field. 454 * @param status output param filled with success/failure status. 455 * @return Reference to 'appendTo' parameter. 
456 * @stable ICU 4.0 457 */ 458 UnicodeString& format(const Formattable& obj, 459 UnicodeString& appendTo, 460 FieldPosition& pos, 461 UErrorCode& status) const; 462 463 /** 464 * Returns the pattern from applyPattern() or constructor(). 465 * 466 * @param appendTo output parameter to receive result. 467 * Result is appended to existing contents. 468 * @return the UnicodeString with inserted pattern. 469 * @stable ICU 4.0 470 */ 471 UnicodeString& toPattern(UnicodeString& appendTo); 472 473 /** 474 * This method is not yet supported by <code>PluralFormat</code>. 475 * <P> 476 * Before calling, set parse_pos.index to the offset you want to start 477 * parsing at in the source. After calling, parse_pos.index is the end of 478 * the text you parsed. If error occurs, index is unchanged. 479 * <P> 480 * When parsing, leading whitespace is discarded (with a successful parse), 481 * while trailing whitespace is left as is. 482 * <P> 483 * See Format::parseObject() for more. 484 * 485 * @param source The string to be parsed into an object. 486 * @param result Formattable to be set to the parse result. 487 * If parse fails, return contents are undefined. 488 * @param parse_pos The position to start parsing at. Upon return 489 * this param is set to the position after the 490 * last character successfully parsed. If the 491 * source is not parsed successfully, this param 492 * will remain unchanged. 493 * @stable ICU 4.0 494 */ 495 virtual void parseObject(const UnicodeString& source, 496 Formattable& result, 497 ParsePosition& parse_pos) const; 498 499 /** 500 * ICU "poor man's RTTI", returns a UClassID for this class. 501 * 502 * @stable ICU 4.0 503 * 504 */ 505 static UClassID U_EXPORT2 getStaticClassID(void); 506 507 /** 508 * ICU "poor man's RTTI", returns a UClassID for the actual class. 
509 * 510 * @stable ICU 4.0 511 */ 512 virtual UClassID getDynamicClassID() const; 513 514 private: 515 typedef enum fmtToken { 516 none, 517 tLetter, 518 tNumber, 519 tSpace, 520 tNumberSign, 521 tLeftBrace, 522 tRightBrace 523 }fmtToken; 524 525 Locale locale; 526 PluralRules* pluralRules; 527 UnicodeString pattern; 528 Hashtable *fParsedValuesHash; 529 NumberFormat* numberFormat; 530 NumberFormat* replacedNumberFormat; 531 532 PluralFormat(); // default constructor not implemented 533 void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status); 534 UBool inRange(UChar ch, fmtToken& type); 535 UBool checkSufficientDefinition(); 536 void parsingFailure(); 537 UnicodeString insertFormattedNumber(double number, 538 UnicodeString& message, 539 UnicodeString& appendTo, 540 FieldPosition& pos) const; 541 void copyHashtable(Hashtable *other, UErrorCode& status); 542 }; 543 544 U_NAMESPACE_END 545 546 #endif /* #if !UCONFIG_NO_FORMATTING */ 547 548 #endif // _PLURFMT 549 //eof 550