1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 8 * File PLURFMT.H 9 * 10 * Modification History:* 11 * Date Name Description 12 * 13 ******************************************************************************** 14 */ 15 16 #ifndef PLURFMT 17 #define PLURFMT 18 19 #include "unicode/utypes.h" 20 21 /** 22 * \file 23 * \brief C++ API: PluralFormat object 24 */ 25 26 #if !UCONFIG_NO_FORMATTING 27 28 #include "unicode/numfmt.h" 29 #include "unicode/plurrule.h" 30 31 U_NAMESPACE_BEGIN 32 33 class Hashtable; 34 35 /** 36 * <p> 37 * <code>PluralFormat</code> supports the creation of internationalized 38 * messages with plural inflection. It is based on <i>plural 39 * selection</i>, i.e. the caller specifies messages for each 40 * plural case that can appear in the users language and the 41 * <code>PluralFormat</code> selects the appropriate message based on 42 * the number. 43 * </p> 44 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 45 * <p> 46 * Different languages have different ways to inflect 47 * plurals. Creating internationalized messages that include plural 48 * forms is only feasible when the framework is able to handle plural 49 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 50 * doesn't handle this well, because it attaches a number interval to 51 * each message and selects the message whose interval contains a 52 * given number. This can only handle a finite number of 53 * intervals. But in some languages, like Polish, one plural case 54 * applies to infinitely many intervals (e.g., paucal applies to 55 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 56 * 14). Thus <code>ChoiceFormat</code> is not adequate. 57 * </p><p> 58 * <code>PluralFormat</code> deals with this by breaking the problem 59 * into two parts: 60 * <ul> 61 * <li>It uses <code>PluralRules</code> that can define more complex 62 * conditions for a plural case than just a single interval. These plural 63 * rules define both what plural cases exist in a language, and to 64 * which numbers these cases apply. 65 * <li>It provides predefined plural rules for many locales. Thus, the programmer 66 * need not worry about the plural cases of a language. On the flip side, 67 * the localizer does not have to specify the plural cases; he can simply 68 * use the predefined keywords. The whole plural formatting of messages can 69 * be done using localized patterns from resource bundles. 70 * </ul> 71 * </p> 72 * <h4>Usage of <code>PluralFormat</code></h4> 73 * <p> 74 * This discussion assumes that you use <code>PluralFormat</code> with 75 * a predefined set of plural rules. You can create one using one of 76 * the constructors that takes a <code>locale</code> object. To 77 * specify the message pattern, you can either pass it to the 78 * constructor or set it explicitly using the 79 * <code>applyPattern()</code> method. The <code>format()</code> 80 * method takes a number object and selects the message of the 81 * matching plural case. This message will be returned. 82 * </p> 83 * <h5>Patterns and Their Interpretation</h5> 84 * <p> 85 * The pattern text defines the message output for each plural case of the 86 * used locale. The pattern is a sequence of 87 * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white 88 * space characters. Each clause assigns the message <code><i>message</i></code> 89 * to the plural case identified by <code><i>caseKeyword</i></code>. 90 * </p><p> 91 * You always have to define a message text for the default plural case 92 * "<code>other</code>" which is contained in every rule set. If the plural 93 * rules of the <code>PluralFormat</code> object do not contain a plural case 94 * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING 95 * will be set to status. 96 * If you do not specify a message text for a particular plural case, the 97 * message text of the plural case "<code>other</code>" gets assigned to this 98 * plural case. If you specify more than one message for the same plural case, 99 * U_DUPLICATE_KEYWORD will be set to status. 100 * <br/> 101 * Spaces between <code><i>caseKeyword</i></code> and 102 * <code><i>message</i></code> will be ignored; spaces within 103 * <code><i>message</i></code> will be preserved. 104 * </p><p> 105 * The message text for a particular plural case may contain other message 106 * format patterns. <code>PluralFormat</code> preserves these so that you 107 * can use the strings produced by <code>PluralFormat</code> with other 108 * formatters. If you are using <code>PluralFormat</code> inside a 109 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will 110 * automatically evaluate the resulting format pattern.<br/> 111 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed 112 * in message texts to define a nested format pattern.<br/> 113 * The pound sign (<code>#</code>) will be interpreted as the number placeholder 114 * in the message text, if it is not contained in curly braces (to preserve 115 * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will 116 * replace each of those pound signs by the number passed to the 117 * <code>format()</code> method. It will be formatted using a 118 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 119 * need special number formatting, you have to explicitly specify a 120 * <code>NumberFormat</code> for the <code>PluralFormat</code> to use. 121 * </p> 122 * Example 123 * <pre> 124 * UErrorCode status = U_ZERO_ERROR; 125 * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural, 126 * one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste."), 127 * Locale("fr"), status); 128 * if (U_FAILURE(status)) { 129 * return; 130 * } 131 * Formattable args1[] = {(int32_t)0}; 132 * Formattable args2[] = {(int32_t)3}; 133 * FieldPosition ignore(FieldPosition::DONT_CARE); 134 * UnicodeString result; 135 * msgFmt->format(args1, 1, result, ignore, status); 136 * cout << result << endl; 137 * result.remove(); 138 * msgFmt->format(args2, 1, result, ignore, status); 139 * cout << result << endl; 140 * </pre> 141 * Produces the output:<br/> 142 * <code>C'est 0,0 fichier dans la liste.</code><br/> 143 * <code>Ce sont 3 fichiers dans la liste."</code> 144 * <p> 145 * <strong>Note:</strong><br/> 146 * Currently <code>PluralFormat</code> 147 * does not make use of quotes like <code>MessageFormat</code>. 148 * If you use plural format strings with <code>MessageFormat</code> and want 149 * to use a quote sign "<code>'</code>", you have to write "<code>''</code>". 150 * <code>MessageFormat</code> unquotes this pattern and passes the unquoted 151 * pattern to <code>PluralFormat</code>. It's a bit trickier if you use 152 * nested formats that do quoting. In the example above, we wanted to insert 153 * "<code>'</code>" in the number format pattern. Since 154 * <code>NumberFormat</code> supports quotes, we had to insert 155 * "<code>''</code>". But since <code>MessageFormat</code> unquotes the 156 * pattern before it gets passed to <code>PluralFormat</code>, we have to 157 * double these quotes, i.e. write "<code>''''</code>". 158 * </p> 159 * <h4>Defining Custom Plural Rules</h4> 160 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 161 * create a <code>PluralRules</code> object and pass it to 162 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 163 * constructor, this locale will be used to format the number in the message 164 * texts. 165 * </p><p> 166 * For more information about <code>PluralRules</code>, see 167 * {@link PluralRules}. 168 * </p> 169 * 170 * ported from Java 171 * @stable ICU 4.0 172 */ 173 174 class U_I18N_API PluralFormat : public Format { 175 public: 176 177 /** 178 * Creates a new <code>PluralFormat</code> for the default locale. 179 * This locale will be used to get the set of plural rules and for standard 180 * number formatting. 181 * @param status output param set to success/failure code on exit, which 182 * must not indicate a failure before the function call. 183 * @stable ICU 4.0 184 */ 185 PluralFormat(UErrorCode& status); 186 187 /** 188 * Creates a new <code>PluralFormat</code> for a given locale. 189 * @param locale the <code>PluralFormat</code> will be configured with 190 * rules for this locale. This locale will also be used for 191 * standard number formatting. 192 * @param status output param set to success/failure code on exit, which 193 * must not indicate a failure before the function call. 194 * @stable ICU 4.0 195 */ 196 PluralFormat(const Locale& locale, UErrorCode& status); 197 198 /** 199 * Creates a new <code>PluralFormat</code> for a given set of rules. 200 * The standard number formatting will be done using the default locale. 201 * @param rules defines the behavior of the <code>PluralFormat</code> 202 * object. 203 * @param status output param set to success/failure code on exit, which 204 * must not indicate a failure before the function call. 205 * @stable ICU 4.0 206 */ 207 PluralFormat(const PluralRules& rules, UErrorCode& status); 208 209 /** 210 * Creates a new <code>PluralFormat</code> for a given set of rules. 211 * The standard number formatting will be done using the given locale. 212 * @param locale the default number formatting will be done using this 213 * locale. 214 * @param rules defines the behavior of the <code>PluralFormat</code> 215 * object. 216 * @param status output param set to success/failure code on exit, which 217 * must not indicate a failure before the function call. 218 * @stable ICU 4.0 219 */ 220 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); 221 222 /** 223 * Creates a new <code>PluralFormat</code> for a given pattern string. 224 * The default locale will be used to get the set of plural rules and for 225 * standard number formatting. 226 * @param pattern the pattern for this <code>PluralFormat</code>. 227 * errors are returned to status if the pattern is invalid. 228 * @param status output param set to success/failure code on exit, which 229 * must not indicate a failure before the function call. 230 * @stable ICU 4.0 231 */ 232 PluralFormat(const UnicodeString& pattern, UErrorCode& status); 233 234 /** 235 * Creates a new <code>PluralFormat</code> for a given pattern string and 236 * locale. 237 * The locale will be used to get the set of plural rules and for 238 * standard number formatting. 239 * @param locale the <code>PluralFormat</code> will be configured with 240 * rules for this locale. This locale will also be used for 241 * standard number formatting. 242 * @param pattern the pattern for this <code>PluralFormat</code>. 243 * errors are returned to status if the pattern is invalid. 244 * @param status output param set to success/failure code on exit, which 245 * must not indicate a failure before the function call. 246 * @stable ICU 4.0 247 */ 248 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); 249 250 /** 251 * Creates a new <code>PluralFormat</code> for a given set of rules, a 252 * pattern and a locale. 253 * @param rules defines the behavior of the <code>PluralFormat</code> 254 * object. 255 * @param pattern the pattern for this <code>PluralFormat</code>. 256 * errors are returned to status if the pattern is invalid. 257 * @param status output param set to success/failure code on exit, which 258 * must not indicate a failure before the function call. 259 * @stable ICU 4.0 260 */ 261 PluralFormat(const PluralRules& rules, 262 const UnicodeString& pattern, 263 UErrorCode& status); 264 265 /** 266 * Creates a new <code>PluralFormat</code> for a given set of rules, a 267 * pattern and a locale. 268 * @param locale the <code>PluralFormat</code> will be configured with 269 * rules for this locale. This locale will also be used for 270 * standard number formatting. 271 * @param rules defines the behavior of the <code>PluralFormat</code> 272 * object. 273 * @param pattern the pattern for this <code>PluralFormat</code>. 274 * errors are returned to status if the pattern is invalid. 275 * @param status output param set to success/failure code on exit, which 276 * must not indicate a failure before the function call. 277 * @stable ICU 4.0 278 */ 279 PluralFormat(const Locale& locale, 280 const PluralRules& rules, 281 const UnicodeString& pattern, 282 UErrorCode& status); 283 284 /** 285 * copy constructor. 286 * @stable ICU 4.0 287 */ 288 PluralFormat(const PluralFormat& other); 289 290 /** 291 * Destructor. 292 * @stable ICU 4.0 293 */ 294 virtual ~PluralFormat(); 295 296 /** 297 * Sets the pattern used by this plural format. 298 * The method parses the pattern and creates a map of format strings 299 * for the plural rules. 300 * Patterns and their interpretation are specified in the class description. 301 * 302 * @param pattern the pattern for this plural format 303 * errors are returned to status if the pattern is invalid. 304 * @param status output param set to success/failure code on exit, which 305 * must not indicate a failure before the function call. 306 * @stable ICU 4.0 307 */ 308 void applyPattern(const UnicodeString& pattern, UErrorCode& status); 309 310 /** 311 * Formats a plural message for a given number. 312 * 313 * @param number a number for which the plural message should be formatted 314 * for. If no pattern has been applied to this 315 * <code>PluralFormat</code> object yet, the formatted number 316 * will be returned. 317 * @param status output param set to success/failure code on exit, which 318 * must not indicate a failure before the function call. 319 * @return the string containing the formatted plural message. 320 * @stable ICU 4.0 321 */ 322 UnicodeString format(int32_t number, UErrorCode& status) const; 323 324 /** 325 * Formats a plural message for a given number. 326 * 327 * @param number a number for which the plural message should be formatted 328 * for. If no pattern has been applied to this 329 * PluralFormat object yet, the formatted number 330 * will be returned. 331 * @param status output param set to success or failure code on exit, which 332 * must not indicate a failure before the function call. 333 * @return the string containing the formatted plural message. 334 * @stable ICU 4.0 335 */ 336 UnicodeString format(double number, UErrorCode& status) const; 337 338 /** 339 * Formats a plural message for a given number. 340 * 341 * @param number a number for which the plural message should be formatted 342 * for. If no pattern has been applied to this 343 * <code>PluralFormat</code> object yet, the formatted number 344 * will be returned. 345 * @param appendTo output parameter to receive result. 346 * result is appended to existing contents. 347 * @param pos On input: an alignment field, if desired. 348 * On output: the offsets of the alignment field. 349 * @param status output param set to success/failure code on exit, which 350 * must not indicate a failure before the function call. 351 * @return the string containing the formatted plural message. 352 * @stable ICU 4.0 353 */ 354 UnicodeString& format(int32_t number, 355 UnicodeString& appendTo, 356 FieldPosition& pos, 357 UErrorCode& status) const; 358 359 /** 360 * Formats a plural message for a given number. 361 * 362 * @param number a number for which the plural message should be formatted 363 * for. If no pattern has been applied to this 364 * <code>PluralFormat</code> object yet, the formatted number 365 * will be returned. 366 * @param appendTo output parameter to receive result. 367 * result is appended to existing contents. 368 * @param pos On input: an alignment field, if desired. 369 * On output: the offsets of the alignment field. 370 * @param status output param set to success/failure code on exit, which 371 * must not indicate a failure before the function call. 372 * @return the string containing the formatted plural message. 373 * @stable ICU 4.0 374 */ 375 UnicodeString& format(double number, 376 UnicodeString& appendTo, 377 FieldPosition& pos, 378 UErrorCode& status) const; 379 380 /** 381 * Sets the locale used by this <code>PluraFormat</code> object. 382 * Note: Calling this method resets this <code>PluraFormat</code> object, 383 * i.e., a pattern that was applied previously will be removed, 384 * and the NumberFormat is set to the default number format for 385 * the locale. The resulting format behaves the same as one 386 * constructed from {@link #PluralFormat(const Locale& locale, UErrorCode& status)}. 387 * @param locale the <code>locale</code> to use to configure the formatter. 388 * @param status output param set to success/failure code on exit, which 389 * must not indicate a failure before the function call. 390 * @stable ICU 4.0 391 */ 392 void setLocale(const Locale& locale, UErrorCode& status); 393 394 /** 395 * Sets the number format used by this formatter. You only need to 396 * call this if you want a different number format than the default 397 * formatter for the locale. 398 * @param format the number format to use. 399 * @param status output param set to success/failure code on exit, which 400 * must not indicate a failure before the function call. 401 * @stable ICU 4.0 402 */ 403 void setNumberFormat(const NumberFormat* format, UErrorCode& status); 404 405 /** 406 * Assignment operator 407 * 408 * @param other the PluralFormat object to copy from. 409 * @stable ICU 4.0 410 */ 411 PluralFormat& operator=(const PluralFormat& other); 412 413 /** 414 * Return true if another object is semantically equal to this one. 415 * 416 * @param other the PluralFormat object to be compared with. 417 * @return true if other is semantically equal to this. 418 * @stable ICU 4.0 419 */ 420 virtual UBool operator==(const Format& other) const; 421 422 /** 423 * Return true if another object is semantically unequal to this one. 424 * 425 * @param other the PluralFormat object to be compared with. 426 * @return true if other is semantically unequal to this. 427 * @stable ICU 4.0 428 */ 429 virtual UBool operator!=(const Format& other) const; 430 431 /** 432 * Clones this Format object polymorphically. The caller owns the 433 * result and should delete it when done. 434 * @stable ICU 4.0 435 */ 436 virtual Format* clone(void) const; 437 438 /** 439 * Redeclared Format method. 440 * 441 * @param obj The object to be formatted into a string. 442 * @param appendTo output parameter to receive result. 443 * Result is appended to existing contents. 444 * @param pos On input: an alignment field, if desired. 445 * On output: the offsets of the alignment field. 446 * @param status output param filled with success/failure status. 447 * @return Reference to 'appendTo' parameter. 448 * @stable ICU 4.0 449 */ 450 UnicodeString& format(const Formattable& obj, 451 UnicodeString& appendTo, 452 FieldPosition& pos, 453 UErrorCode& status) const; 454 455 /** 456 * Returns the pattern from applyPattern() or constructor(). 457 * 458 * @param appendTo output parameter to receive result. 459 * Result is appended to existing contents. 460 * @return the UnicodeString with inserted pattern. 461 * @stable ICU 4.0 462 */ 463 UnicodeString& toPattern(UnicodeString& appendTo); 464 465 /** 466 * This method is not yet supported by <code>PluralFormat</code>. 467 * <P> 468 * Before calling, set parse_pos.index to the offset you want to start 469 * parsing at in the source. After calling, parse_pos.index is the end of 470 * the text you parsed. If error occurs, index is unchanged. 471 * <P> 472 * When parsing, leading whitespace is discarded (with a successful parse), 473 * while trailing whitespace is left as is. 474 * <P> 475 * See Format::parseObject() for more. 476 * 477 * @param source The string to be parsed into an object. 478 * @param result Formattable to be set to the parse result. 479 * If parse fails, return contents are undefined. 480 * @param parse_pos The position to start parsing at. Upon return 481 * this param is set to the position after the 482 * last character successfully parsed. If the 483 * source is not parsed successfully, this param 484 * will remain unchanged. 485 * @stable ICU 4.0 486 */ 487 virtual void parseObject(const UnicodeString& source, 488 Formattable& result, 489 ParsePosition& parse_pos) const; 490 491 /** 492 * ICU "poor man's RTTI", returns a UClassID for this class. 493 * 494 * @stable ICU 4.0 495 * 496 */ 497 static UClassID U_EXPORT2 getStaticClassID(void); 498 499 /** 500 * ICU "poor man's RTTI", returns a UClassID for the actual class. 501 * 502 * @stable ICU 4.0 503 */ 504 virtual UClassID getDynamicClassID() const; 505 506 private: 507 typedef enum fmtToken { 508 none, 509 tLetter, 510 tNumber, 511 tSpace, 512 tNumberSign, 513 tLeftBrace, 514 tRightBrace 515 }fmtToken; 516 517 Locale locale; 518 PluralRules* pluralRules; 519 UnicodeString pattern; 520 Hashtable *fParsedValuesHash; 521 NumberFormat* numberFormat; 522 NumberFormat* replacedNumberFormat; 523 524 PluralFormat(); // default constructor not implemented 525 void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status); 526 UBool inRange(UChar ch, fmtToken& type); 527 UBool checkSufficientDefinition(); 528 void parsingFailure(); 529 UnicodeString insertFormattedNumber(double number, 530 UnicodeString& message, 531 UnicodeString& appendTo, 532 FieldPosition& pos) const; 533 void copyHashtable(Hashtable *other, UErrorCode& status); 534 }; 535 536 U_NAMESPACE_END 537 538 #endif /* #if !UCONFIG_NO_FORMATTING */ 539 540 #endif // _PLURFMT 541 //eof 542