1 /* 2 ******************************************************************************* 3 * Copyright (C) 2011-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: messagepattern.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2011mar14 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __MESSAGEPATTERN_H__ 16 #define __MESSAGEPATTERN_H__ 17 18 /** 19 * \file 20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_FORMATTING 26 27 #include "unicode/parseerr.h" 28 #include "unicode/unistr.h" 29 30 /** 31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 34 * <p> 35 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 36 * even when the pair is between two single, text-quoting apostrophes. 37 * <p> 38 * The following table shows examples of desired MessageFormat.format() output 39 * with the pattern strings that yield that output. 40 * <p> 41 * <table> 42 * <tr> 43 * <th>Desired output</th> 44 * <th>DOUBLE_OPTIONAL</th> 45 * <th>DOUBLE_REQUIRED</th> 46 * </tr> 47 * <tr> 48 * <td>I see {many}</td> 49 * <td>I see '{many}'</td> 50 * <td>(same)</td> 51 * </tr> 52 * <tr> 53 * <td>I said {'Wow!'}</td> 54 * <td>I said '{''Wow!''}'</td> 55 * <td>(same)</td> 56 * </tr> 57 * <tr> 58 * <td>I don't know</td> 59 * <td>I don't know OR<br> I don''t know</td> 60 * <td>I don''t know</td> 61 * </tr> 62 * </table> 63 * @stable ICU 4.8 64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 65 */ 66 enum UMessagePatternApostropheMode { 67 /** 68 * A literal apostrophe is represented by 69 * either a single or a double apostrophe pattern character. 70 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 71 * if it immediately precedes a curly brace {}, 72 * or a pipe symbol | if inside a choice format, 73 * or a pound symbol # if inside a plural format. 74 * <p> 75 * This is the default behavior starting with ICU 4.8. 76 * @stable ICU 4.8 77 */ 78 UMSGPAT_APOS_DOUBLE_OPTIONAL, 79 /** 80 * A literal apostrophe must be represented by 81 * a double apostrophe pattern character. 82 * A single apostrophe always starts quoted literal text. 83 * <p> 84 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 85 * @stable ICU 4.8 86 */ 87 UMSGPAT_APOS_DOUBLE_REQUIRED 88 }; 89 /** 90 * @stable ICU 4.8 91 */ 92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 93 94 /** 95 * MessagePattern::Part type constants. 96 * @stable ICU 4.8 97 */ 98 enum UMessagePatternPartType { 99 /** 100 * Start of a message pattern (main or nested). 101 * The length is 0 for the top-level message 102 * and for a choice argument sub-message, otherwise 1 for the '{'. 103 * The value indicates the nesting level, starting with 0 for the main message. 104 * <p> 105 * There is always a later MSG_LIMIT part. 106 * @stable ICU 4.8 107 */ 108 UMSGPAT_PART_TYPE_MSG_START, 109 /** 110 * End of a message pattern (main or nested). 111 * The length is 0 for the top-level message and 112 * the last sub-message of a choice argument, 113 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 114 * The value indicates the nesting level, starting with 0 for the main message. 115 * @stable ICU 4.8 116 */ 117 UMSGPAT_PART_TYPE_MSG_LIMIT, 118 /** 119 * Indicates a substring of the pattern string which is to be skipped when formatting. 120 * For example, an apostrophe that begins or ends quoted text 121 * would be indicated with such a part. 122 * The value is undefined and currently always 0. 123 * @stable ICU 4.8 124 */ 125 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 126 /** 127 * Indicates that a syntax character needs to be inserted for auto-quoting. 128 * The length is 0. 129 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 130 * @stable ICU 4.8 131 */ 132 UMSGPAT_PART_TYPE_INSERT_CHAR, 133 /** 134 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 135 * When formatting, replace this part's substring with the 136 * (value-offset) for the plural argument value. 137 * The value is undefined and currently always 0. 138 * @stable ICU 4.8 139 */ 140 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 141 /** 142 * Start of an argument. 143 * The length is 1 for the '{'. 144 * The value is the ordinal value of the ArgType. Use getArgType(). 145 * <p> 146 * This part is followed by either an ARG_NUMBER or ARG_NAME, 147 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 148 * and finally an ARG_LIMIT part. 149 * @stable ICU 4.8 150 */ 151 UMSGPAT_PART_TYPE_ARG_START, 152 /** 153 * End of an argument. 154 * The length is 1 for the '}'. 155 * The value is the ordinal value of the ArgType. Use getArgType(). 156 * @stable ICU 4.8 157 */ 158 UMSGPAT_PART_TYPE_ARG_LIMIT, 159 /** 160 * The argument number, provided by the value. 161 * @stable ICU 4.8 162 */ 163 UMSGPAT_PART_TYPE_ARG_NUMBER, 164 /** 165 * The argument name. 166 * The value is undefined and currently always 0. 167 * @stable ICU 4.8 168 */ 169 UMSGPAT_PART_TYPE_ARG_NAME, 170 /** 171 * The argument type. 172 * The value is undefined and currently always 0. 173 * @stable ICU 4.8 174 */ 175 UMSGPAT_PART_TYPE_ARG_TYPE, 176 /** 177 * The argument style text. 178 * The value is undefined and currently always 0. 179 * @stable ICU 4.8 180 */ 181 UMSGPAT_PART_TYPE_ARG_STYLE, 182 /** 183 * A selector substring in a "complex" argument style. 184 * The value is undefined and currently always 0. 185 * @stable ICU 4.8 186 */ 187 UMSGPAT_PART_TYPE_ARG_SELECTOR, 188 /** 189 * An integer value, for example the offset or an explicit selector value 190 * in a PluralFormat style. 191 * The part value is the integer value. 192 * @stable ICU 4.8 193 */ 194 UMSGPAT_PART_TYPE_ARG_INT, 195 /** 196 * A numeric value, for example the offset or an explicit selector value 197 * in a PluralFormat style. 198 * The part value is an index into an internal array of numeric values; 199 * use getNumericValue(). 200 * @stable ICU 4.8 201 */ 202 UMSGPAT_PART_TYPE_ARG_DOUBLE 203 }; 204 /** 205 * @stable ICU 4.8 206 */ 207 typedef enum UMessagePatternPartType UMessagePatternPartType; 208 209 /** 210 * Argument type constants. 211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 212 * 213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 214 * with a nesting level one greater than the surrounding message. 215 * @stable ICU 4.8 216 */ 217 enum UMessagePatternArgType { 218 /** 219 * The argument has no specified type. 220 * @stable ICU 4.8 221 */ 222 UMSGPAT_ARG_TYPE_NONE, 223 /** 224 * The argument has a "simple" type which is provided by the ARG_TYPE part. 225 * An ARG_STYLE part might follow that. 226 * @stable ICU 4.8 227 */ 228 UMSGPAT_ARG_TYPE_SIMPLE, 229 /** 230 * The argument is a ChoiceFormat with one or more 231 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 232 * @stable ICU 4.8 233 */ 234 UMSGPAT_ARG_TYPE_CHOICE, 235 /** 236 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 237 * (e.g., offset:1) 238 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 239 * If the selector has an explicit value (e.g., =2), then 240 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 241 * Otherwise the message immediately follows the ARG_SELECTOR. 242 * @stable ICU 4.8 243 */ 244 UMSGPAT_ARG_TYPE_PLURAL, 245 /** 246 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 247 * @stable ICU 4.8 248 */ 249 UMSGPAT_ARG_TYPE_SELECT, 250 #ifndef U_HIDE_DRAFT_API 251 /** 252 * The argument is an ordinal-number PluralFormat 253 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 254 * @draft ICU 50 255 */ 256 UMSGPAT_ARG_TYPE_SELECTORDINAL 257 #endif /* U_HIDE_DRAFT_API */ 258 }; 259 /** 260 * @stable ICU 4.8 261 */ 262 typedef enum UMessagePatternArgType UMessagePatternArgType; 263 264 #ifndef U_HIDE_DRAFT_API 265 /** 266 * Returns TRUE if the argument type has a plural style part sequence and semantics, 267 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 268 * @draft ICU 50 269 */ 270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 271 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 272 #endif /* U_HIDE_DRAFT_API */ 273 274 enum { 275 /** 276 * Return value from MessagePattern.validateArgumentName() for when 277 * the string is a valid "pattern identifier" but not a number. 278 * @stable ICU 4.8 279 */ 280 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 281 282 /** 283 * Return value from MessagePattern.validateArgumentName() for when 284 * the string is invalid. 285 * It might not be a valid "pattern identifier", 286 * or it have only ASCII digits but there is a leading zero or the number is too large. 287 * @stable ICU 4.8 288 */ 289 UMSGPAT_ARG_NAME_NOT_VALID=-2 290 }; 291 292 /** 293 * Special value that is returned by getNumericValue(Part) when no 294 * numeric value is defined for a part. 295 * @see MessagePattern.getNumericValue() 296 * @stable ICU 4.8 297 */ 298 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 299 300 U_NAMESPACE_BEGIN 301 302 class MessagePatternDoubleList; 303 class MessagePatternPartsList; 304 305 /** 306 * Parses and represents ICU MessageFormat patterns. 307 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 308 * Used in the implementations of those classes as well as in tools 309 * for message validation, translation and format conversion. 310 * <p> 311 * The parser handles all syntax relevant for identifying message arguments. 312 * This includes "complex" arguments whose style strings contain 313 * nested MessageFormat pattern substrings. 314 * For "simple" arguments (with no nested MessageFormat pattern substrings), 315 * the argument style is not parsed any further. 316 * <p> 317 * The parser handles named and numbered message arguments and allows both in one message. 318 * <p> 319 * Once a pattern has been parsed successfully, iterate through the parsed data 320 * with countParts(), getPart() and related methods. 321 * <p> 322 * The data logically represents a parse tree, but is stored and accessed 323 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 324 * Arguments and nested messages are best handled via recursion. 325 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 326 * the index of the corresponding _LIMIT "part". 327 * <p> 328 * List of "parts": 329 * <pre> 330 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 331 * argument = noneArg | simpleArg | complexArg 332 * complexArg = choiceArg | pluralArg | selectArg 333 * 334 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 335 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 336 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 337 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 338 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 339 * 340 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 341 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 342 * selectStyle = (ARG_SELECTOR message)+ 343 * </pre> 344 * <ul> 345 * <li>Literal output text is not represented directly by "parts" but accessed 346 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 347 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 348 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 349 * the less-than-or-equal-to sign (U+2264). 350 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 351 * The optional numeric Part between each (ARG_SELECTOR, message) pair 352 * is the value of an explicit-number selector like "=2", 353 * otherwise the selector is a non-numeric identifier. 354 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 355 * </ul> 356 * <p> 357 * This class is not intended for public subclassing. 358 * 359 * @stable ICU 4.8 360 */ 361 class U_COMMON_API MessagePattern : public UObject { 362 public: 363 /** 364 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 365 * @param errorCode Standard ICU error code. Its input value must 366 * pass the U_SUCCESS() test, or else the function returns 367 * immediately. Check for U_FAILURE() on output or use with 368 * function chaining. (See User Guide for details.) 369 * @stable ICU 4.8 370 */ 371 MessagePattern(UErrorCode &errorCode); 372 373 /** 374 * Constructs an empty MessagePattern. 375 * @param mode Explicit UMessagePatternApostropheMode. 376 * @param errorCode Standard ICU error code. Its input value must 377 * pass the U_SUCCESS() test, or else the function returns 378 * immediately. Check for U_FAILURE() on output or use with 379 * function chaining. (See User Guide for details.) 380 * @stable ICU 4.8 381 */ 382 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 383 384 /** 385 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 386 * parses the MessageFormat pattern string. 387 * @param pattern a MessageFormat pattern string 388 * @param parseError Struct to receive information on the position 389 * of an error within the pattern. 390 * Can be NULL. 391 * @param errorCode Standard ICU error code. Its input value must 392 * pass the U_SUCCESS() test, or else the function returns 393 * immediately. Check for U_FAILURE() on output or use with 394 * function chaining. (See User Guide for details.) 395 * TODO: turn @throws into UErrorCode specifics? 396 * @throws IllegalArgumentException for syntax errors in the pattern string 397 * @throws IndexOutOfBoundsException if certain limits are exceeded 398 * (e.g., argument number too high, argument name too long, etc.) 399 * @throws NumberFormatException if a number could not be parsed 400 * @stable ICU 4.8 401 */ 402 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 403 404 /** 405 * Copy constructor. 406 * @param other Object to copy. 407 * @stable ICU 4.8 408 */ 409 MessagePattern(const MessagePattern &other); 410 411 /** 412 * Assignment operator. 413 * @param other Object to copy. 414 * @return *this=other 415 * @stable ICU 4.8 416 */ 417 MessagePattern &operator=(const MessagePattern &other); 418 419 /** 420 * Destructor. 421 * @stable ICU 4.8 422 */ 423 virtual ~MessagePattern(); 424 425 /** 426 * Parses a MessageFormat pattern string. 427 * @param pattern a MessageFormat pattern string 428 * @param parseError Struct to receive information on the position 429 * of an error within the pattern. 430 * Can be NULL. 431 * @param errorCode Standard ICU error code. Its input value must 432 * pass the U_SUCCESS() test, or else the function returns 433 * immediately. Check for U_FAILURE() on output or use with 434 * function chaining. (See User Guide for details.) 435 * @return *this 436 * @throws IllegalArgumentException for syntax errors in the pattern string 437 * @throws IndexOutOfBoundsException if certain limits are exceeded 438 * (e.g., argument number too high, argument name too long, etc.) 439 * @throws NumberFormatException if a number could not be parsed 440 * @stable ICU 4.8 441 */ 442 MessagePattern &parse(const UnicodeString &pattern, 443 UParseError *parseError, UErrorCode &errorCode); 444 445 /** 446 * Parses a ChoiceFormat pattern string. 447 * @param pattern a ChoiceFormat pattern string 448 * @param parseError Struct to receive information on the position 449 * of an error within the pattern. 450 * Can be NULL. 451 * @param errorCode Standard ICU error code. Its input value must 452 * pass the U_SUCCESS() test, or else the function returns 453 * immediately. Check for U_FAILURE() on output or use with 454 * function chaining. (See User Guide for details.) 455 * @return *this 456 * @throws IllegalArgumentException for syntax errors in the pattern string 457 * @throws IndexOutOfBoundsException if certain limits are exceeded 458 * (e.g., argument number too high, argument name too long, etc.) 459 * @throws NumberFormatException if a number could not be parsed 460 * @stable ICU 4.8 461 */ 462 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 463 UParseError *parseError, UErrorCode &errorCode); 464 465 /** 466 * Parses a PluralFormat pattern string. 467 * @param pattern a PluralFormat pattern string 468 * @param parseError Struct to receive information on the position 469 * of an error within the pattern. 470 * Can be NULL. 471 * @param errorCode Standard ICU error code. Its input value must 472 * pass the U_SUCCESS() test, or else the function returns 473 * immediately. Check for U_FAILURE() on output or use with 474 * function chaining. (See User Guide for details.) 475 * @return *this 476 * @throws IllegalArgumentException for syntax errors in the pattern string 477 * @throws IndexOutOfBoundsException if certain limits are exceeded 478 * (e.g., argument number too high, argument name too long, etc.) 479 * @throws NumberFormatException if a number could not be parsed 480 * @stable ICU 4.8 481 */ 482 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 483 UParseError *parseError, UErrorCode &errorCode); 484 485 /** 486 * Parses a SelectFormat pattern string. 487 * @param pattern a SelectFormat pattern string 488 * @param parseError Struct to receive information on the position 489 * of an error within the pattern. 490 * Can be NULL. 491 * @param errorCode Standard ICU error code. Its input value must 492 * pass the U_SUCCESS() test, or else the function returns 493 * immediately. Check for U_FAILURE() on output or use with 494 * function chaining. (See User Guide for details.) 495 * @return *this 496 * @throws IllegalArgumentException for syntax errors in the pattern string 497 * @throws IndexOutOfBoundsException if certain limits are exceeded 498 * (e.g., argument number too high, argument name too long, etc.) 499 * @throws NumberFormatException if a number could not be parsed 500 * @stable ICU 4.8 501 */ 502 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 503 UParseError *parseError, UErrorCode &errorCode); 504 505 /** 506 * Clears this MessagePattern. 507 * countParts() will return 0. 508 * @stable ICU 4.8 509 */ 510 void clear(); 511 512 /** 513 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 514 * countParts() will return 0. 515 * @param mode The new UMessagePatternApostropheMode. 516 * @stable ICU 4.8 517 */ 518 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 519 clear(); 520 aposMode=mode; 521 } 522 523 /** 524 * @param other another object to compare with. 525 * @return TRUE if this object is equivalent to the other one. 526 * @stable ICU 4.8 527 */ 528 UBool operator==(const MessagePattern &other) const; 529 530 /** 531 * @param other another object to compare with. 532 * @return FALSE if this object is equivalent to the other one. 533 * @stable ICU 4.8 534 */ 535 inline UBool operator!=(const MessagePattern &other) const { 536 return !operator==(other); 537 } 538 539 /** 540 * @return A hash code for this object. 541 * @stable ICU 4.8 542 */ 543 int32_t hashCode() const; 544 545 /** 546 * @return this instance's UMessagePatternApostropheMode. 547 * @stable ICU 4.8 548 */ 549 UMessagePatternApostropheMode getApostropheMode() const { 550 return aposMode; 551 } 552 553 // Java has package-private jdkAposMode() here. 554 // In C++, this is declared in the MessageImpl class. 555 556 /** 557 * @return the parsed pattern string (null if none was parsed). 558 * @stable ICU 4.8 559 */ 560 const UnicodeString &getPatternString() const { 561 return msg; 562 } 563 564 /** 565 * Does the parsed pattern have named arguments like {first_name}? 566 * @return TRUE if the parsed pattern has at least one named argument. 567 * @stable ICU 4.8 568 */ 569 UBool hasNamedArguments() const { 570 return hasArgNames; 571 } 572 573 /** 574 * Does the parsed pattern have numbered arguments like {2}? 575 * @return TRUE if the parsed pattern has at least one numbered argument. 576 * @stable ICU 4.8 577 */ 578 UBool hasNumberedArguments() const { 579 return hasArgNumbers; 580 } 581 582 /** 583 * Validates and parses an argument name or argument number string. 584 * An argument name must be a "pattern identifier", that is, it must contain 585 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 586 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 587 * @param name Input string. 588 * @return >=0 if the name is a valid number, 589 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 590 * ARG_NAME_NOT_VALID (-2) if it is neither. 591 * @stable ICU 4.8 592 */ 593 static int32_t validateArgumentName(const UnicodeString &name); 594 595 /** 596 * Returns a version of the parsed pattern string where each ASCII apostrophe 597 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 598 * <p> 599 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 600 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 601 * @return the deep-auto-quoted version of the parsed pattern string. 602 * @see MessageFormat.autoQuoteApostrophe() 603 * @stable ICU 4.8 604 */ 605 UnicodeString autoQuoteApostropheDeep() const; 606 607 class Part; 608 609 /** 610 * Returns the number of "parts" created by parsing the pattern string. 611 * Returns 0 if no pattern has been parsed or clear() was called. 612 * @return the number of pattern parts. 613 * @stable ICU 4.8 614 */ 615 int32_t countParts() const { 616 return partsLength; 617 } 618 619 /** 620 * Gets the i-th pattern "part". 621 * @param i The index of the Part data. (0..countParts()-1) 622 * @return the i-th pattern "part". 623 * @stable ICU 4.8 624 */ 625 const Part &getPart(int32_t i) const { 626 return parts[i]; 627 } 628 629 /** 630 * Returns the UMessagePatternPartType of the i-th pattern "part". 631 * Convenience method for getPart(i).getType(). 632 * @param i The index of the Part data. (0..countParts()-1) 633 * @return The UMessagePatternPartType of the i-th Part. 634 * @stable ICU 4.8 635 */ 636 UMessagePatternPartType getPartType(int32_t i) const { 637 return getPart(i).type; 638 } 639 640 /** 641 * Returns the pattern index of the specified pattern "part". 642 * Convenience method for getPart(partIndex).getIndex(). 643 * @param partIndex The index of the Part data. (0..countParts()-1) 644 * @return The pattern index of this Part. 645 * @stable ICU 4.8 646 */ 647 int32_t getPatternIndex(int32_t partIndex) const { 648 return getPart(partIndex).index; 649 } 650 651 /** 652 * Returns the substring of the pattern string indicated by the Part. 653 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 654 * @param part a part of this MessagePattern. 655 * @return the substring associated with part. 656 * @stable ICU 4.8 657 */ 658 UnicodeString getSubstring(const Part &part) const { 659 return msg.tempSubString(part.index, part.length); 660 } 661 662 /** 663 * Compares the part's substring with the input string s. 664 * @param part a part of this MessagePattern. 665 * @param s a string. 666 * @return TRUE if getSubstring(part).equals(s). 667 * @stable ICU 4.8 668 */ 669 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 670 return 0==msg.compare(part.index, part.length, s); 671 } 672 673 /** 674 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 675 * @param part a part of this MessagePattern. 676 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 677 * @stable ICU 4.8 678 */ 679 double getNumericValue(const Part &part) const; 680 681 /** 682 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 683 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 684 * @return the "offset:" value. 685 * @stable ICU 4.8 686 */ 687 double getPluralOffset(int32_t pluralStart) const; 688 689 /** 690 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 691 * @param start The index of some Part data (0..countParts()-1); 692 * this Part should be of Type ARG_START or MSG_START. 693 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 694 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 695 * @stable ICU 4.8 696 */ 697 int32_t getLimitPartIndex(int32_t start) const { 698 int32_t limit=getPart(start).limitPartIndex; 699 if(limit<start) { 700 return start; 701 } 702 return limit; 703 } 704 705 /** 706 * A message pattern "part", representing a pattern parsing event. 707 * There is a part for the start and end of a message or argument, 708 * for quoting and escaping of and with ASCII apostrophes, 709 * and for syntax elements of "complex" arguments. 710 * @stable ICU 4.8 711 */ 712 class Part : public UMemory { 713 public: 714 /** 715 * Default constructor, do not use. 716 * @internal 717 */ 718 Part() {} 719 720 /** 721 * Returns the type of this part. 722 * @return the part type. 723 * @stable ICU 4.8 724 */ 725 UMessagePatternPartType getType() const { 726 return type; 727 } 728 729 /** 730 * Returns the pattern string index associated with this Part. 731 * @return this part's pattern string index. 732 * @stable ICU 4.8 733 */ 734 int32_t getIndex() const { 735 return index; 736 } 737 738 /** 739 * Returns the length of the pattern substring associated with this Part. 740 * This is 0 for some parts. 741 * @return this part's pattern substring length. 742 * @stable ICU 4.8 743 */ 744 int32_t getLength() const { 745 return length; 746 } 747 748 /** 749 * Returns the pattern string limit (exclusive-end) index associated with this Part. 750 * Convenience method for getIndex()+getLength(). 751 * @return this part's pattern string limit index, same as getIndex()+getLength(). 752 * @stable ICU 4.8 753 */ 754 int32_t getLimit() const { 755 return index+length; 756 } 757 758 /** 759 * Returns a value associated with this part. 760 * See the documentation of each part type for details. 761 * @return the part value. 762 * @stable ICU 4.8 763 */ 764 int32_t getValue() const { 765 return value; 766 } 767 768 /** 769 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 770 * otherwise UMSGPAT_ARG_TYPE_NONE. 771 * @return the argument type for this part. 772 * @stable ICU 4.8 773 */ 774 UMessagePatternArgType getArgType() const { 775 UMessagePatternPartType type=getType(); 776 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 777 return (UMessagePatternArgType)value; 778 } else { 779 return UMSGPAT_ARG_TYPE_NONE; 780 } 781 } 782 783 /** 784 * Indicates whether the Part type has a numeric value. 785 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 786 * @param type The Part type to be tested. 787 * @return TRUE if the Part type has a numeric value. 788 * @stable ICU 4.8 789 */ 790 static UBool hasNumericValue(UMessagePatternPartType type) { 791 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 792 } 793 794 /** 795 * @param other another object to compare with. 796 * @return TRUE if this object is equivalent to the other one. 797 * @stable ICU 4.8 798 */ 799 UBool operator==(const Part &other) const; 800 801 /** 802 * @param other another object to compare with. 803 * @return FALSE if this object is equivalent to the other one. 804 * @stable ICU 4.8 805 */ 806 inline UBool operator!=(const Part &other) const { 807 return !operator==(other); 808 } 809 810 /** 811 * @return A hash code for this object. 812 * @stable ICU 4.8 813 */ 814 int32_t hashCode() const { 815 return ((type*37+index)*37+length)*37+value; 816 } 817 818 private: 819 friend class MessagePattern; 820 821 static const int32_t MAX_LENGTH=0xffff; 822 static const int32_t MAX_VALUE=0x7fff; 823 824 // Some fields are not final because they are modified during pattern parsing. 825 // After pattern parsing, the parts are effectively immutable. 826 UMessagePatternPartType type; 827 int32_t index; 828 uint16_t length; 829 int16_t value; 830 int32_t limitPartIndex; 831 }; 832 833 private: 834 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 835 836 void postParse(); 837 838 int32_t parseMessage(int32_t index, int32_t msgStartLength, 839 int32_t nestingLevel, UMessagePatternArgType parentType, 840 UParseError *parseError, UErrorCode &errorCode); 841 842 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 843 UParseError *parseError, UErrorCode &errorCode); 844 845 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 846 847 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 848 UParseError *parseError, UErrorCode &errorCode); 849 850 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 851 UParseError *parseError, UErrorCode &errorCode); 852 853 /** 854 * Validates and parses an argument name or argument number string. 855 * This internal method assumes that the input substring is a "pattern identifier". 856 * @return >=0 if the name is a valid number, 857 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 858 * ARG_NAME_NOT_VALID (-2) if it is neither. 859 * @see #validateArgumentName(String) 860 */ 861 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 862 863 int32_t parseArgNumber(int32_t start, int32_t limit) { 864 return parseArgNumber(msg, start, limit); 865 } 866 867 /** 868 * Parses a number from the specified message substring. 869 * @param start start index into the message string 870 * @param limit limit index into the message string, must be start<limit 871 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 872 * @param parseError 873 * @param errorCode 874 */ 875 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 876 UParseError *parseError, UErrorCode &errorCode); 877 878 // Java has package-private appendReducedApostrophes() here. 879 // In C++, this is declared in the MessageImpl class. 880 881 int32_t skipWhiteSpace(int32_t index); 882 883 int32_t skipIdentifier(int32_t index); 884 885 /** 886 * Skips a sequence of characters that could occur in a double value. 887 * Does not fully parse or validate the value. 888 */ 889 int32_t skipDouble(int32_t index); 890 891 static UBool isArgTypeChar(UChar32 c); 892 893 UBool isChoice(int32_t index); 894 895 UBool isPlural(int32_t index); 896 897 UBool isSelect(int32_t index); 898 899 UBool isOrdinal(int32_t index); 900 901 /** 902 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 903 * as opposed to inside a top-level choice/plural/select pattern. 904 */ 905 UBool inMessageFormatPattern(int32_t nestingLevel); 906 907 /** 908 * @return TRUE if we are in a MessageFormat sub-pattern 909 * of a top-level ChoiceFormat pattern. 910 */ 911 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 912 913 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 914 int32_t value, UErrorCode &errorCode); 915 916 void addLimitPart(int32_t start, 917 UMessagePatternPartType type, int32_t index, int32_t length, 918 int32_t value, UErrorCode &errorCode); 919 920 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 921 922 void setParseError(UParseError *parseError, int32_t index); 923 924 UBool init(UErrorCode &errorCode); 925 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 926 927 UMessagePatternApostropheMode aposMode; 928 UnicodeString msg; 929 // ArrayList<Part> parts=new ArrayList<Part>(); 930 MessagePatternPartsList *partsList; 931 Part *parts; 932 int32_t partsLength; 933 // ArrayList<Double> numericValues; 934 MessagePatternDoubleList *numericValuesList; 935 double *numericValues; 936 int32_t numericValuesLength; 937 UBool hasArgNames; 938 UBool hasArgNumbers; 939 UBool needsAutoQuoting; 940 }; 941 942 U_NAMESPACE_END 943 944 #endif // !UCONFIG_NO_FORMATTING 945 946 #endif // __MESSAGEPATTERN_H__ 947