Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2011-2013, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  messagepattern.h
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2011mar14
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #ifndef __MESSAGEPATTERN_H__
     16 #define __MESSAGEPATTERN_H__
     17 
     18 /**
     19  * \file
     20  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
     21  */
     22 
     23 #include "unicode/utypes.h"
     24 
     25 #if !UCONFIG_NO_FORMATTING
     26 
     27 #include "unicode/parseerr.h"
     28 #include "unicode/unistr.h"
     29 
     30 /**
     31  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
     32  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
     33  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
     34  * <p>
     35  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     36  * even when the pair is between two single, text-quoting apostrophes.
     37  * <p>
     38  * The following table shows examples of desired MessageFormat.format() output
     39  * with the pattern strings that yield that output.
     40  * <p>
     41  * <table>
     42  *   <tr>
     43  *     <th>Desired output</th>
     44  *     <th>DOUBLE_OPTIONAL</th>
     45  *     <th>DOUBLE_REQUIRED</th>
     46  *   </tr>
     47  *   <tr>
     48  *     <td>I see {many}</td>
     49  *     <td>I see '{many}'</td>
     50  *     <td>(same)</td>
     51  *   </tr>
     52  *   <tr>
     53  *     <td>I said {'Wow!'}</td>
     54  *     <td>I said '{''Wow!''}'</td>
     55  *     <td>(same)</td>
     56  *   </tr>
     57  *   <tr>
     58  *     <td>I don't know</td>
     59  *     <td>I don't know OR<br> I don''t know</td>
     60  *     <td>I don''t know</td>
     61  *   </tr>
     62  * </table>
     63  * @stable ICU 4.8
     64  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
     65  */
     66 enum UMessagePatternApostropheMode {
     67     /**
     68      * A literal apostrophe is represented by
     69      * either a single or a double apostrophe pattern character.
     70      * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     71      * if it immediately precedes a curly brace ({ or }),
     72      * or a pipe symbol | if inside a choice format,
     73      * or a pound symbol # if inside a plural format.
     74      * <p>
     75      * This is the default behavior starting with ICU 4.8.
     76      * @stable ICU 4.8
     77      */
     78     UMSGPAT_APOS_DOUBLE_OPTIONAL,
     79     /**
     80      * A literal apostrophe must be represented by
     81      * a double apostrophe pattern character.
     82      * A single apostrophe always starts quoted literal text.
     83      * <p>
     84      * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     85      * @stable ICU 4.8
     86      */
     87     UMSGPAT_APOS_DOUBLE_REQUIRED
     88 };
     89 /**
         * Typedef that lets the apostrophe mode type be named without the enum keyword.
     90  * @stable ICU 4.8
     91  */
     92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
     93 
     94 /**
     95  * MessagePattern::Part type constants.
     96  * @stable ICU 4.8
     97  */
     98 enum UMessagePatternPartType {
     99     /**
    100      * Start of a message pattern (main or nested).
    101      * The length is 0 for the top-level message
    102      * and for a choice argument sub-message, otherwise 1 for the '{'.
    103      * The value indicates the nesting level, starting with 0 for the main message.
    104      * <p>
    105      * There is always a later MSG_LIMIT part.
    106      * @stable ICU 4.8
    107      */
    108     UMSGPAT_PART_TYPE_MSG_START,
    109     /**
    110      * End of a message pattern (main or nested).
    111      * The length is 0 for the top-level message and
    112      * the last sub-message of a choice argument,
    113      * otherwise 1 for the '}' or (in a choice argument style) the '|'.
    114      * The value indicates the nesting level, starting with 0 for the main message.
    115      * @stable ICU 4.8
    116      */
    117     UMSGPAT_PART_TYPE_MSG_LIMIT,
    118     /**
    119      * Indicates a substring of the pattern string which is to be skipped when formatting.
    120      * For example, an apostrophe that begins or ends quoted text
    121      * would be indicated with such a part.
    122      * The value is undefined and currently always 0.
    123      * @stable ICU 4.8
    124      */
    125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    126     /**
    127      * Indicates that a syntax character needs to be inserted for auto-quoting.
    128      * The length is 0.
    129      * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
    130      * @stable ICU 4.8
    131      */
    132     UMSGPAT_PART_TYPE_INSERT_CHAR,
    133     /**
    134      * Indicates a syntactic (non-escaped) # symbol in a plural variant.
    135      * When formatting, replace this part's substring with the
    136      * (value-offset) for the plural argument value.
    137      * The value is undefined and currently always 0.
    138      * @stable ICU 4.8
    139      */
    140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    141     /**
    142      * Start of an argument.
    143      * The length is 1 for the '{'.
    144      * The value is the ordinal value of the ArgType. Use getArgType().
    145      * <p>
    146      * This part is followed by either an ARG_NUMBER or ARG_NAME,
    147      * followed by optional argument sub-parts (see UMessagePatternArgType constants)
    148      * and finally an ARG_LIMIT part.
    149      * @stable ICU 4.8
    150      */
    151     UMSGPAT_PART_TYPE_ARG_START,
    152     /**
    153      * End of an argument.
    154      * The length is 1 for the '}'.
    155      * The value is the ordinal value of the ArgType. Use getArgType().
    156      * @stable ICU 4.8
    157      */
    158     UMSGPAT_PART_TYPE_ARG_LIMIT,
    159     /**
    160      * The argument number, provided by the value.
    161      * @stable ICU 4.8
    162      */
    163     UMSGPAT_PART_TYPE_ARG_NUMBER,
    164     /**
    165      * The argument name.
    166      * The value is undefined and currently always 0.
    167      * @stable ICU 4.8
    168      */
    169     UMSGPAT_PART_TYPE_ARG_NAME,
    170     /**
    171      * The argument type.
    172      * The value is undefined and currently always 0.
    173      * @stable ICU 4.8
    174      */
    175     UMSGPAT_PART_TYPE_ARG_TYPE,
    176     /**
    177      * The argument style text.
    178      * The value is undefined and currently always 0.
    179      * @stable ICU 4.8
    180      */
    181     UMSGPAT_PART_TYPE_ARG_STYLE,
    182     /**
    183      * A selector substring in a "complex" argument style.
    184      * The value is undefined and currently always 0.
    185      * @stable ICU 4.8
    186      */
    187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
    188     /**
    189      * An integer value, for example the offset or an explicit selector value
    190      * in a PluralFormat style.
    191      * The part value is the integer value.
    192      * @stable ICU 4.8
    193      */
    194     UMSGPAT_PART_TYPE_ARG_INT,
    195     /**
    196      * A numeric value, for example the offset or an explicit selector value
    197      * in a PluralFormat style.
    198      * The part value is an index into an internal array of numeric values;
    199      * use getNumericValue().
    200      * @stable ICU 4.8
    201      */
    202     UMSGPAT_PART_TYPE_ARG_DOUBLE
    203 };
    204 /**
         * Typedef that lets the part type be named without the enum keyword.
    205  * @stable ICU 4.8
    206  */
    207 typedef enum UMessagePatternPartType UMessagePatternPartType;
    208 
    209 /**
    210  * Argument type constants.
    211  * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
    212  *
    213  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
    214  * with a nesting level one greater than the surrounding message.
    215  * @stable ICU 4.8
    216  */
    217 enum UMessagePatternArgType {
    218     /**
    219      * The argument has no specified type.
    220      * @stable ICU 4.8
    221      */
    222     UMSGPAT_ARG_TYPE_NONE,
    223     /**
    224      * The argument has a "simple" type which is provided by the ARG_TYPE part.
    225      * An ARG_STYLE part might follow that.
    226      * @stable ICU 4.8
    227      */
    228     UMSGPAT_ARG_TYPE_SIMPLE,
    229     /**
    230      * The argument is a ChoiceFormat with one or more
    231      * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
    232      * @stable ICU 4.8
    233      */
    234     UMSGPAT_ARG_TYPE_CHOICE,
    235     /**
    236      * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
    237      * (e.g., offset:1)
    238      * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
    239      * If the selector has an explicit value (e.g., =2), then
    240      * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
    241      * Otherwise the message immediately follows the ARG_SELECTOR.
    242      * @stable ICU 4.8
    243      */
    244     UMSGPAT_ARG_TYPE_PLURAL,
    245     /**
    246      * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
    247      * @stable ICU 4.8
    248      */
    249     UMSGPAT_ARG_TYPE_SELECT,  // NOTE(review): becomes a trailing comma when U_HIDE_DRAFT_API is defined; pre-C++11 pedantic builds may reject it -- confirm.
    250 #ifndef U_HIDE_DRAFT_API
    251     /**
    252      * The argument is an ordinal-number PluralFormat
    253      * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
    254      * @draft ICU 50
    255      */
    256     UMSGPAT_ARG_TYPE_SELECTORDINAL
    257 #endif /* U_HIDE_DRAFT_API */
    258 };
    259 /**
         * Typedef that lets the argument type be named without the enum keyword.
    260  * @stable ICU 4.8
    261  */
    262 typedef enum UMessagePatternArgType UMessagePatternArgType;
    263 
    264 #ifndef U_HIDE_DRAFT_API
    265 /**
    266  * Returns TRUE if the argument type has a plural style part sequence and semantics,
    267  * that is, if it is UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL.
         * The macro argument can be evaluated twice, so pass an expression without side effects.
    268  * @draft ICU 50
    269  */
    270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    271     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
    272 #endif /* U_HIDE_DRAFT_API */
    273 
    274 enum {
    275     /**
    276      * Return value from MessagePattern::validateArgumentName() for when
    277      * the string is a valid "pattern identifier" but not a number.
    278      * @stable ICU 4.8
    279      */
    280     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
    281 
    282     /**
    283      * Return value from MessagePattern::validateArgumentName() for when
    284      * the string is invalid.
    285      * It might not be a valid "pattern identifier",
    286      * or it has only ASCII digits but there is a leading zero or the number is too large.
    287      * @stable ICU 4.8
    288      */
    289     UMSGPAT_ARG_NAME_NOT_VALID=-2
    290 };
    291 
    292 /**
    293  * Special value (-123456789, as a double) that is returned by
    294  * MessagePattern::getNumericValue() when no numeric value is defined for a part.
    295  * @see MessagePattern::getNumericValue()
    296  * @stable ICU 4.8
    297  */
    298 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
    299 
    300 U_NAMESPACE_BEGIN
    301 
    302 class MessagePatternDoubleList;
    303 class MessagePatternPartsList;
    304 
    305 /**
    306  * Parses and represents ICU MessageFormat patterns.
    307  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
    308  * Used in the implementations of those classes as well as in tools
    309  * for message validation, translation and format conversion.
    310  * <p>
    311  * The parser handles all syntax relevant for identifying message arguments.
    312  * This includes "complex" arguments whose style strings contain
    313  * nested MessageFormat pattern substrings.
    314  * For "simple" arguments (with no nested MessageFormat pattern substrings),
    315  * the argument style is not parsed any further.
    316  * <p>
    317  * The parser handles named and numbered message arguments and allows both in one message.
    318  * <p>
    319  * Once a pattern has been parsed successfully, iterate through the parsed data
    320  * with countParts(), getPart() and related methods.
    321  * <p>
    322  * The data logically represents a parse tree, but is stored and accessed
    323  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
    324  * Arguments and nested messages are best handled via recursion.
    325  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
    326  * the index of the corresponding _LIMIT "part".
    327  * <p>
    328  * List of "parts":
    329  * <pre>
    330  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
    331  * argument = noneArg | simpleArg | complexArg
    332  * complexArg = choiceArg | pluralArg | selectArg
    333  *
    334  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
    335  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
    336  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
    337  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
    338  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
    339  *
    340  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
    341  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
    342  * selectStyle = (ARG_SELECTOR message)+
    343  * </pre>
    344  * <ul>
    345  *   <li>Literal output text is not represented directly by "parts" but accessed
    346  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
    347  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
    348  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
    349  *       the less-than-or-equal-to sign (U+2264).
    350  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
    351  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
    352  *       is the value of an explicit-number selector like "=2",
    353  *       otherwise the selector is a non-numeric identifier.
    354  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
    355  * </ul>
    356  * <p>
    357  * This class is not intended for public subclassing.
    358  *
    359  * @stable ICU 4.8
    360  */
    361 class U_COMMON_API MessagePattern : public UObject {
    362 public:
    363     /**
    364      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
    365      * @param errorCode Standard ICU error code. Its input value must
    366      *                  pass the U_SUCCESS() test, or else the function returns
    367      *                  immediately. Check for U_FAILURE() on output or use with
    368      *                  function chaining. (See User Guide for details.)
    369      * @stable ICU 4.8
    370      */
    371     MessagePattern(UErrorCode &errorCode);
    372 
    373     /**
    374      * Constructs an empty MessagePattern.
    375      * @param mode Explicit UMessagePatternApostropheMode.
    376      * @param errorCode Standard ICU error code. Its input value must
    377      *                  pass the U_SUCCESS() test, or else the function returns
    378      *                  immediately. Check for U_FAILURE() on output or use with
    379      *                  function chaining. (See User Guide for details.)
    380      * @stable ICU 4.8
    381      */
    382     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
    383 
    384     /**
    385      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
    386      * parses the MessageFormat pattern string.
    387      * @param pattern a MessageFormat pattern string
    388      * @param parseError Struct to receive information on the position
    389      *                   of an error within the pattern.
    390      *                   Can be NULL.
    391      * @param errorCode Standard ICU error code. Its input value must
    392      *                  pass the U_SUCCESS() test, or else the function returns
    393      *                  immediately. Check for U_FAILURE() on output or use with
    394      *                  function chaining. (See User Guide for details.)
    395      * TODO: turn @throws into UErrorCode specifics?
    396      * @throws IllegalArgumentException for syntax errors in the pattern string
    397      * @throws IndexOutOfBoundsException if certain limits are exceeded
    398      *         (e.g., argument number too high, argument name too long, etc.)
    399      * @throws NumberFormatException if a number could not be parsed
    400      * @stable ICU 4.8
    401      */
    402     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    403 
    404     /**
    405      * Copy constructor.
    406      * @param other Object to copy.
    407      * @stable ICU 4.8
    408      */
    409     MessagePattern(const MessagePattern &other);
    410 
    411     /**
    412      * Assignment operator.
    413      * @param other Object to copy.
    414      * @return *this=other
    415      * @stable ICU 4.8
    416      */
    417     MessagePattern &operator=(const MessagePattern &other);
    418 
    419     /**
    420      * Destructor.
    421      * @stable ICU 4.8
    422      */
    423     virtual ~MessagePattern();
    424 
    425     /**
    426      * Parses a MessageFormat pattern string.
    427      * @param pattern a MessageFormat pattern string
    428      * @param parseError Struct to receive information on the position
    429      *                   of an error within the pattern.
    430      *                   Can be NULL.
    431      * @param errorCode Standard ICU error code. Its input value must
    432      *                  pass the U_SUCCESS() test, or else the function returns
    433      *                  immediately. Check for U_FAILURE() on output or use with
    434      *                  function chaining. (See User Guide for details.)
    435      * @return *this
    436      * @throws IllegalArgumentException for syntax errors in the pattern string
    437      * @throws IndexOutOfBoundsException if certain limits are exceeded
    438      *         (e.g., argument number too high, argument name too long, etc.)
    439      * @throws NumberFormatException if a number could not be parsed
    440      * @stable ICU 4.8
    441      */
    442     MessagePattern &parse(const UnicodeString &pattern,
    443                           UParseError *parseError, UErrorCode &errorCode);
    444 
    445     /**
    446      * Parses a ChoiceFormat pattern string.
    447      * @param pattern a ChoiceFormat pattern string
    448      * @param parseError Struct to receive information on the position
    449      *                   of an error within the pattern.
    450      *                   Can be NULL.
    451      * @param errorCode Standard ICU error code. Its input value must
    452      *                  pass the U_SUCCESS() test, or else the function returns
    453      *                  immediately. Check for U_FAILURE() on output or use with
    454      *                  function chaining. (See User Guide for details.)
    455      * @return *this
    456      * @throws IllegalArgumentException for syntax errors in the pattern string
    457      * @throws IndexOutOfBoundsException if certain limits are exceeded
    458      *         (e.g., argument number too high, argument name too long, etc.)
    459      * @throws NumberFormatException if a number could not be parsed
    460      * @stable ICU 4.8
    461      */
    462     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
    463                                      UParseError *parseError, UErrorCode &errorCode);
    464 
    465     /**
    466      * Parses a PluralFormat pattern string.
    467      * @param pattern a PluralFormat pattern string
    468      * @param parseError Struct to receive information on the position
    469      *                   of an error within the pattern.
    470      *                   Can be NULL.
    471      * @param errorCode Standard ICU error code. Its input value must
    472      *                  pass the U_SUCCESS() test, or else the function returns
    473      *                  immediately. Check for U_FAILURE() on output or use with
    474      *                  function chaining. (See User Guide for details.)
    475      * @return *this
    476      * @throws IllegalArgumentException for syntax errors in the pattern string
    477      * @throws IndexOutOfBoundsException if certain limits are exceeded
    478      *         (e.g., argument number too high, argument name too long, etc.)
    479      * @throws NumberFormatException if a number could not be parsed
    480      * @stable ICU 4.8
    481      */
    482     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
    483                                      UParseError *parseError, UErrorCode &errorCode);
    484 
    485     /**
    486      * Parses a SelectFormat pattern string.
    487      * @param pattern a SelectFormat pattern string
    488      * @param parseError Struct to receive information on the position
    489      *                   of an error within the pattern.
    490      *                   Can be NULL.
    491      * @param errorCode Standard ICU error code. Its input value must
    492      *                  pass the U_SUCCESS() test, or else the function returns
    493      *                  immediately. Check for U_FAILURE() on output or use with
    494      *                  function chaining. (See User Guide for details.)
    495      * @return *this
    496      * @throws IllegalArgumentException for syntax errors in the pattern string
    497      * @throws IndexOutOfBoundsException if certain limits are exceeded
    498      *         (e.g., argument number too high, argument name too long, etc.)
    499      * @throws NumberFormatException if a number could not be parsed
    500      * @stable ICU 4.8
    501      */
    502     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
    503                                      UParseError *parseError, UErrorCode &errorCode);
    504 
    505     /**
    506      * Clears this MessagePattern.
    507      * countParts() will return 0.
    508      * @stable ICU 4.8
    509      */
    510     void clear();
    511 
    512     /**
    513      * Clears this MessagePattern (by calling clear()) and then sets the UMessagePatternApostropheMode.
    514      * countParts() will return 0.
    515      * @param mode The new UMessagePatternApostropheMode.
    516      * @stable ICU 4.8
    517      */
    518     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
    519         clear();  // drop any previously parsed pattern first
    520         aposMode=mode;  // the new mode is stored only after the clear
    521     }
    522 
    523     /**
    524      * @param other another object to compare with.
    525      * @return TRUE if this object is equivalent to the other one.
    526      * @stable ICU 4.8
    527      */
    528     UBool operator==(const MessagePattern &other) const;
    529 
    530     /**
    531      * @param other another object to compare with.
    532      * @return TRUE if this object is not equivalent to the other one (the negation of operator==).
    533      * @stable ICU 4.8
    534      */
    535     inline UBool operator!=(const MessagePattern &other) const {
    536         return !operator==(other);
    537     }
    538 
    539     /**
    540      * @return A hash code for this object.
    541      * @stable ICU 4.8
    542      */
    543     int32_t hashCode() const;
    544 
    545     /**
                 * Returns the UMessagePatternApostropheMode currently stored in this object.
    546      * @return this instance's UMessagePatternApostropheMode.
    547      * @stable ICU 4.8
    548      */
    549     UMessagePatternApostropheMode getApostropheMode() const {
    550         return aposMode;
    551     }
    552 
    553     // Java has package-private jdkAposMode() here.
    554     // In C++, this is declared in the MessageImpl class.
    555 
    556     /**
    557      * @return a reference to the pattern string stored in this object.
                 *         Being a C++ reference it is never null; it is presumably empty
                 *         if no pattern was parsed (TODO confirm against clear()).
    558      * @stable ICU 4.8
    559      */
    560     const UnicodeString &getPatternString() const {
    561         return msg;
    562     }
    563 
    564     /**
    565      * Does the parsed pattern have named arguments like {first_name}?
    566      * @return TRUE if the parsed pattern has at least one named argument.
    567      * @stable ICU 4.8
    568      */
    569     UBool hasNamedArguments() const {
    570         return hasArgNames;  // answers from the stored flag; the pattern is not examined again here
    571     }
    572 
    573     /**
    574      * Does the parsed pattern have numbered arguments like {2}?
    575      * @return TRUE if the parsed pattern has at least one numbered argument.
    576      * @stable ICU 4.8
    577      */
    578     UBool hasNumberedArguments() const {
    579         return hasArgNumbers;  // answers from the stored flag; the pattern is not examined again here
    580     }
    581 
    582     /**
    583      * Validates and parses an argument name or argument number string.
    584      * An argument name must be a "pattern identifier", that is, it must contain
    585      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
    586      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
    587      * @param name Input string.
    588      * @return &gt;=0 if the name is a valid number,
    589      *         UMSGPAT_ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    590      *         UMSGPAT_ARG_NAME_NOT_VALID (-2) if it is neither.
    591      * @stable ICU 4.8
    592      */
    593     static int32_t validateArgumentName(const UnicodeString &name);
    594 
    595     /**
    596      * Returns a version of the parsed pattern string where each ASCII apostrophe
    597      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
    598      * <p>
    599      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
    600      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
    601      * @return the deep-auto-quoted version of the parsed pattern string.
    602      * @see MessageFormat.autoQuoteApostrophe()
    603      * @stable ICU 4.8
    604      */
    605     UnicodeString autoQuoteApostropheDeep() const;
    606 
    607     class Part;
    608 
    609     /**
    610      * Returns the number of "parts" created by parsing the pattern string.
    611      * Returns 0 if no pattern has been parsed or clear() was called.
    612      * @return the number of pattern parts.
    613      * @stable ICU 4.8
    614      */
    615     int32_t countParts() const {
    616         return partsLength;  // the count is kept in its own member, separate from the parts storage
    617     }
    618 
    619     /**
    620      * Gets the i-th pattern "part".
    621      * @param i The index of the Part data. (0..countParts()-1)
                 *          This inline accessor does not range-check i itself.
    622      * @return the i-th pattern "part".
    623      * @stable ICU 4.8
    624      */
    625     const Part &getPart(int32_t i) const {
    626         return parts[i];
    627     }
    628 
    629     /**
    630      * Returns the UMessagePatternPartType of the i-th pattern "part".
    631      * Convenience method for getPart(i).getType().
    632      * @param i The index of the Part data. (0..countParts()-1)
    633      * @return The UMessagePatternPartType of the i-th Part.
    634      * @stable ICU 4.8
    635      */
    636     UMessagePatternPartType getPartType(int32_t i) const {
    637         return getPart(i).type;  // reads the Part field directly rather than calling getType()
    638     }
    639 
    640     /**
    641      * Returns the pattern index of the specified pattern "part".
    642      * Convenience method for getPart(partIndex).getIndex().
    643      * @param partIndex The index of the Part data. (0..countParts()-1)
    644      * @return The pattern index of this Part.
    645      * @stable ICU 4.8
    646      */
    647     int32_t getPatternIndex(int32_t partIndex) const {
    648         return getPart(partIndex).index;  // reads the Part field directly rather than calling getIndex()
    649     }
    650 
    651     /**
    652      * Returns the substring of the pattern string indicated by the Part.
    653      * Covers the pattern string from part.getIndex() for part.getLength() code units,
                 * that is, up to but not including part.getLimit().
    654      * @param part a part of this MessagePattern.
    655      * @return the substring associated with part.
    656      * @stable ICU 4.8
    657      */
    658     UnicodeString getSubstring(const Part &part) const {
    659         return msg.tempSubString(part.index, part.length);  // arguments are (start, length), not (start, limit)
    660     }
    661 
    662     /**
    663      * Compares the part's substring with the input string s.
    664      * @param part a part of this MessagePattern.
    665      * @param s a string.
    666      * @return TRUE if the part's substring of the pattern string is equal to s.
    667      * @stable ICU 4.8
    668      */
    669     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
    670         return 0==msg.compare(part.index, part.length, s);  // compares in place on msg; a result of 0 means equal
    671     }
    672 
    673     /**
    674      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
    675      * @param part a part of this MessagePattern.
    676      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
    677      * @stable ICU 4.8
    678      */
    679     double getNumericValue(const Part &part) const;
    680 
    681     /**
    682      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
    683      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
    684      * @return the "offset:" value.
    685      * @stable ICU 4.8
    686      */
    687     double getPluralOffset(int32_t pluralStart) const;
    688 
    689     /**
    690      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
    691      * @param start The index of some Part data (0..countParts()-1);
    692      *        this Part should be of Type ARG_START or MSG_START.
    693      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
    694      *         or start itself if getPartType(start)!=ARG|MSG_START.
    695      * @stable ICU 4.8
    696      */
    697     int32_t getLimitPartIndex(int32_t start) const {
    698         int32_t limit=getPart(start).limitPartIndex;  // index stored in the part itself; nothing is searched here
    699         if(limit<start) {  // stored index lies before start: report start itself, as documented above
    700             return start;
    701         }
    702         return limit;
    703     }
    704 
    705     /**
    706      * A message pattern "part", representing a pattern parsing event.
    707      * There is a part for the start and end of a message or argument,
    708      * for quoting and escaping of and with ASCII apostrophes,
    709      * and for syntax elements of "complex" arguments.
    710      * @stable ICU 4.8
    711      */
    712     class Part : public UMemory {
    713     public:
    714         /**
    715          * Default constructor, do not use. It leaves every field uninitialized.
    716          * @internal
    717          */
    718         Part() {}
    719 
    720         /**
    721          * Returns the type of this part.
    722          * @return the part type.
    723          * @stable ICU 4.8
    724          */
    725         UMessagePatternPartType getType() const {
    726             return type;
    727         }
    728 
    729         /**
    730          * Returns the pattern string index associated with this Part.
    731          * @return this part's pattern string index.
    732          * @stable ICU 4.8
    733          */
    734         int32_t getIndex() const {
    735             return index;
    736         }
    737 
    738         /**
    739          * Returns the length of the pattern substring associated with this Part.
    740          * This is 0 for some parts.
    741          * @return this part's pattern substring length.
    742          * @stable ICU 4.8
    743          */
    744         int32_t getLength() const {
    745             return length;
    746         }
    747 
    748         /**
    749          * Returns the pattern string limit (exclusive-end) index associated with this Part.
    750          * Convenience method for getIndex()+getLength().
    751          * @return this part's pattern string limit index, same as getIndex()+getLength().
    752          * @stable ICU 4.8
    753          */
    754         int32_t getLimit() const {
    755             return index+length;
    756         }
    757 
    758         /**
    759          * Returns a value associated with this part.
    760          * See the documentation of each part type for details.
    761          * @return the part value.
    762          * @stable ICU 4.8
    763          */
    764         int32_t getValue() const {
    765             return value;
    766         }
    767 
    768         /**
    769          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
    770          * otherwise UMSGPAT_ARG_TYPE_NONE.
    771          * @return the argument type for this part.
    772          * @stable ICU 4.8
    773          */
    774         UMessagePatternArgType getArgType() const {
    775             UMessagePatternPartType partType=getType();  // distinct name: does not shadow the member "type"
    776             if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    777                 return (UMessagePatternArgType)value;
    778             } else {
    779                 return UMSGPAT_ARG_TYPE_NONE;
    780             }
    781         }
    782 
    783         /**
    784          * Indicates whether the Part type has a numeric value.
    785          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
    786          * @param type The Part type to be tested.
    787          * @return TRUE if the Part type has a numeric value.
    788          * @stable ICU 4.8
    789          */
    790         static UBool hasNumericValue(UMessagePatternPartType type) {
    791             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
    792         }
    793 
    794         /**
    795          * @param other another object to compare with.
    796          * @return TRUE if this object is equivalent to the other one.
    797          * @stable ICU 4.8
    798          */
    799         UBool operator==(const Part &other) const;
    800 
    801         /**
    802          * @param other another object to compare with.
    803          * @return FALSE if this object is equivalent to the other one.
    804          * @stable ICU 4.8
    805          */
    806         inline UBool operator!=(const Part &other) const {
    807             return !operator==(other);
    808         }
    809 
    810         /**
    811          * @return A hash code for this object; mixed in uint32_t so overflow wraps (signed overflow is undefined).
    812          * @stable ICU 4.8
    813          */
    814         int32_t hashCode() const {
    815             return (int32_t)((((uint32_t)type*37+(uint32_t)index)*37+length)*37+(uint32_t)value);
    816         }
    817 
    818     private:
    819         friend class MessagePattern;
    820 
    821         static const int32_t MAX_LENGTH=0xffff;  // 0xffff is also the largest uint16_t value
    822         static const int32_t MAX_VALUE=0x7fff;  // 0x7fff is also the largest int16_t value
    823 
    824         // Some fields are not final because they are modified during pattern parsing.
    825         // After pattern parsing, the parts are effectively immutable.
    826         UMessagePatternPartType type;
    827         int32_t index;  // pattern string index, see getIndex()
    828         uint16_t length;  // pattern substring length, see getLength()
    829         int16_t value;  // see getValue(); reinterpreted as the argument type by getArgType()
    830         int32_t limitPartIndex;  // read by MessagePattern::getLimitPartIndex()
    831     };
    832 
    833 private:
    834     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    835 
    836     void postParse();
    837 
    838     int32_t parseMessage(int32_t index, int32_t msgStartLength,
    839                          int32_t nestingLevel, UMessagePatternArgType parentType,
    840                          UParseError *parseError, UErrorCode &errorCode);
    841 
    842     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
    843                      UParseError *parseError, UErrorCode &errorCode);
    844 
    845     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
    846 
    847     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
    848                              UParseError *parseError, UErrorCode &errorCode);
    849 
    850     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
    851                                      UParseError *parseError, UErrorCode &errorCode);
    852 
    853     /**
    854      * Validates and parses an argument name or argument number string.
    855      * This internal method assumes that the input substring is a "pattern identifier".
    856      * @return &gt;=0 if the name is a valid number,
    857      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    858      *         ARG_NAME_NOT_VALID (-2) if it is neither.
    859      * @see #validateArgumentName(String)
    860      */
    861     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
    862 
    863     int32_t parseArgNumber(int32_t start, int32_t limit) {  // instance overload: forwards with this pattern's msg
    864         return parseArgNumber(msg, start, limit);
    865     }
    866 
    867     /**
    868      * Parses a number from the specified message substring.
    869      * @param start start index into the message string
    870      * @param limit limit index into the message string, must be start<limit
    871      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
    872      * @param parseError NOTE(review): presumably receives the error location on failure; confirm in the .cpp
    873      * @param errorCode NOTE(review): presumably the usual ICU in/out error code; confirm in the .cpp
    874      */
    875     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
    876                      UParseError *parseError, UErrorCode &errorCode);
    877 
    878     // Java has package-private appendReducedApostrophes() here.
    879     // In C++, this is declared in the MessageImpl class.
    880 
    881     int32_t skipWhiteSpace(int32_t index);
    882 
    883     int32_t skipIdentifier(int32_t index);
    884 
    885     /**
    886      * Skips a sequence of characters that could occur in a double value.
    887      * Does not fully parse or validate the value.
    888      */
    889     int32_t skipDouble(int32_t index);
    890 
    891     static UBool isArgTypeChar(UChar32 c);
    892 
    893     UBool isChoice(int32_t index);
    894 
    895     UBool isPlural(int32_t index);
    896 
    897     UBool isSelect(int32_t index);
    898 
    899     UBool isOrdinal(int32_t index);
    900 
    901     /**
    902      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
    903      *         as opposed to inside a top-level choice/plural/select pattern.
    904      */
    905     UBool inMessageFormatPattern(int32_t nestingLevel);
    906 
    907     /**
    908      * @return TRUE if we are in a MessageFormat sub-pattern
    909      *         of a top-level ChoiceFormat pattern.
    910      */
    911     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
    912 
    913     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
    914                  int32_t value, UErrorCode &errorCode);
    915 
    916     void addLimitPart(int32_t start,
    917                       UMessagePatternPartType type, int32_t index, int32_t length,
    918                       int32_t value, UErrorCode &errorCode);
    919 
    920     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
    921 
    922     void setParseError(UParseError *parseError, int32_t index);
    923 
    924     UBool init(UErrorCode &errorCode);
    925     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
    926 
    927     UMessagePatternApostropheMode aposMode;  // apostrophe-quoting mode for this pattern
    928     UnicodeString msg;  // pattern string; Part index/length values address substrings of it
    929     // Part storage. The Java counterpart has: ArrayList<Part> parts=new ArrayList<Part>();
    930     MessagePatternPartsList *partsList;
    931     Part *parts;  // NOTE(review): presumably points at partsList's array; confirm in the .cpp
    932     int32_t partsLength;
    933     // Numeric-value storage. The Java counterpart has: ArrayList<Double> numericValues;
    934     MessagePatternDoubleList *numericValuesList;
    935     double *numericValues;  // NOTE(review): presumably points at numericValuesList's array; confirm in the .cpp
    936     int32_t numericValuesLength;
    937     UBool hasArgNames;
    938     UBool hasArgNumbers;
    939     UBool needsAutoQuoting;
    940 };
    941 
    942 U_NAMESPACE_END
    943 
    944 #endif  // !UCONFIG_NO_FORMATTING
    945 
    946 #endif  // __MESSAGEPATTERN_H__
    947