Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2011-2013, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  messagepattern.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2011mar14
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __MESSAGEPATTERN_H__
     18 #define __MESSAGEPATTERN_H__
     19 
     20 /**
     21  * \file
     22  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
     23  */
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if !UCONFIG_NO_FORMATTING
     28 
     29 #include "unicode/parseerr.h"
     30 #include "unicode/unistr.h"
     31 
     32 /**
     33  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
     34  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
     35  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
     36  * <p>
     37  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     38  * even when the pair is between two single, text-quoting apostrophes.
     39  * <p>
     40  * The following table shows examples of desired MessageFormat.format() output
     41  * with the pattern strings that yield that output.
     42  * <p>
     43  * <table>
     44  *   <tr>
     45  *     <th>Desired output</th>
     46  *     <th>DOUBLE_OPTIONAL</th>
     47  *     <th>DOUBLE_REQUIRED</th>
     48  *   </tr>
     49  *   <tr>
     50  *     <td>I see {many}</td>
     51  *     <td>I see '{many}'</td>
     52  *     <td>(same)</td>
     53  *   </tr>
     54  *   <tr>
     55  *     <td>I said {'Wow!'}</td>
     56  *     <td>I said '{''Wow!''}'</td>
     57  *     <td>(same)</td>
     58  *   </tr>
     59  *   <tr>
     60  *     <td>I don't know</td>
     61  *     <td>I don't know OR<br> I don''t know</td>
     62  *     <td>I don''t know</td>
     63  *   </tr>
     64  * </table>
     65  * @stable ICU 4.8
     66  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
     67  */
     68 enum UMessagePatternApostropheMode {
     69     /**
     70      * A literal apostrophe is represented by
     71      * either a single or a double apostrophe pattern character.
     72      * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     73      * if it immediately precedes a curly brace ({ or }),
     74      * or a pipe symbol | if inside a choice format,
     75      * or a pound symbol # if inside a plural format.
     76      * <p>
     77      * This is the default behavior starting with ICU 4.8.
     78      * @stable ICU 4.8
     79      */
     80     UMSGPAT_APOS_DOUBLE_OPTIONAL,
     81     /**
     82      * A literal apostrophe must be represented by
     83      * a double apostrophe pattern character.
     84      * A single apostrophe always starts quoted literal text.
     85      * <p>
     86      * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     87      * @stable ICU 4.8
     88      */
     89     UMSGPAT_APOS_DOUBLE_REQUIRED
     90 };
     91 /** Typedef giving the enum the same name as its tag, so the type can be written without the enum keyword.
     92  * @stable ICU 4.8
     93  */
     94 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
     95 
     96 /**
     97  * MessagePattern::Part type constants.
     98  * @stable ICU 4.8
     99  */
    100 enum UMessagePatternPartType {
    101     /**
    102      * Start of a message pattern (main or nested).
    103      * The length is 0 for the top-level message
    104      * and for a choice argument sub-message, otherwise 1 for the '{'.
    105      * The value indicates the nesting level, starting with 0 for the main message.
    106      * <p>
    107      * There is always a later MSG_LIMIT part.
    108      * @stable ICU 4.8
    109      */
    110     UMSGPAT_PART_TYPE_MSG_START,
    111     /**
    112      * End of a message pattern (main or nested).
    113      * The length is 0 for the top-level message and
    114      * the last sub-message of a choice argument,
    115      * otherwise 1 for the '}' or (in a choice argument style) the '|'.
    116      * The value indicates the nesting level, starting with 0 for the main message.
    117      * @stable ICU 4.8
    118      */
    119     UMSGPAT_PART_TYPE_MSG_LIMIT,
    120     /**
    121      * Indicates a substring of the pattern string which is to be skipped when formatting.
    122      * For example, an apostrophe that begins or ends quoted text
    123      * would be indicated with such a part.
    124      * The value is undefined and currently always 0.
    125      * @stable ICU 4.8
    126      */
    127     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    128     /**
    129      * Indicates that a syntax character needs to be inserted for auto-quoting.
    130      * The length is 0.
    131      * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
    132      * @stable ICU 4.8
    133      */
    134     UMSGPAT_PART_TYPE_INSERT_CHAR,
    135     /**
    136      * Indicates a syntactic (non-escaped) # symbol in a plural variant.
    137      * When formatting, replace this part's substring with the
    138      * (value-offset) for the plural argument value.
    139      * The value is undefined and currently always 0.
    140      * @stable ICU 4.8
    141      */
    142     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    143     /**
    144      * Start of an argument.
    145      * The length is 1 for the '{'.
    146      * The value is the ordinal value of the ArgType. Use Part::getArgType().
    147      * <p>
    148      * This part is followed by either an ARG_NUMBER or ARG_NAME,
    149      * followed by optional argument sub-parts (see UMessagePatternArgType constants)
    150      * and finally an ARG_LIMIT part.
    151      * @stable ICU 4.8
    152      */
    153     UMSGPAT_PART_TYPE_ARG_START,
    154     /**
    155      * End of an argument.
    156      * The length is 1 for the '}'.
    157      * The value is the ordinal value of the ArgType. Use Part::getArgType().
    158      * @stable ICU 4.8
    159      */
    160     UMSGPAT_PART_TYPE_ARG_LIMIT,
    161     /**
    162      * The argument number, provided by the value.
    163      * @stable ICU 4.8
    164      */
    165     UMSGPAT_PART_TYPE_ARG_NUMBER,
    166     /**
    167      * The argument name.
    168      * The value is undefined and currently always 0.
    169      * @stable ICU 4.8
    170      */
    171     UMSGPAT_PART_TYPE_ARG_NAME,
    172     /**
    173      * The argument type.
    174      * The value is undefined and currently always 0.
    175      * @stable ICU 4.8
    176      */
    177     UMSGPAT_PART_TYPE_ARG_TYPE,
    178     /**
    179      * The argument style text.
    180      * The value is undefined and currently always 0.
    181      * @stable ICU 4.8
    182      */
    183     UMSGPAT_PART_TYPE_ARG_STYLE,
    184     /**
    185      * A selector substring in a "complex" argument style.
    186      * The value is undefined and currently always 0.
    187      * @stable ICU 4.8
    188      */
    189     UMSGPAT_PART_TYPE_ARG_SELECTOR,
    190     /**
    191      * An integer value, for example the offset or an explicit selector value
    192      * in a PluralFormat style.
    193      * The part value is the integer value.
    194      * @stable ICU 4.8
    195      */
    196     UMSGPAT_PART_TYPE_ARG_INT,
    197     /**
    198      * A numeric value, for example the offset or an explicit selector value
    199      * in a PluralFormat style.
    200      * The part value is an index into an internal array of numeric values;
    201      * use getNumericValue().
    202      * @stable ICU 4.8
    203      */
    204     UMSGPAT_PART_TYPE_ARG_DOUBLE
    205 };
    206 /** Typedef giving the enum the same name as its tag, so the type can be written without the enum keyword.
    207  * @stable ICU 4.8
    208  */
    209 typedef enum UMessagePatternPartType UMessagePatternPartType;
    210 
    211 /**
    212  * Argument type constants.
    213  * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
    214  *
    215  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
    216  * with a nesting level one greater than the surrounding message.
    217  * @stable ICU 4.8
    218  */
    219 enum UMessagePatternArgType {
    220     /**
    221      * The argument has no specified type.
    222      * @stable ICU 4.8
    223      */
    224     UMSGPAT_ARG_TYPE_NONE,
    225     /**
    226      * The argument has a "simple" type which is provided by the ARG_TYPE part.
    227      * An ARG_STYLE part might follow that.
    228      * @stable ICU 4.8
    229      */
    230     UMSGPAT_ARG_TYPE_SIMPLE,
    231     /**
    232      * The argument is a ChoiceFormat with one or more
    233      * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
    234      * @stable ICU 4.8
    235      */
    236     UMSGPAT_ARG_TYPE_CHOICE,
    237     /**
    238      * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
    239      * (e.g., offset:1)
    240      * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
    241      * If the selector has an explicit value (e.g., =2), then
    242      * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
    243      * Otherwise the message immediately follows the ARG_SELECTOR.
    244      * @stable ICU 4.8
    245      */
    246     UMSGPAT_ARG_TYPE_PLURAL,
    247     /**
    248      * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
    249      * @stable ICU 4.8
    250      */
    251     UMSGPAT_ARG_TYPE_SELECT,
    252     /**
    253      * The argument is an ordinal-number PluralFormat
    254      * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
    255      * @stable ICU 50
    256      */
    257     UMSGPAT_ARG_TYPE_SELECTORDINAL
    258 };
    259 /** Typedef giving the enum the same name as its tag, so the type can be written without the enum keyword.
    260  * @stable ICU 4.8
    261  */
    262 typedef enum UMessagePatternArgType UMessagePatternArgType;
    263 
    264 /**
    265  * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
    266  * Returns TRUE if the argument type has a plural style part sequence and semantics,
    267  * that is, for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. The macro evaluates argType twice, so pass an expression without side effects.
    268  * @stable ICU 50
    269  */
    270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    271     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
    272 
    273 enum {
    274     /**
    275      * Return value from MessagePattern::validateArgumentName() for when
    276      * the string is a valid "pattern identifier" but not a number.
    277      * @stable ICU 4.8
    278      */
    279     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
    280 
    281     /**
    282      * Return value from MessagePattern::validateArgumentName() for when
    283      * the string is invalid.
    284      * It might not be a valid "pattern identifier",
    285      * or it might have only ASCII digits but there is a leading zero or the number is too large.
    286      * @stable ICU 4.8
    287      */
    288     UMSGPAT_ARG_NAME_NOT_VALID=-2
    289 };
    290 
    291 /**
    292  * Special value (-123456789 converted to double) that is returned by
    293  * MessagePattern::getNumericValue(const Part &) when no numeric value is defined for a part.
    294  * @see MessagePattern::getNumericValue()
    295  * @stable ICU 4.8
    296  */
    297 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
    298 
    299 U_NAMESPACE_BEGIN
    300 
    301 class MessagePatternDoubleList;
    302 class MessagePatternPartsList;
    303 
    304 /**
    305  * Parses and represents ICU MessageFormat patterns.
    306  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
    307  * Used in the implementations of those classes as well as in tools
    308  * for message validation, translation and format conversion.
    309  * <p>
    310  * The parser handles all syntax relevant for identifying message arguments.
    311  * This includes "complex" arguments whose style strings contain
    312  * nested MessageFormat pattern substrings.
    313  * For "simple" arguments (with no nested MessageFormat pattern substrings),
    314  * the argument style is not parsed any further.
    315  * <p>
    316  * The parser handles named and numbered message arguments and allows both in one message.
    317  * <p>
    318  * Once a pattern has been parsed successfully, iterate through the parsed data
    319  * with countParts(), getPart() and related methods.
    320  * <p>
    321  * The data logically represents a parse tree, but is stored and accessed
    322  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
    323  * Arguments and nested messages are best handled via recursion.
    324  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
    325  * the index of the corresponding _LIMIT "part".
    326  * <p>
    327  * List of "parts":
    328  * <pre>
    329  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
    330  * argument = noneArg | simpleArg | complexArg
    331  * complexArg = choiceArg | pluralArg | selectArg
    332  *
    333  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
    334  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
    335  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
    336  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
    337  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
    338  *
    339  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
    340  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
    341  * selectStyle = (ARG_SELECTOR message)+
    342  * </pre>
    343  * <ul>
    344  *   <li>Literal output text is not represented directly by "parts" but accessed
    345  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
    346  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
    347  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
    348  *       the less-than-or-equal-to sign (U+2264).
    349  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
    350  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
    351  *       is the value of an explicit-number selector like "=2",
    352  *       otherwise the selector is a non-numeric identifier.
    353  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
    354  * </ul>
    355  * <p>
    356  * This class is not intended for public subclassing.
    357  *
    358  * @stable ICU 4.8
    359  */
    360 class U_COMMON_API MessagePattern : public UObject {
    361 public:
    362     /**
    363      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
    364      * @param errorCode Standard ICU error code. Its input value must
    365      *                  pass the U_SUCCESS() test, or else the function returns
    366      *                  immediately. Check for U_FAILURE() on output or use with
    367      *                  function chaining. (See User Guide for details.)
    368      * @stable ICU 4.8
    369      */
    370     MessagePattern(UErrorCode &errorCode);
    371 
    372     /**
    373      * Constructs an empty MessagePattern.
    374      * @param mode Explicit UMessagePatternApostropheMode.
    375      * @param errorCode Standard ICU error code. Its input value must
    376      *                  pass the U_SUCCESS() test, or else the function returns
    377      *                  immediately. Check for U_FAILURE() on output or use with
    378      *                  function chaining. (See User Guide for details.)
    379      * @stable ICU 4.8
    380      */
    381     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
    382 
    383     /**
    384      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
    385      * parses the MessageFormat pattern string.
    386      * @param pattern a MessageFormat pattern string
    387      * @param parseError Struct to receive information on the position
    388      *                   of an error within the pattern.
    389      *                   Can be NULL.
    390      * @param errorCode Standard ICU error code. Its input value must
    391      *                  pass the U_SUCCESS() test, or else the function returns
    392      *                  immediately. Check for U_FAILURE() on output or use with
    393      *                  function chaining. (See User Guide for details.)
    394      * TODO: turn @throws into UErrorCode specifics?
    395      * @throws IllegalArgumentException for syntax errors in the pattern string
    396      * @throws IndexOutOfBoundsException if certain limits are exceeded
    397      *         (e.g., argument number too high, argument name too long, etc.)
    398      * @throws NumberFormatException if a number could not be parsed
    399      * @stable ICU 4.8
    400      */
    401     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    402 
    403     /**
    404      * Copy constructor.
    405      * @param other Object to copy.
    406      * @stable ICU 4.8
    407      */
    408     MessagePattern(const MessagePattern &other);
    409 
    410     /**
    411      * Assignment operator.
    412      * @param other Object to copy.
    413      * @return *this=other
    414      * @stable ICU 4.8
    415      */
    416     MessagePattern &operator=(const MessagePattern &other);
    417 
    418     /**
    419      * Destructor.
    420      * @stable ICU 4.8
    421      */
    422     virtual ~MessagePattern();
    423 
    424     /**
    425      * Parses a MessageFormat pattern string.
    426      * @param pattern a MessageFormat pattern string
    427      * @param parseError Struct to receive information on the position
    428      *                   of an error within the pattern.
    429      *                   Can be NULL.
    430      * @param errorCode Standard ICU error code. Its input value must
    431      *                  pass the U_SUCCESS() test, or else the function returns
    432      *                  immediately. Check for U_FAILURE() on output or use with
    433      *                  function chaining. (See User Guide for details.)
    434      * @return *this
    435      * @throws IllegalArgumentException for syntax errors in the pattern string
    436      * @throws IndexOutOfBoundsException if certain limits are exceeded
    437      *         (e.g., argument number too high, argument name too long, etc.)
    438      * @throws NumberFormatException if a number could not be parsed
    439      * @stable ICU 4.8
    440      */
    441     MessagePattern &parse(const UnicodeString &pattern,
    442                           UParseError *parseError, UErrorCode &errorCode);
    443 
    444     /**
    445      * Parses a ChoiceFormat pattern string.
    446      * @param pattern a ChoiceFormat pattern string
    447      * @param parseError Struct to receive information on the position
    448      *                   of an error within the pattern.
    449      *                   Can be NULL.
    450      * @param errorCode Standard ICU error code. Its input value must
    451      *                  pass the U_SUCCESS() test, or else the function returns
    452      *                  immediately. Check for U_FAILURE() on output or use with
    453      *                  function chaining. (See User Guide for details.)
    454      * @return *this
    455      * @throws IllegalArgumentException for syntax errors in the pattern string
    456      * @throws IndexOutOfBoundsException if certain limits are exceeded
    457      *         (e.g., argument number too high, argument name too long, etc.)
    458      * @throws NumberFormatException if a number could not be parsed
    459      * @stable ICU 4.8
    460      */
    461     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
    462                                      UParseError *parseError, UErrorCode &errorCode);
    463 
    464     /**
    465      * Parses a PluralFormat pattern string.
    466      * @param pattern a PluralFormat pattern string
    467      * @param parseError Struct to receive information on the position
    468      *                   of an error within the pattern.
    469      *                   Can be NULL.
    470      * @param errorCode Standard ICU error code. Its input value must
    471      *                  pass the U_SUCCESS() test, or else the function returns
    472      *                  immediately. Check for U_FAILURE() on output or use with
    473      *                  function chaining. (See User Guide for details.)
    474      * @return *this
    475      * @throws IllegalArgumentException for syntax errors in the pattern string
    476      * @throws IndexOutOfBoundsException if certain limits are exceeded
    477      *         (e.g., argument number too high, argument name too long, etc.)
    478      * @throws NumberFormatException if a number could not be parsed
    479      * @stable ICU 4.8
    480      */
    481     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
    482                                      UParseError *parseError, UErrorCode &errorCode);
    483 
    484     /**
    485      * Parses a SelectFormat pattern string.
    486      * @param pattern a SelectFormat pattern string
    487      * @param parseError Struct to receive information on the position
    488      *                   of an error within the pattern.
    489      *                   Can be NULL.
    490      * @param errorCode Standard ICU error code. Its input value must
    491      *                  pass the U_SUCCESS() test, or else the function returns
    492      *                  immediately. Check for U_FAILURE() on output or use with
    493      *                  function chaining. (See User Guide for details.)
    494      * @return *this
    495      * @throws IllegalArgumentException for syntax errors in the pattern string
    496      * @throws IndexOutOfBoundsException if certain limits are exceeded
    497      *         (e.g., argument number too high, argument name too long, etc.)
    498      * @throws NumberFormatException if a number could not be parsed
    499      * @stable ICU 4.8
    500      */
    501     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
    502                                      UParseError *parseError, UErrorCode &errorCode);
    503 
    504     /**
    505      * Clears this MessagePattern.
    506      * countParts() will return 0.
    507      * @stable ICU 4.8
    508      */
    509     void clear();
    510 
    511     /**
    512      * Clears this MessagePattern by calling clear(), then stores the given UMessagePatternApostropheMode.
    513      * countParts() will return 0.
    514      * @param mode The new UMessagePatternApostropheMode.
    515      * @stable ICU 4.8
    516      */
    517     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
    518         clear();         // reset first ...
    519         aposMode=mode;   // ... then record the new mode in the aposMode member
    520     }
    521 
    522     /**
    523      * @param other another object to compare with.
    524      * @return TRUE if this object is equivalent to the other one.
    525      * @stable ICU 4.8
    526      */
    527     UBool operator==(const MessagePattern &other) const;
    528 
    529     /** Inequality test, implemented as the negation of operator==().
    530      * @param other another object to compare with.
    531      * @return FALSE if this object is equivalent to the other one, otherwise TRUE.
    532      * @stable ICU 4.8
    533      */
    534     inline UBool operator!=(const MessagePattern &other) const {
    535         return !(*this==other);
    536     }
    537 
    538     /**
    539      * @return A hash code for this object.
    540      * @stable ICU 4.8
    541      */
    542     int32_t hashCode() const;
    543 
    544     /** Inline accessor for the aposMode member.
    545      * @return this instance's UMessagePatternApostropheMode.
    546      * @stable ICU 4.8
    547      */
    548     UMessagePatternApostropheMode getApostropheMode() const {
    549         return aposMode;
    550     }
    551 
    552     // Java has package-private jdkAposMode() here.
    553     // In C++, this is declared in the MessageImpl class.
    554 
    555     /** Returns the stored pattern string (the msg member) by const reference.
    556      * @return the parsed pattern string. NOTE(review): a C++ reference cannot be null; presumably the string is empty if none was parsed - confirm.
    557      * @stable ICU 4.8
    558      */
    559     const UnicodeString &getPatternString() const {
    560         return msg;
    561     }
    562 
    563     /**
    564      * Does the parsed pattern have named arguments like {first_name}?
    565      * @return TRUE if the parsed pattern has at least one named argument.
    566      * @stable ICU 4.8
    567      */
    568     UBool hasNamedArguments() const {
    569         return hasArgNames;  // member flag; this accessor only reads it
    570     }
    571 
    572     /**
    573      * Does the parsed pattern have numbered arguments like {2}?
    574      * @return TRUE if the parsed pattern has at least one numbered argument.
    575      * @stable ICU 4.8
    576      */
    577     UBool hasNumberedArguments() const {
    578         return hasArgNumbers;  // member flag; this accessor only reads it
    579     }
    580 
    581     /**
    582      * Validates and parses an argument name or argument number string.
    583      * An argument name must be a "pattern identifier", that is, it must contain
    584      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
    585      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
    586      * @param name Input string.
    587      * @return &gt;=0 if the name is a valid number,
    588      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    589      *         ARG_NAME_NOT_VALID (-2) if it is neither.
    590      * @stable ICU 4.8
    591      */
    592     static int32_t validateArgumentName(const UnicodeString &name);
    593 
    594     /**
    595      * Returns a version of the parsed pattern string where each ASCII apostrophe
    596      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
    597      * <p>
    598      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
    599      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
    600      * @return the deep-auto-quoted version of the parsed pattern string.
    601      * @see MessageFormat.autoQuoteApostrophe()
    602      * @stable ICU 4.8
    603      */
    604     UnicodeString autoQuoteApostropheDeep() const;
    605 
    606     class Part;
    607 
    608     /**
    609      * Returns the number of "parts" created by parsing the pattern string.
    610      * Returns 0 if no pattern has been parsed or clear() was called.
    611      * @return the number of pattern parts.
    612      * @stable ICU 4.8
    613      */
    614     int32_t countParts() const {
    615         return partsLength;  // member counter; this accessor only reads it
    616     }
    617 
    618     /**
    619      * Gets the i-th pattern "part".
    620      * @param i The index of the Part data. (0..countParts()-1) This inline accessor does not itself validate i.
    621      * @return the i-th pattern "part".
    622      * @stable ICU 4.8
    623      */
    624     const Part &getPart(int32_t i) const {
    625         return parts[i];
    626     }
    627 
    628     /**
    629      * Looks up the i-th pattern "part" and reports its UMessagePatternPartType.
    630      * Shorthand for getPart(i).getType().
    631      * @param i The index of the Part data. (0..countParts()-1)
    632      * @return The UMessagePatternPartType of the i-th Part.
    633      * @stable ICU 4.8
    634      */
    635     UMessagePatternPartType getPartType(int32_t i) const {
    636         return getPart(i).getType();
    637     }
    638 
    639     /**
    640      * Looks up the specified pattern "part" and reports its pattern string index.
    641      * Shorthand for getPart(partIndex).getIndex().
    642      * @param partIndex The index of the Part data. (0..countParts()-1)
    643      * @return The pattern index of this Part.
    644      * @stable ICU 4.8
    645      */
    646     int32_t getPatternIndex(int32_t partIndex) const {
    647         return getPart(partIndex).getIndex();
    648     }
    649 
    650     /**
    651      * Returns the piece of the pattern string that the Part covers,
    652      * i.e. the text from part.getIndex() up to (but excluding) part.getLimit().
    653      * @param part a part of this MessagePattern.
    654      * @return the substring associated with part.
    655      * @stable ICU 4.8
    656      */
    657     UnicodeString getSubstring(const Part &part) const {
    658         return msg.tempSubString(part.getIndex(), part.getLength());
    659     }
    660 
    661     /**
    662      * Tests whether the pattern text covered by the part equals the input string s.
    663      * @param part a part of this MessagePattern.
    664      * @param s a string.
    665      * @return TRUE if the part's substring of the pattern compares equal to s.
    666      * @stable ICU 4.8
    667      */
    668     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
    669         return msg.compare(part.getIndex(), part.getLength(), s)==0;
    670     }
    671 
    672     /**
    673      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
    674      * @param part a part of this MessagePattern.
    675      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
    676      * @stable ICU 4.8
    677      */
    678     double getNumericValue(const Part &part) const;
    679 
    680     /**
    681      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
    682      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
    683      * @return the "offset:" value.
    684      * @stable ICU 4.8
    685      */
    686     double getPluralOffset(int32_t pluralStart) const;
    687 
    688     /**
    689      * Finds the ARG|MSG_LIMIT part that closes the ARG|MSG_START part at start.
    690      * @param start The index of some Part data (0..countParts()-1);
    691      *        this Part should be of Type ARG_START or MSG_START.
    692      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
    693      *         or start itself if getPartType(start)!=ARG|MSG_START.
    694      * @stable ICU 4.8
    695      */
    696     int32_t getLimitPartIndex(int32_t start) const {
    697         const int32_t limitIndex=getPart(start).limitPartIndex;
    698         // A stored value below start is not used as a limit index;
    699         // start itself is handed back in that case.
    700         return (limitIndex<start) ? start : limitIndex;
    701     }
    703 
    704     /**
    705      * A message pattern "part", representing a pattern parsing event.
    706      * There is a part for the start and end of a message or argument,
    707      * for quoting and escaping of and with ASCII apostrophes,
    708      * and for syntax elements of "complex" arguments.
    709      * @stable ICU 4.8
    710      */
    711     class Part : public UMemory {
    712     public:
    713         /**
    714          * Default constructor, do not use.
    715          * @internal
    716          */
    717         Part() {}
    718 
    719         /**
    720          * Returns the type of this part.
    721          * @return the part type.
    722          * @stable ICU 4.8
    723          */
    724         UMessagePatternPartType getType() const {
    725             return type;
    726         }
    727 
    728         /**
    729          * Returns the pattern string index associated with this Part.
    730          * @return this part's pattern string index.
    731          * @stable ICU 4.8
    732          */
    733         int32_t getIndex() const {
    734             return index;
    735         }
    736 
    737         /**
    738          * Returns the length of the pattern substring associated with this Part.
    739          * This is 0 for some parts.
    740          * @return this part's pattern substring length.
    741          * @stable ICU 4.8
    742          */
    743         int32_t getLength() const {
    744             return length;
    745         }
    746 
    747         /**
    748          * Returns the pattern string limit (exclusive-end) index associated with this Part.
    749          * Convenience method for getIndex()+getLength().
    750          * @return this part's pattern string limit index, same as getIndex()+getLength().
    751          * @stable ICU 4.8
    752          */
    753         int32_t getLimit() const {
    754             return index+length;
    755         }
    756 
    757         /**
    758          * Returns a value associated with this part.
    759          * See the documentation of each part type for details.
    760          * @return the part value.
    761          * @stable ICU 4.8
    762          */
    763         int32_t getValue() const {
    764             return value;
    765         }
    766 
    767         /**
    768          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
    769          * otherwise UMSGPAT_ARG_TYPE_NONE.
    770          * @return the argument type for this part.
    771          * @stable ICU 4.8
    772          */
    773         UMessagePatternArgType getArgType() const {
    774             UMessagePatternPartType type=getType();
    775             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    776                 return (UMessagePatternArgType)value;
    777             } else {
    778                 return UMSGPAT_ARG_TYPE_NONE;
    779             }
    780         }
    781 
    782         /**
    783          * Indicates whether the Part type has a numeric value.
    784          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
    785          * @param type The Part type to be tested.
    786          * @return TRUE if the Part type has a numeric value.
    787          * @stable ICU 4.8
    788          */
    789         static UBool hasNumericValue(UMessagePatternPartType type) {
    790             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
    791         }
    792 
    793         /**
    794          * @param other another object to compare with.
    795          * @return TRUE if this object is equivalent to the other one.
    796          * @stable ICU 4.8
    797          */
    798         UBool operator==(const Part &other) const;
    799 
    800         /**
    801          * @param other another object to compare with.
    802          * @return FALSE if this object is equivalent to the other one.
    803          * @stable ICU 4.8
    804          */
    805         inline UBool operator!=(const Part &other) const {
    806             return !operator==(other);
    807         }
    808 
    809         /**
    810          * @return A hash code for this object.
    811          * @stable ICU 4.8
    812          */
    813         int32_t hashCode() const {
    814             return ((type*37+index)*37+length)*37+value;
    815         }
    816 
    817     private:
    818         friend class MessagePattern;
    819 
    820         static const int32_t MAX_LENGTH=0xffff;
    821         static const int32_t MAX_VALUE=0x7fff;
    822 
    823         // Some fields are not final because they are modified during pattern parsing.
    824         // After pattern parsing, the parts are effectively immutable.
    825         UMessagePatternPartType type;
    826         int32_t index;
    827         uint16_t length;
    828         int16_t value;
    829         int32_t limitPartIndex;
    830     };
    831 
    832 private:
    833     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
    834 
    835     void postParse();
    836 
    837     int32_t parseMessage(int32_t index, int32_t msgStartLength,
    838                          int32_t nestingLevel, UMessagePatternArgType parentType,
    839                          UParseError *parseError, UErrorCode &errorCode);
    840 
    841     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
    842                      UParseError *parseError, UErrorCode &errorCode);
    843 
    844     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
    845 
    846     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
    847                              UParseError *parseError, UErrorCode &errorCode);
    848 
    849     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
    850                                      UParseError *parseError, UErrorCode &errorCode);
    851 
    852     /**
    853      * Validates and parses an argument name or argument number string.
    854      * This internal method assumes that the input substring is a "pattern identifier".
    855      * @return &gt;=0 if the name is a valid number,
    856      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
    857      *         ARG_NAME_NOT_VALID (-2) if it is neither.
    858      * @see #validateArgumentName(String)
    859      */
    860     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
    861 
    862     int32_t parseArgNumber(int32_t start, int32_t limit) {
    863         return parseArgNumber(msg, start, limit);
    864     }
    865 
    866     /**
    867      * Parses a number from the specified message substring.
    868      * @param start start index into the message string
    869      * @param limit limit index into the message string, must be start<limit
    870      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
    871      * @param parseError
    872      * @param errorCode
    873      */
    874     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
    875                      UParseError *parseError, UErrorCode &errorCode);
    876 
    877     // Java has package-private appendReducedApostrophes() here.
    878     // In C++, this is declared in the MessageImpl class.
    879 
    /** NOTE(review): presumably returns the index after any white space that starts at index -- confirm. */
    int32_t skipWhiteSpace(int32_t index);

    /** NOTE(review): presumably returns the index after an identifier that starts at index -- confirm. */
    int32_t skipIdentifier(int32_t index);

    /**
     * Skips over characters that could be part of a double value.
     * The value is neither fully parsed nor validated here.
     */
    int32_t skipDouble(int32_t index);
    889 
    890     static UBool isArgTypeChar(UChar32 c);
    891 
    892     UBool isChoice(int32_t index);
    893 
    894     UBool isPlural(int32_t index);
    895 
    896     UBool isSelect(int32_t index);
    897 
    898     UBool isOrdinal(int32_t index);
    899 
    900     /**
    901      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
    902      *         as opposed to inside a top-level choice/plural/select pattern.
    903      */
    904     UBool inMessageFormatPattern(int32_t nestingLevel);
    905 
    906     /**
    907      * @return TRUE if we are in a MessageFormat sub-pattern
    908      *         of a top-level ChoiceFormat pattern.
    909      */
    910     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
    911 
    912     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
    913                  int32_t value, UErrorCode &errorCode);
    914 
    915     void addLimitPart(int32_t start,
    916                       UMessagePatternPartType type, int32_t index, int32_t length,
    917                       int32_t value, UErrorCode &errorCode);
    918 
    919     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
    920 
    921     void setParseError(UParseError *parseError, int32_t index);
    922 
    923     UBool init(UErrorCode &errorCode);
    924     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
    925 
    // Apostrophe mode of this pattern (see UMessagePatternApostropheMode).
    UMessagePatternApostropheMode aposMode;
    // The pattern string; Part index/length values refer to positions in it
    // (see partSubstringMatches()).
    UnicodeString msg;
    // Java: ArrayList<Part> parts=new ArrayList<Part>();
    // NOTE(review): parts presumably points at the storage owned by partsList,
    // with partsLength valid entries -- confirm.
    MessagePatternPartsList *partsList;
    Part *parts;
    int32_t partsLength;
    // Java: ArrayList<Double> numericValues;
    // NOTE(review): numericValues presumably points at the storage owned by numericValuesList,
    // with numericValuesLength valid entries -- confirm.
    MessagePatternDoubleList *numericValuesList;
    double *numericValues;
    int32_t numericValuesLength;
    // NOTE(review): the flags below presumably record properties found while parsing -- confirm.
    UBool hasArgNames;
    UBool hasArgNumbers;
    UBool needsAutoQuoting;
    939 };
    940 
    941 U_NAMESPACE_END
    942 
    943 #endif  // !UCONFIG_NO_FORMATTING
    944 
    945 #endif  // __MESSAGEPATTERN_H__
    946