Home | History | Annotate | Download | only in i18n
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************
      6  *
      7  * File MSGFMT.CPP
      8  *
      9  * Modification History:
     10  *
     11  *   Date        Name        Description
     12  *   02/19/97    aliu        Converted from java.
     13  *   03/20/97    helena      Finished first cut of implementation.
     14  *   04/10/97    aliu        Made to work on AIX.  Added stoi to replace wtoi.
     15  *   06/11/97    helena      Fixed addPattern to take the pattern correctly.
     16  *   06/17/97    helena      Fixed the getPattern to return the correct pattern.
     17  *   07/09/97    helena      Made ParsePosition into a class.
     18  *   02/22/99    stephen     Removed character literals for EBCDIC safety
     19  *   11/01/09    kirtig      Added SelectFormat
     20  ********************************************************************/
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_FORMATTING
     25 
     26 #include "unicode/msgfmt.h"
     27 #include "unicode/decimfmt.h"
     28 #include "unicode/datefmt.h"
     29 #include "unicode/smpdtfmt.h"
     30 #include "unicode/choicfmt.h"
     31 #include "unicode/plurfmt.h"
     32 #include "unicode/selfmt.h"
     33 #include "unicode/ustring.h"
     34 #include "unicode/ucnv_err.h"
     35 #include "unicode/uchar.h"
     36 #include "unicode/umsg.h"
     37 #include "unicode/rbnf.h"
     38 #include "cmemory.h"
     39 #include "msgfmt_impl.h"
     40 #include "../common/util.h"
     41 #include "uassert.h"
     42 #include "ustrfmt.h"
     43 #include "uvector.h"
     44 
     45 //Todo:remove stdio
     46 #include "stdio.h"
     47 
     48 
     49 // *****************************************************************************
     50 // class MessageFormat
     51 // *****************************************************************************
     52 
     53 #define COMMA             ((UChar)0x002C)
     54 #define SINGLE_QUOTE      ((UChar)0x0027)
     55 #define LEFT_CURLY_BRACE  ((UChar)0x007B)
     56 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
     57 
     58 //---------------------------------------
     59 // static data
     60 
     61 static const UChar ID_EMPTY[]     = {
     62     0 /* empty string, used for default so that null can mark end of list */
     63 };
     64 
     65 static const UChar ID_NUMBER[]    = {
     66     0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0  /* "number" */
     67 };
     68 static const UChar ID_DATE[]      = {
     69     0x64, 0x61, 0x74, 0x65, 0              /* "date" */
     70 };
     71 static const UChar ID_TIME[]      = {
     72     0x74, 0x69, 0x6D, 0x65, 0              /* "time" */
     73 };
     74 static const UChar ID_CHOICE[]    = {
     75     0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0  /* "choice" */
     76 };
     77 static const UChar ID_SPELLOUT[]  = {
     78     0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
     79 };
     80 static const UChar ID_ORDINAL[]   = {
     81     0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
     82 };
     83 static const UChar ID_DURATION[]  = {
     84     0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
     85 };
     86 static const UChar ID_PLURAL[]  = {
     87     0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0  /* "plural" */
     88 };
     89 static const UChar ID_SELECT[]  = {
     90     0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0  /* "select" */
     91 };
     92 
     93 // MessageFormat Type List  Number, Date, Time or Choice
     94 static const UChar * const TYPE_IDS[] = {
     95     ID_EMPTY,
     96     ID_NUMBER,
     97     ID_DATE,
     98     ID_TIME,
     99     ID_CHOICE,
    100     ID_SPELLOUT,
    101     ID_ORDINAL,
    102     ID_DURATION,
    103     ID_PLURAL,
    104     ID_SELECT,
    105     NULL,
    106 };
    107 
    108 static const UChar ID_CURRENCY[]  = {
    109     0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0  /* "currency" */
    110 };
    111 static const UChar ID_PERCENT[]   = {
    112     0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0        /* "percent" */
    113 };
    114 static const UChar ID_INTEGER[]   = {
    115     0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0        /* "integer" */
    116 };
    117 
    118 // NumberFormat modifier list, default, currency, percent or integer
    119 static const UChar * const NUMBER_STYLE_IDS[] = {
    120     ID_EMPTY,
    121     ID_CURRENCY,
    122     ID_PERCENT,
    123     ID_INTEGER,
    124     NULL,
    125 };
    126 
    127 static const UChar ID_SHORT[]     = {
    128     0x73, 0x68, 0x6F, 0x72, 0x74, 0        /* "short" */
    129 };
    130 static const UChar ID_MEDIUM[]    = {
    131     0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0  /* "medium" */
    132 };
    133 static const UChar ID_LONG[]      = {
    134     0x6C, 0x6F, 0x6E, 0x67, 0              /* "long" */
    135 };
    136 static const UChar ID_FULL[]      = {
    137     0x66, 0x75, 0x6C, 0x6C, 0              /* "full" */
    138 };
    139 
    140 // DateFormat modifier list, default, short, medium, long or full
    141 static const UChar * const DATE_STYLE_IDS[] = {
    142     ID_EMPTY,
    143     ID_SHORT,
    144     ID_MEDIUM,
    145     ID_LONG,
    146     ID_FULL,
    147     NULL,
    148 };
    149 
    150 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
    151     U_NAMESPACE_QUALIFIER DateFormat::kDefault,
    152     U_NAMESPACE_QUALIFIER DateFormat::kShort,
    153     U_NAMESPACE_QUALIFIER DateFormat::kMedium,
    154     U_NAMESPACE_QUALIFIER DateFormat::kLong,
    155     U_NAMESPACE_QUALIFIER DateFormat::kFull,
    156 };
    157 
    158 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
    159 
    160 U_NAMESPACE_BEGIN
    161 
    162 // -------------------------------------
    163 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
    164 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
    165 
    166 //--------------------------------------------------------------------
    167 
    168 /**
    169  * Convert a string to an unsigned decimal, ignoring rule whitespace.
    170  * @return a non-negative number if successful, or a negative number
    171  *         upon failure.
    172  */
    173 static int32_t stou(const UnicodeString& string) {
    174     int32_t n = 0;
    175     int32_t count = 0;
    176     UChar32 c;
    177     for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
    178         c = string.char32At(i);
    179         if (uprv_isRuleWhiteSpace(c)) {
    180             continue;
    181         }
    182         int32_t d = u_digit(c, 10);
    183         if (d < 0 || ++count > 10) {
    184             return -1;
    185         }
    186         n = 10*n + d;
    187     }
    188     return n;
    189 }
    190 
    191 /**
    192  * Convert an integer value to a string and append the result to
    193  * the given UnicodeString.
    194  */
    195 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
    196     UChar temp[16];
    197     uprv_itou(temp,16,i,10,0); // 10 == radix
    198     appendTo.append(temp);
    199     return appendTo;
    200 }
    201 
    202 /*
    203  * A structure representing one subformat of this MessageFormat.
    204  * Each subformat has a Format object, an offset into the plain
    205  * pattern text fPattern, and an argument number.  The argument
    206  * number corresponds to the array of arguments to be formatted.
    207  * @internal
    208  */
    209 class MessageFormat::Subformat : public UMemory {
    210 public:
    211     /**
    212      * @internal
    213      */
    214     Format* format; // formatter
    215     /**
    216      * @internal
    217      */
    218     int32_t offset; // offset into fPattern
    219     /**
    220      * @internal
    221      */
    222     // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
    223     int32_t argNum;    // 0-based argument number
    224     /**
    225      * @internal
    226      */
    227     UnicodeString* argName; // argument name or number
    228 
    229     /**
    230      * Clone that.format and assign it to this.format
    231      * Do NOT delete this.format
    232      * @internal
    233      */
    234     Subformat& operator=(const Subformat& that) {
    235         if (this != &that) {
    236             format = that.format ? that.format->clone() : NULL;
    237             offset = that.offset;
    238             argNum = that.argNum;
    239             argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
    240         }
    241         return *this;
    242     }
    243 
    244     /**
    245      * @internal
    246      */
    247     UBool operator==(const Subformat& that) const {
    248         // Do cheap comparisons first
    249         return offset == that.offset &&
    250                argNum == that.argNum &&
    251                ((argName == that.argName) ||
    252                 (*argName == *that.argName)) &&
    253                ((format == that.format) || // handles NULL
    254                 (*format == *that.format));
    255     }
    256 
    257     /**
    258      * @internal
    259      */
    260     UBool operator!=(const Subformat& that) const {
    261         return !operator==(that);
    262     }
    263 };
    264 
    265 // -------------------------------------
    266 // Creates a MessageFormat instance based on the pattern.
    267 
    268 MessageFormat::MessageFormat(const UnicodeString& pattern,
    269                              UErrorCode& success)
    270 : fLocale(Locale::getDefault()),  // Uses the default locale
    271   formatAliases(NULL),
    272   formatAliasesCapacity(0),
    273   idStart(UCHAR_ID_START),
    274   idContinue(UCHAR_ID_CONTINUE),
    275   subformats(NULL),
    276   subformatCount(0),
    277   subformatCapacity(0),
    278   argTypes(NULL),
    279   argTypeCount(0),
    280   argTypeCapacity(0),
    281   isArgNumeric(TRUE),
    282   defaultNumberFormat(NULL),
    283   defaultDateFormat(NULL)
    284 {
    285     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    286         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    287         success = U_MEMORY_ALLOCATION_ERROR;
    288         return;
    289     }
    290     applyPattern(pattern, success);
    291     setLocaleIDs(fLocale.getName(), fLocale.getName());
    292 }
    293 
    294 MessageFormat::MessageFormat(const UnicodeString& pattern,
    295                              const Locale& newLocale,
    296                              UErrorCode& success)
    297 : fLocale(newLocale),
    298   formatAliases(NULL),
    299   formatAliasesCapacity(0),
    300   idStart(UCHAR_ID_START),
    301   idContinue(UCHAR_ID_CONTINUE),
    302   subformats(NULL),
    303   subformatCount(0),
    304   subformatCapacity(0),
    305   argTypes(NULL),
    306   argTypeCount(0),
    307   argTypeCapacity(0),
    308   isArgNumeric(TRUE),
    309   defaultNumberFormat(NULL),
    310   defaultDateFormat(NULL)
    311 {
    312     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    313         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    314         success = U_MEMORY_ALLOCATION_ERROR;
    315         return;
    316     }
    317     applyPattern(pattern, success);
    318     setLocaleIDs(fLocale.getName(), fLocale.getName());
    319 }
    320 
    321 MessageFormat::MessageFormat(const UnicodeString& pattern,
    322                              const Locale& newLocale,
    323                              UParseError& parseError,
    324                              UErrorCode& success)
    325 : fLocale(newLocale),
    326   formatAliases(NULL),
    327   formatAliasesCapacity(0),
    328   idStart(UCHAR_ID_START),
    329   idContinue(UCHAR_ID_CONTINUE),
    330   subformats(NULL),
    331   subformatCount(0),
    332   subformatCapacity(0),
    333   argTypes(NULL),
    334   argTypeCount(0),
    335   argTypeCapacity(0),
    336   isArgNumeric(TRUE),
    337   defaultNumberFormat(NULL),
    338   defaultDateFormat(NULL)
    339 {
    340     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    341         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    342         success = U_MEMORY_ALLOCATION_ERROR;
    343         return;
    344     }
    345     applyPattern(pattern, parseError, success);
    346     setLocaleIDs(fLocale.getName(), fLocale.getName());
    347 }
    348 
    349 MessageFormat::MessageFormat(const MessageFormat& that)
    350 : Format(that),
    351   formatAliases(NULL),
    352   formatAliasesCapacity(0),
    353   idStart(UCHAR_ID_START),
    354   idContinue(UCHAR_ID_CONTINUE),
    355   subformats(NULL),
    356   subformatCount(0),
    357   subformatCapacity(0),
    358   argTypes(NULL),
    359   argTypeCount(0),
    360   argTypeCapacity(0),
    361   isArgNumeric(TRUE),
    362   defaultNumberFormat(NULL),
    363   defaultDateFormat(NULL)
    364 {
    365     *this = that;
    366 }
    367 
    368 MessageFormat::~MessageFormat()
    369 {
    370     int32_t idx;
    371     for (idx = 0; idx < subformatCount; idx++) {
    372         delete subformats[idx].format;
    373         delete subformats[idx].argName;
    374     }
    375     uprv_free(subformats);
    376     subformats = NULL;
    377     subformatCount = subformatCapacity = 0;
    378 
    379     uprv_free(argTypes);
    380     argTypes = NULL;
    381     argTypeCount = argTypeCapacity = 0;
    382 
    383     uprv_free(formatAliases);
    384 
    385     delete defaultNumberFormat;
    386     delete defaultDateFormat;
    387 }
    388 
    389 //--------------------------------------------------------------------
    390 // Variable-size array management
    391 
    392 /**
    393  * Allocate subformats[] to at least the given capacity and return
    394  * TRUE if successful.  If not, leave subformats[] unchanged.
    395  *
    396  * If subformats is NULL, allocate it.  If it is not NULL, enlarge it
    397  * if necessary to be at least as large as specified.
    398  */
    399 UBool MessageFormat::allocateSubformats(int32_t capacity) {
    400     if (subformats == NULL) {
    401         subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
    402         subformatCapacity = capacity;
    403         subformatCount = 0;
    404         if (subformats == NULL) {
    405             subformatCapacity = 0;
    406             return FALSE;
    407         }
    408     } else if (subformatCapacity < capacity) {
    409         if (capacity < 2*subformatCapacity) {
    410             capacity = 2*subformatCapacity;
    411         }
    412         Subformat* a = (Subformat*)
    413             uprv_realloc(subformats, sizeof(*subformats) * capacity);
    414         if (a == NULL) {
    415             return FALSE; // request failed
    416         }
    417         subformats = a;
    418         subformatCapacity = capacity;
    419     }
    420     return TRUE;
    421 }
    422 
    423 /**
    424  * Allocate argTypes[] to at least the given capacity and return
    425  * TRUE if successful.  If not, leave argTypes[] unchanged.
    426  *
    427  * If argTypes is NULL, allocate it.  If it is not NULL, enlarge it
    428  * if necessary to be at least as large as specified.
    429  */
    430 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
    431     if (argTypes == NULL) {
    432         argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
    433         argTypeCount = 0;
    434         argTypeCapacity = capacity;
    435         if (argTypes == NULL) {
    436             argTypeCapacity = 0;
    437             return FALSE;
    438         }
    439         for (int32_t i=0; i<capacity; ++i) {
    440             argTypes[i] = Formattable::kString;
    441         }
    442     } else if (argTypeCapacity < capacity) {
    443         if (capacity < 2*argTypeCapacity) {
    444             capacity = 2*argTypeCapacity;
    445         }
    446         Formattable::Type* a = (Formattable::Type*)
    447             uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
    448         if (a == NULL) {
    449             return FALSE; // request failed
    450         }
    451         for (int32_t i=argTypeCapacity; i<capacity; ++i) {
    452             a[i] = Formattable::kString;
    453         }
    454         argTypes = a;
    455         argTypeCapacity = capacity;
    456     }
    457     return TRUE;
    458 }
    459 
    460 // -------------------------------------
    461 // assignment operator
    462 
    463 const MessageFormat&
    464 MessageFormat::operator=(const MessageFormat& that)
    465 {
    466     // Reallocate the arrays BEFORE changing this object
    467     if (this != &that &&
    468         allocateSubformats(that.subformatCount) &&
    469         allocateArgTypes(that.argTypeCount)) {
    470 
    471         // Calls the super class for assignment first.
    472         Format::operator=(that);
    473 
    474         fPattern = that.fPattern;
    475         setLocale(that.fLocale);
    476         isArgNumeric = that.isArgNumeric;
    477         int32_t j;
    478         for (j=0; j<subformatCount; ++j) {
    479             delete subformats[j].format;
    480         }
    481         subformatCount = 0;
    482 
    483         for (j=0; j<that.subformatCount; ++j) {
    484             // Subformat::operator= does NOT delete this.format
    485             subformats[j] = that.subformats[j];
    486         }
    487         subformatCount = that.subformatCount;
    488 
    489         for (j=0; j<that.argTypeCount; ++j) {
    490             argTypes[j] = that.argTypes[j];
    491         }
    492         argTypeCount = that.argTypeCount;
    493     }
    494     return *this;
    495 }
    496 
    497 UBool
    498 MessageFormat::operator==(const Format& rhs) const
    499 {
    500     if (this == &rhs) return TRUE;
    501 
    502     MessageFormat& that = (MessageFormat&)rhs;
    503 
    504     // Check class ID before checking MessageFormat members
    505     if (!Format::operator==(rhs) ||
    506         fPattern != that.fPattern ||
    507         fLocale != that.fLocale ||
    508         isArgNumeric != that.isArgNumeric) {
    509         return FALSE;
    510     }
    511 
    512     int32_t j;
    513     for (j=0; j<subformatCount; ++j) {
    514         if (subformats[j] != that.subformats[j]) {
    515             return FALSE;
    516         }
    517     }
    518 
    519     return TRUE;
    520 }
    521 
    522 // -------------------------------------
    523 // Creates a copy of this MessageFormat, the caller owns the copy.
    524 
    525 Format*
    526 MessageFormat::clone() const
    527 {
    528     return new MessageFormat(*this);
    529 }
    530 
    531 // -------------------------------------
    532 // Sets the locale of this MessageFormat object to theLocale.
    533 
    534 void
    535 MessageFormat::setLocale(const Locale& theLocale)
    536 {
    537     if (fLocale != theLocale) {
    538         delete defaultNumberFormat;
    539         defaultNumberFormat = NULL;
    540         delete defaultDateFormat;
    541         defaultDateFormat = NULL;
    542     }
    543     fLocale = theLocale;
    544     setLocaleIDs(fLocale.getName(), fLocale.getName());
    545 }
    546 
    547 // -------------------------------------
    548 // Gets the locale of this MessageFormat object.
    549 
    550 const Locale&
    551 MessageFormat::getLocale() const
    552 {
    553     return fLocale;
    554 }
    555 
    556 
    557 
    558 
    559 void
    560 MessageFormat::applyPattern(const UnicodeString& newPattern,
    561                             UErrorCode& status)
    562 {
    563     UParseError parseError;
    564     applyPattern(newPattern,parseError,status);
    565 }
    566 
    567 
    568 // -------------------------------------
    569 // Applies the new pattern and returns an error if the pattern
    570 // is not correct.
    571 void
    572 MessageFormat::applyPattern(const UnicodeString& pattern,
    573                             UParseError& parseError,
    574                             UErrorCode& ec)
    575 {
    576     if(U_FAILURE(ec)) {
    577         return;
    578     }
    579     // The pattern is broken up into segments.  Each time a subformat
    580     // is encountered, 4 segments are recorded.  For example, consider
    581     // the pattern:
    582     //  "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
    583     // The first set of segments is:
    584     //  segments[0] = "There "
    585     //  segments[1] = "0"
    586     //  segments[2] = "choice"
    587     //  segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
    588 
    589     // During parsing, the plain text is accumulated into segments[0].
    590     // Segments 1..3 are used to parse each subpattern.  Each time a
    591     // subpattern is parsed, it creates a format object that is stored
    592     // in the subformats array, together with an offset and argument
    593     // number.  The offset into the plain text stored in
    594     // segments[0].
    595 
    596     // Quotes in segment 0 are handled normally.  They are removed.
    597     // Quotes may not occur in segments 1 or 2.
    598     // Quotes in segment 3 are parsed and _copied_.  This makes
    599     //  subformat patterns work, e.g., {1,number,'#'.##} passes
    600     //  the pattern "'#'.##" to DecimalFormat.
    601 
    602     UnicodeString segments[4];
    603     int32_t part = 0; // segment we are in, 0..3
    604     // Record the highest argument number in the pattern.  (In the
    605     // subpattern {3,number} the argument number is 3.)
    606     int32_t formatNumber = 0;
    607     UBool inQuote = FALSE;
    608     int32_t braceStack = 0;
    609     // Clear error struct
    610     parseError.offset = -1;
    611     parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
    612     int32_t patLen = pattern.length();
    613     int32_t i;
    614 
    615     for (i=0; i<subformatCount; ++i) {
    616         delete subformats[i].format;
    617     }
    618     subformatCount = 0;
    619     argTypeCount = 0;
    620 
    621     for (i=0; i<patLen; ++i) {
    622         UChar ch = pattern[i];
    623         if (part == 0) {
    624             // In segment 0, recognize and remove quotes
    625             if (ch == SINGLE_QUOTE) {
    626                 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
    627                     segments[0] += ch;
    628                     ++i;
    629                 } else {
    630                     inQuote = !inQuote;
    631                 }
    632             } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
    633                 // The only way we get from segment 0 to 1 is via an
    634                 // unquoted '{'.
    635                 part = 1;
    636             } else {
    637                 segments[0] += ch;
    638             }
    639         } else if (inQuote) {
    640             // In segments 1..3, recognize quoted matter, and copy it
    641             // into the segment, together with the quotes.  This takes
    642             // care of '' as well.
    643             segments[part] += ch;
    644             if (ch == SINGLE_QUOTE) {
    645                 inQuote = FALSE;
    646             }
    647         } else {
    648             // We have an unquoted character in segment 1..3
    649             switch (ch) {
    650             case COMMA:
    651                 // Commas bump us to the next segment, except for segment 3,
    652                 // which can contain commas.  See example above.
    653                 if (part < 3)
    654                     part += 1;
    655                 else
    656                     segments[3] += ch;
    657                 break;
    658             case LEFT_CURLY_BRACE:
    659                 // Handle '{' within segment 3.  The initial '{'
    660                 // before segment 1 is handled above.
    661                 if (part != 3) {
    662                     ec = U_PATTERN_SYNTAX_ERROR;
    663                     goto SYNTAX_ERROR;
    664                 }
    665                 ++braceStack;
    666                 segments[part] += ch;
    667                 break;
    668             case RIGHT_CURLY_BRACE:
    669                 if (braceStack == 0) {
    670                     makeFormat(formatNumber, segments, parseError,ec);
    671                     if (U_FAILURE(ec)){
    672                         goto SYNTAX_ERROR;
    673                     }
    674                     formatNumber++;
    675 
    676                     segments[1].remove();
    677                     segments[2].remove();
    678                     segments[3].remove();
    679                     part = 0;
    680                 } else {
    681                     --braceStack;
    682                     segments[part] += ch;
    683                 }
    684                 break;
    685             case SINGLE_QUOTE:
    686                 inQuote = TRUE;
    687                 // fall through (copy quote chars in segments 1..3)
    688             default:
    689                 segments[part] += ch;
    690                 break;
    691             }
    692         }
    693     }
    694     if (braceStack != 0 || part != 0) {
    695         // Unmatched braces in the pattern
    696         ec = U_UNMATCHED_BRACES;
    697         goto SYNTAX_ERROR;
    698     }
    699     fPattern = segments[0];
    700     return;
    701 
    702  SYNTAX_ERROR:
    703     syntaxError(pattern, i, parseError);
    704     for (i=0; i<subformatCount; ++i) {
    705         delete subformats[i].format;
    706     }
    707     argTypeCount = subformatCount = 0;
    708 }
    709 // -------------------------------------
    710 // Converts this MessageFormat instance to a pattern.
    711 
    712 UnicodeString&
    713 MessageFormat::toPattern(UnicodeString& appendTo) const {
    714     // later, make this more extensible
    715     int32_t lastOffset = 0;
    716     int32_t i;
    717     for (i=0; i<subformatCount; ++i) {
    718         copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
    719         lastOffset = subformats[i].offset;
    720         appendTo += LEFT_CURLY_BRACE;
    721         if (isArgNumeric) {
    722             itos(subformats[i].argNum, appendTo);
    723         }
    724         else {
    725             appendTo += *subformats[i].argName;
    726         }
    727         Format* fmt = subformats[i].format;
    728         if (fmt == NULL) {
    729             // do nothing, string format
    730         }
    731         else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
    732 
    733             UErrorCode ec = U_ZERO_ERROR;
    734             NumberFormat& formatAlias = *(NumberFormat*)fmt;
    735             NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
    736             NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
    737             NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
    738             NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
    739 
    740             appendTo += COMMA;
    741             appendTo += ID_NUMBER;
    742             if (formatAlias != *defaultTemplate) {
    743                 appendTo += COMMA;
    744                 if (formatAlias == *currencyTemplate) {
    745                     appendTo += ID_CURRENCY;
    746                 }
    747                 else if (formatAlias == *percentTemplate) {
    748                     appendTo += ID_PERCENT;
    749                 }
    750                 else if (formatAlias == *integerTemplate) {
    751                     appendTo += ID_INTEGER;
    752                 }
    753                 else {
    754                     UnicodeString buffer;
    755                     appendTo += ((DecimalFormat*)fmt)->toPattern(buffer);
    756                 }
    757             }
    758 
    759             delete defaultTemplate;
    760             delete currencyTemplate;
    761             delete percentTemplate;
    762             delete integerTemplate;
    763         }
    764         else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
    765             DateFormat& formatAlias = *(DateFormat*)fmt;
    766             DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
    767             DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
    768             DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
    769             DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
    770             DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
    771             DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
    772             DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
    773             DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
    774 
    775 
    776             appendTo += COMMA;
    777             if (formatAlias == *defaultDateTemplate) {
    778                 appendTo += ID_DATE;
    779             }
    780             else if (formatAlias == *shortDateTemplate) {
    781                 appendTo += ID_DATE;
    782                 appendTo += COMMA;
    783                 appendTo += ID_SHORT;
    784             }
    785             else if (formatAlias == *defaultDateTemplate) {
    786                 appendTo += ID_DATE;
    787                 appendTo += COMMA;
    788                 appendTo += ID_MEDIUM;
    789             }
    790             else if (formatAlias == *longDateTemplate) {
    791                 appendTo += ID_DATE;
    792                 appendTo += COMMA;
    793                 appendTo += ID_LONG;
    794             }
    795             else if (formatAlias == *fullDateTemplate) {
    796                 appendTo += ID_DATE;
    797                 appendTo += COMMA;
    798                 appendTo += ID_FULL;
    799             }
    800             else if (formatAlias == *defaultTimeTemplate) {
    801                 appendTo += ID_TIME;
    802             }
    803             else if (formatAlias == *shortTimeTemplate) {
    804                 appendTo += ID_TIME;
    805                 appendTo += COMMA;
    806                 appendTo += ID_SHORT;
    807             }
    808             else if (formatAlias == *defaultTimeTemplate) {
    809                 appendTo += ID_TIME;
    810                 appendTo += COMMA;
    811                 appendTo += ID_MEDIUM;
    812             }
    813             else if (formatAlias == *longTimeTemplate) {
    814                 appendTo += ID_TIME;
    815                 appendTo += COMMA;
    816                 appendTo += ID_LONG;
    817             }
    818             else if (formatAlias == *fullTimeTemplate) {
    819                 appendTo += ID_TIME;
    820                 appendTo += COMMA;
    821                 appendTo += ID_FULL;
    822             }
    823             else {
    824                 UnicodeString buffer;
    825                 appendTo += ID_DATE;
    826                 appendTo += COMMA;
    827                 appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer);
    828             }
    829 
    830             delete defaultDateTemplate;
    831             delete shortDateTemplate;
    832             delete longDateTemplate;
    833             delete fullDateTemplate;
    834             delete defaultTimeTemplate;
    835             delete shortTimeTemplate;
    836             delete longTimeTemplate;
    837             delete fullTimeTemplate;
    838             // {sfb} there should be a more efficient way to do this!
    839         }
    840         else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
    841             UnicodeString buffer;
    842             appendTo += COMMA;
    843             appendTo += ID_CHOICE;
    844             appendTo += COMMA;
    845             appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
    846         }
    847         else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) {
    848             UnicodeString buffer;
    849             appendTo += ((PluralFormat*)fmt)->toPattern(buffer);
    850         }
    851         else if (fmt->getDynamicClassID() == SelectFormat::getStaticClassID()) {
    852             UnicodeString buffer;
    853             appendTo += ((SelectFormat*)fmt)->toPattern(buffer);
    854         }
    855         else {
    856             //appendTo += ", unknown";
    857         }
    858         appendTo += RIGHT_CURLY_BRACE;
    859     }
    860     copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
    861     return appendTo;
    862 }
    863 
    864 // -------------------------------------
    865 // Adopts the new formats array and updates the array count.
    866 // This MessageFormat instance owns the new formats.
    867 
    868 void
    869 MessageFormat::adoptFormats(Format** newFormats,
    870                             int32_t count) {
    871     if (newFormats == NULL || count < 0) {
    872         return;
    873     }
    874 
    875     int32_t i;
    876     if (allocateSubformats(count)) {
    877         for (i=0; i<subformatCount; ++i) {
    878             delete subformats[i].format;
    879         }
    880         for (i=0; i<count; ++i) {
    881             subformats[i].format = newFormats[i];
    882         }
    883         subformatCount = count;
    884     } else {
    885         // An adopt method must always take ownership.  Delete
    886         // the incoming format objects and return unchanged.
    887         for (i=0; i<count; ++i) {
    888             delete newFormats[i];
    889         }
    890     }
    891 
    892     // TODO: What about the .offset and .argNum fields?
    893 }
    894 
    895 // -------------------------------------
    896 // Sets the new formats array and updates the array count.
    897 // This MessageFormat instance makes a copy of the new formats.
    898 
    899 void
    900 MessageFormat::setFormats(const Format** newFormats,
    901                           int32_t count) {
    902     if (newFormats == NULL || count < 0) {
    903         return;
    904     }
    905 
    906     if (allocateSubformats(count)) {
    907         int32_t i;
    908         for (i=0; i<subformatCount; ++i) {
    909             delete subformats[i].format;
    910         }
    911         subformatCount = 0;
    912 
    913         for (i=0; i<count; ++i) {
    914             subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
    915         }
    916         subformatCount = count;
    917     }
    918 
    919     // TODO: What about the .offset and .arg fields?
    920 }
    921 
    922 // -------------------------------------
    923 // Adopt a single format by format number.
    924 // Do nothing if the format number is not less than the array count.
    925 
    926 void
    927 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
    928     if (n < 0 || n >= subformatCount) {
    929         delete newFormat;
    930     } else {
    931         delete subformats[n].format;
    932         subformats[n].format = newFormat;
    933     }
    934 }
    935 
    936 // -------------------------------------
    937 // Adopt a single format by format name.
    938 // Do nothing if there is no match of formatName.
    939 void
    940 MessageFormat::adoptFormat(const UnicodeString& formatName,
    941                            Format* formatToAdopt,
    942                            UErrorCode& status) {
    943     if (isArgNumeric ) {
    944         int32_t argumentNumber = stou(formatName);
    945         if (argumentNumber<0) {
    946             status = U_ARGUMENT_TYPE_MISMATCH;
    947             return;
    948         }
    949         adoptFormat(argumentNumber, formatToAdopt);
    950         return;
    951     }
    952     for (int32_t i=0; i<subformatCount; ++i) {
    953         if (formatName==*subformats[i].argName) {
    954             delete subformats[i].format;
    955             if ( formatToAdopt== NULL) {
    956                 // This should never happen -- but we'll be nice if it does
    957                 subformats[i].format = NULL;
    958             } else {
    959                 subformats[i].format = formatToAdopt;
    960             }
    961         }
    962     }
    963 }
    964 
    965 // -------------------------------------
    966 // Set a single format.
    967 // Do nothing if the variable is not less than the array count.
    968 
    969 void
    970 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
    971     if (n >= 0 && n < subformatCount) {
    972         delete subformats[n].format;
    973         if (&newFormat == NULL) {
    974             // This should never happen -- but we'll be nice if it does
    975             subformats[n].format = NULL;
    976         } else {
    977             subformats[n].format = newFormat.clone();
    978         }
    979     }
    980 }
    981 
    982 // -------------------------------------
    983 // Get a single format by format name.
    984 // Returns NULL if no subformat matches formatName.
    985 Format *
    986 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
    987 
    988     if (U_FAILURE(status)) return NULL;
    989 
    990     if (isArgNumeric ) {
    991         int32_t argumentNumber = stou(formatName);
    992         if (argumentNumber<0) {
    993             status = U_ARGUMENT_TYPE_MISMATCH;
    994             return NULL;
    995         }
    996         if (argumentNumber < 0 || argumentNumber >= subformatCount) {
    997             return subformats[argumentNumber].format;
    998         }
    999         else {
   1000             return NULL;
   1001         }
   1002     }
   1003 
   1004     for (int32_t i=0; i<subformatCount; ++i) {
   1005         if (formatName==*subformats[i].argName)
   1006         {
   1007             return subformats[i].format;
   1008         }
   1009     }
   1010     return NULL;
   1011 }
   1012 
   1013 // -------------------------------------
   1014 // Set a single format by format name
   1015 // Do nothing if no argument name matches formatName.
   1016 void
   1017 MessageFormat::setFormat(const UnicodeString& formatName,
   1018                          const Format& newFormat,
   1019                          UErrorCode& status) {
   1020     if (isArgNumeric) {
   1021         status = U_ARGUMENT_TYPE_MISMATCH;
   1022         return;
   1023     }
   1024     for (int32_t i=0; i<subformatCount; ++i) {
   1025         if (formatName==*subformats[i].argName)
   1026         {
   1027             delete subformats[i].format;
   1028             if (&newFormat == NULL) {
   1029                 // This should never happen -- but we'll be nice if it does
   1030                 subformats[i].format = NULL;
   1031             } else {
   1032                 subformats[i].format = newFormat.clone();
   1033             }
   1034             break;
   1035         }
   1036     }
   1037 }
   1038 
   1039 // -------------------------------------
   1040 // Gets the format array.
   1041 
   1042 const Format**
   1043 MessageFormat::getFormats(int32_t& cnt) const
   1044 {
   1045     // This old API returns an array (which we hold) of Format*
   1046     // pointers.  The array is valid up to the next call to any
   1047     // method on this object.  We construct and resize an array
   1048     // on demand that contains aliases to the subformats[i].format
   1049     // pointers.
   1050     MessageFormat* t = (MessageFormat*) this;
   1051     cnt = 0;
   1052     if (formatAliases == NULL) {
   1053         t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
   1054         Format** a = (Format**)
   1055             uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
   1056         if (a == NULL) {
   1057             return NULL;
   1058         }
   1059         t->formatAliases = a;
   1060     } else if (subformatCount > formatAliasesCapacity) {
   1061         Format** a = (Format**)
   1062             uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
   1063         if (a == NULL) {
   1064             return NULL;
   1065         }
   1066         t->formatAliases = a;
   1067         t->formatAliasesCapacity = subformatCount;
   1068     }
   1069     for (int32_t i=0; i<subformatCount; ++i) {
   1070         t->formatAliases[i] = subformats[i].format;
   1071     }
   1072     cnt = subformatCount;
   1073     return (const Format**)formatAliases;
   1074 }
   1075 
   1076 
   1077 StringEnumeration*
   1078 MessageFormat::getFormatNames(UErrorCode& status) {
   1079     if (U_FAILURE(status))  return NULL;
   1080 
   1081     if (isArgNumeric) {
   1082         status = U_ARGUMENT_TYPE_MISMATCH;
   1083         return NULL;
   1084     }
   1085     UVector *fFormatNames = new UVector(status);
   1086     if (U_FAILURE(status)) {
   1087         status = U_MEMORY_ALLOCATION_ERROR;
   1088         return NULL;
   1089     }
   1090     for (int32_t i=0; i<subformatCount; ++i) {
   1091         fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
   1092     }
   1093 
   1094     StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
   1095     return nameEnumerator;
   1096 }
   1097 
   1098 // -------------------------------------
   1099 // Formats the source Formattable array and copy into the result buffer.
   1100 // Ignore the FieldPosition result for error checking.
   1101 
   1102 UnicodeString&
   1103 MessageFormat::format(const Formattable* source,
   1104                       int32_t cnt,
   1105                       UnicodeString& appendTo,
   1106                       FieldPosition& ignore,
   1107                       UErrorCode& success) const
   1108 {
   1109     if (U_FAILURE(success))
   1110         return appendTo;
   1111 
   1112     return format(source, cnt, appendTo, ignore, 0, success);
   1113 }
   1114 
   1115 // -------------------------------------
   1116 // Internally creates a MessageFormat instance based on the
   1117 // pattern and formats the arguments Formattable array and
   1118 // copy into the appendTo buffer.
   1119 
   1120 UnicodeString&
   1121 MessageFormat::format(  const UnicodeString& pattern,
   1122                         const Formattable* arguments,
   1123                         int32_t cnt,
   1124                         UnicodeString& appendTo,
   1125                         UErrorCode& success)
   1126 {
   1127     MessageFormat temp(pattern, success);
   1128     FieldPosition ignore(0);
   1129     temp.format(arguments, cnt, appendTo, ignore, success);
   1130     return appendTo;
   1131 }
   1132 
   1133 // -------------------------------------
   1134 // Formats the source Formattable object and copy into the
   1135 // appendTo buffer.  The Formattable object must be an array
   1136 // of Formattable instances, returns error otherwise.
   1137 
   1138 UnicodeString&
   1139 MessageFormat::format(const Formattable& source,
   1140                       UnicodeString& appendTo,
   1141                       FieldPosition& ignore,
   1142                       UErrorCode& success) const
   1143 {
   1144     int32_t cnt;
   1145 
   1146     if (U_FAILURE(success))
   1147         return appendTo;
   1148     if (source.getType() != Formattable::kArray) {
   1149         success = U_ILLEGAL_ARGUMENT_ERROR;
   1150         return appendTo;
   1151     }
   1152     const Formattable* tmpPtr = source.getArray(cnt);
   1153 
   1154     return format(tmpPtr, cnt, appendTo, ignore, 0, success);
   1155 }
   1156 
   1157 
   1158 UnicodeString&
   1159 MessageFormat::format(const UnicodeString* argumentNames,
   1160                       const Formattable* arguments,
   1161                       int32_t count,
   1162                       UnicodeString& appendTo,
   1163                       UErrorCode& success) const {
   1164     FieldPosition ignore(0);
   1165     return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
   1166 }
   1167 
   1168 UnicodeString&
   1169 MessageFormat::format(const Formattable* arguments,
   1170                       int32_t cnt,
   1171                       UnicodeString& appendTo,
   1172                       FieldPosition& status,
   1173                       int32_t recursionProtection,
   1174                       UErrorCode& success) const
   1175 {
   1176     return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
   1177 }
   1178 
   1179 // -------------------------------------
   1180 // Formats the arguments Formattable array and copy into the appendTo buffer.
   1181 // Ignore the FieldPosition result for error checking.
   1182 
   1183 UnicodeString&
   1184 MessageFormat::format(const Formattable* arguments,
   1185                       const UnicodeString *argumentNames,
   1186                       int32_t cnt,
   1187                       UnicodeString& appendTo,
   1188                       FieldPosition& status,
   1189                       int32_t recursionProtection,
   1190                       UErrorCode& success) const
   1191 {
   1192     int32_t lastOffset = 0;
   1193     int32_t argumentNumber=0;
   1194     if (cnt < 0 || (cnt && arguments == NULL)) {
   1195         success = U_ILLEGAL_ARGUMENT_ERROR;
   1196         return appendTo;
   1197     }
   1198 
   1199     if ( !isArgNumeric && argumentNames== NULL ) {
   1200         success = U_ILLEGAL_ARGUMENT_ERROR;
   1201         return appendTo;
   1202     }
   1203 
   1204     const Formattable *obj=NULL;
   1205     for (int32_t i=0; i<subformatCount; ++i) {
   1206         // Append the prefix of current format element.
   1207         appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
   1208         lastOffset = subformats[i].offset;
   1209         obj = NULL;
   1210         if (isArgNumeric) {
   1211             argumentNumber = subformats[i].argNum;
   1212 
   1213             // Checks the scope of the argument number.
   1214             if (argumentNumber >= cnt) {
   1215                 appendTo += LEFT_CURLY_BRACE;
   1216                 itos(argumentNumber, appendTo);
   1217                 appendTo += RIGHT_CURLY_BRACE;
   1218                 continue;
   1219             }
   1220             obj = arguments+argumentNumber;
   1221         }
   1222         else {
   1223             for (int32_t j=0; j<cnt; ++j) {
   1224                 if (argumentNames[j]== *subformats[i].argName ) {
   1225                     obj = arguments+j;
   1226                     break;
   1227                 }
   1228             }
   1229             if (obj == NULL ) {
   1230                 appendTo += LEFT_CURLY_BRACE;
   1231                 appendTo += *subformats[i].argName;
   1232                 appendTo += RIGHT_CURLY_BRACE;
   1233                 continue;
   1234 
   1235             }
   1236         }
   1237         Formattable::Type type = obj->getType();
   1238 
   1239         // Recursively calling the format process only if the current
   1240         // format argument refers to either of the following:
   1241         // a ChoiceFormat object ,a PluralFormat object, a SelectFormat object.
   1242         Format* fmt = subformats[i].format;
   1243         if (fmt != NULL) {
   1244             UnicodeString argNum;
   1245             fmt->format(*obj, argNum, success);
   1246 
   1247             // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the
   1248             // MessageFormat pattern application.
   1249             if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() ||
   1250                  fmt->getDynamicClassID() == PluralFormat::getStaticClassID() ||
   1251                  fmt->getDynamicClassID() == SelectFormat::getStaticClassID()
   1252                  ) &&
   1253                 argNum.indexOf(LEFT_CURLY_BRACE) >= 0) {
   1254                 MessageFormat temp(argNum, fLocale, success);
   1255                 // TODO: Implement recursion protection
   1256                 if ( isArgNumeric ) {
   1257                     temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
   1258                 }
   1259                 else {
   1260                     temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
   1261                 }
   1262                 if (U_FAILURE(success)) {
   1263                     return appendTo;
   1264                 }
   1265             }
   1266             else {
   1267                 appendTo += argNum;
   1268             }
   1269         }
   1270         // If the obj data type is a number, use a NumberFormat instance.
   1271         else if ((type == Formattable::kDouble) ||
   1272                  (type == Formattable::kLong) ||
   1273                  (type == Formattable::kInt64)) {
   1274 
   1275             const NumberFormat* nf = getDefaultNumberFormat(success);
   1276             if (nf == NULL) {
   1277                 return appendTo;
   1278             }
   1279             if (type == Formattable::kDouble) {
   1280                 nf->format(obj->getDouble(), appendTo);
   1281             } else if (type == Formattable::kLong) {
   1282                 nf->format(obj->getLong(), appendTo);
   1283             } else {
   1284                 nf->format(obj->getInt64(), appendTo);
   1285             }
   1286         }
   1287         // If the obj data type is a Date instance, use a DateFormat instance.
   1288         else if (type == Formattable::kDate) {
   1289             const DateFormat* df = getDefaultDateFormat(success);
   1290             if (df == NULL) {
   1291                 return appendTo;
   1292             }
   1293             df->format(obj->getDate(), appendTo);
   1294         }
   1295         else if (type == Formattable::kString) {
   1296             appendTo += obj->getString();
   1297         }
   1298         else {
   1299             success = U_ILLEGAL_ARGUMENT_ERROR;
   1300             return appendTo;
   1301         }
   1302     }
   1303     // Appends the rest of the pattern characters after the real last offset.
   1304     appendTo.append(fPattern, lastOffset, 0x7fffffff);
   1305     return appendTo;
   1306 }
   1307 
   1308 
   1309 // -------------------------------------
   1310 // Parses the source pattern and returns the Formattable objects array,
   1311 // the array count and the ending parse position.  The caller of this method
   1312 // owns the array.
   1313 
   1314 Formattable*
   1315 MessageFormat::parse(const UnicodeString& source,
   1316                      ParsePosition& pos,
   1317                      int32_t& count) const
   1318 {
   1319     // Allocate at least one element.  Allocating an array of length
   1320     // zero causes problems on some platforms (e.g. Win32).
   1321     Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
   1322     int32_t patternOffset = 0;
   1323     int32_t sourceOffset = pos.getIndex();
   1324     ParsePosition tempPos(0);
   1325     count = 0; // {sfb} reset to zero
   1326     int32_t len;
   1327     // If resultArray could not be created, exit out.
   1328     // Avoid crossing initialization of variables above.
   1329     if (resultArray == NULL) {
   1330         goto PARSE_ERROR;
   1331     }
   1332     for (int32_t i = 0; i < subformatCount; ++i) {
   1333         // match up to format
   1334         len = subformats[i].offset - patternOffset;
   1335         if (len == 0 ||
   1336             fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
   1337             sourceOffset += len;
   1338             patternOffset += len;
   1339         }
   1340         else {
   1341             goto PARSE_ERROR;
   1342         }
   1343 
   1344         // now use format
   1345         Format* fmt = subformats[i].format;
   1346         int32_t argNum = subformats[i].argNum;
   1347         if (fmt == NULL) {   // string format
   1348             // if at end, use longest possible match
   1349             // otherwise uses first match to intervening string
   1350             // does NOT recursively try all possibilities
   1351             int32_t tempLength = (i+1<subformatCount) ?
   1352                 subformats[i+1].offset : fPattern.length();
   1353 
   1354             int32_t next;
   1355             if (patternOffset >= tempLength) {
   1356                 next = source.length();
   1357             }
   1358             else {
   1359                 UnicodeString buffer;
   1360                 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
   1361                 next = source.indexOf(buffer, sourceOffset);
   1362             }
   1363 
   1364             if (next < 0) {
   1365                 goto PARSE_ERROR;
   1366             }
   1367             else {
   1368                 UnicodeString buffer;
   1369                 source.extract(sourceOffset,next - sourceOffset, buffer);
   1370                 UnicodeString strValue = buffer;
   1371                 UnicodeString temp(LEFT_CURLY_BRACE);
   1372                 // {sfb} check this later
   1373                 if (isArgNumeric) {
   1374                     itos(argNum, temp);
   1375                 }
   1376                 else {
   1377                     temp+=(*subformats[i].argName);
   1378                 }
   1379                 temp += RIGHT_CURLY_BRACE;
   1380                 if (strValue != temp) {
   1381                     source.extract(sourceOffset,next - sourceOffset, buffer);
   1382                     resultArray[argNum].setString(buffer);
   1383                     // {sfb} not sure about this
   1384                     if ((argNum + 1) > count) {
   1385                         count = argNum + 1;
   1386                     }
   1387                 }
   1388                 sourceOffset = next;
   1389             }
   1390         }
   1391         else {
   1392             tempPos.setIndex(sourceOffset);
   1393             fmt->parseObject(source, resultArray[argNum], tempPos);
   1394             if (tempPos.getIndex() == sourceOffset) {
   1395                 goto PARSE_ERROR;
   1396             }
   1397 
   1398             if ((argNum + 1) > count) {
   1399                 count = argNum + 1;
   1400             }
   1401             sourceOffset = tempPos.getIndex(); // update
   1402         }
   1403     }
   1404     len = fPattern.length() - patternOffset;
   1405     if (len == 0 ||
   1406         fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
   1407         pos.setIndex(sourceOffset + len);
   1408         return resultArray;
   1409     }
   1410     // else fall through...
   1411 
   1412  PARSE_ERROR:
   1413     pos.setErrorIndex(sourceOffset);
   1414     delete [] resultArray;
   1415     count = 0;
   1416     return NULL; // leave index as is to signal error
   1417 }
   1418 
   1419 // -------------------------------------
   1420 // Parses the source string and returns the array of
   1421 // Formattable objects and the array count.  The caller
   1422 // owns the returned array.
   1423 
   1424 Formattable*
   1425 MessageFormat::parse(const UnicodeString& source,
   1426                      int32_t& cnt,
   1427                      UErrorCode& success) const
   1428 {
   1429     if (!isArgNumeric ) {
   1430         success = U_ARGUMENT_TYPE_MISMATCH;
   1431         return NULL;
   1432     }
   1433     ParsePosition status(0);
   1434     // Calls the actual implementation method and starts
   1435     // from zero offset of the source text.
   1436     Formattable* result = parse(source, status, cnt);
   1437     if (status.getIndex() == 0) {
   1438         success = U_MESSAGE_PARSE_ERROR;
   1439         delete[] result;
   1440         return NULL;
   1441     }
   1442     return result;
   1443 }
   1444 
   1445 // -------------------------------------
   1446 // Parses the source text and copy into the result buffer.
   1447 
   1448 void
   1449 MessageFormat::parseObject( const UnicodeString& source,
   1450                             Formattable& result,
   1451                             ParsePosition& status) const
   1452 {
   1453     int32_t cnt = 0;
   1454     Formattable* tmpResult = parse(source, status, cnt);
   1455     if (tmpResult != NULL)
   1456         result.adoptArray(tmpResult, cnt);
   1457 }
   1458 
   1459 UnicodeString
   1460 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
   1461   UnicodeString result;
   1462   if (U_SUCCESS(status)) {
   1463     int32_t plen = pattern.length();
   1464     const UChar* pat = pattern.getBuffer();
   1465     int32_t blen = plen * 2 + 1; // space for null termination, convenience
   1466     UChar* buf = result.getBuffer(blen);
   1467     if (buf == NULL) {
   1468       status = U_MEMORY_ALLOCATION_ERROR;
   1469     } else {
   1470       int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
   1471       result.releaseBuffer(U_SUCCESS(status) ? len : 0);
   1472     }
   1473   }
   1474   if (U_FAILURE(status)) {
   1475     result.setToBogus();
   1476   }
   1477   return result;
   1478 }
   1479 
   1480 // -------------------------------------
   1481 
   1482 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
   1483     RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
   1484     if (fmt == NULL) {
   1485         ec = U_MEMORY_ALLOCATION_ERROR;
   1486     } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
   1487         UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
   1488         fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
   1489     }
   1490     return fmt;
   1491 }
   1492 
   1493 /**
   1494  * Reads the segments[] array (see applyPattern()) and parses the
   1495  * segments[1..3] into a Format* object.  Stores the format object in
   1496  * the subformats[] array.  Updates the argTypes[] array type
   1497  * information for the corresponding argument.
   1498  *
   1499  * @param formatNumber index into subformats[] for this format
   1500  * @param segments array of strings with the parsed pattern segments
   1501  * @param parseError parse error data (output param)
   1502  * @param ec error code
   1503  */
   1504 void
   1505 MessageFormat::makeFormat(int32_t formatNumber,
   1506                           UnicodeString* segments,
   1507                           UParseError& parseError,
   1508                           UErrorCode& ec) {
   1509     if (U_FAILURE(ec)) {
   1510         return;
   1511     }
   1512 
   1513     // Parse the argument number
   1514     int32_t argumentNumber = stou(segments[1]); // always unlocalized!
   1515     UnicodeString argumentName;
   1516     if (argumentNumber < 0) {
   1517         if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
   1518             ec = U_INVALID_FORMAT_ERROR;
   1519             return;
   1520         }
   1521         isArgNumeric = FALSE;
   1522         argumentNumber=formatNumber;
   1523     }
   1524     if (!isArgNumeric) {
   1525         if ( !isLegalArgName(segments[1]) ) {
   1526             ec = U_INVALID_FORMAT_ERROR;
   1527             return;
   1528         }
   1529         argumentName = segments[1];
   1530     }
   1531 
   1532     // Parse the format, recording the argument type and creating a
   1533     // new Format object (except for string arguments).
   1534     Formattable::Type argType;
   1535     Format *fmt = NULL;
   1536     int32_t typeID, styleID;
   1537     DateFormat::EStyle style;
   1538     UnicodeString unquotedPattern, quotedPattern;
   1539     UBool inQuote = FALSE;
   1540 
   1541     switch (typeID = findKeyword(segments[2], TYPE_IDS)) {
   1542 
   1543     case 0: // string
   1544         argType = Formattable::kString;
   1545         break;
   1546 
   1547     case 1: // number
   1548         argType = Formattable::kDouble;
   1549 
   1550         switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
   1551         case 0: // default
   1552             fmt = NumberFormat::createInstance(fLocale, ec);
   1553             break;
   1554         case 1: // currency
   1555             fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
   1556             break;
   1557         case 2: // percent
   1558             fmt = NumberFormat::createPercentInstance(fLocale, ec);
   1559             break;
   1560         case 3: // integer
   1561             argType = Formattable::kLong;
   1562             fmt = createIntegerFormat(fLocale, ec);
   1563             break;
   1564         default: // pattern
   1565             fmt = NumberFormat::createInstance(fLocale, ec);
   1566             if (fmt &&
   1567                 fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
   1568                 ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec);
   1569             }
   1570             break;
   1571         }
   1572         break;
   1573 
   1574     case 2: // date
   1575     case 3: // time
   1576         argType = Formattable::kDate;
   1577         styleID = findKeyword(segments[3], DATE_STYLE_IDS);
   1578         style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
   1579 
   1580         if (typeID == 2) {
   1581             fmt = DateFormat::createDateInstance(style, fLocale);
   1582         } else {
   1583             fmt = DateFormat::createTimeInstance(style, fLocale);
   1584         }
   1585 
   1586         if (styleID < 0 &&
   1587             fmt != NULL &&
   1588             fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
   1589             ((SimpleDateFormat*)fmt)->applyPattern(segments[3]);
   1590         }
   1591         break;
   1592 
   1593     case 4: // choice
   1594         argType = Formattable::kDouble;
   1595 
   1596         fmt = new ChoiceFormat(segments[3], parseError, ec);
   1597         break;
   1598 
   1599     case 5: // spellout
   1600         argType = Formattable::kDouble;
   1601         fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
   1602         break;
   1603     case 6: // ordinal
   1604         argType = Formattable::kDouble;
   1605         fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
   1606         break;
   1607     case 7: // duration
   1608         argType = Formattable::kDouble;
   1609         fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
   1610         break;
   1611     case 8: // plural
   1612     case 9: // Select
   1613         if(typeID == 8)
   1614             argType = Formattable::kDouble;
   1615         else
   1616             argType = Formattable::kString;
   1617         quotedPattern = segments[3];
   1618         for (int32_t i = 0; i < quotedPattern.length(); ++i) {
   1619             UChar ch = quotedPattern.charAt(i);
   1620             if (ch == SINGLE_QUOTE) {
   1621                 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
   1622                     unquotedPattern+=ch;
   1623                     ++i;
   1624                 }
   1625                 else {
   1626                     inQuote = !inQuote;
   1627                 }
   1628             }
   1629             else {
   1630                 unquotedPattern += ch;
   1631             }
   1632         }
   1633         if(typeID == 8)
   1634             fmt = new PluralFormat(fLocale, unquotedPattern, ec);
   1635         else
   1636             fmt = new SelectFormat(unquotedPattern, ec);
   1637         break;
   1638     default:
   1639         argType = Formattable::kString;
   1640         ec = U_ILLEGAL_ARGUMENT_ERROR;
   1641         break;
   1642     }
   1643 
   1644     if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
   1645         ec = U_MEMORY_ALLOCATION_ERROR;
   1646     }
   1647 
   1648     if (!allocateSubformats(formatNumber+1) ||
   1649         !allocateArgTypes(argumentNumber+1)) {
   1650         ec = U_MEMORY_ALLOCATION_ERROR;
   1651     }
   1652 
   1653     if (U_FAILURE(ec)) {
   1654         delete fmt;
   1655         return;
   1656     }
   1657 
   1658     // Parse succeeded; record results in our arrays
   1659     subformats[formatNumber].format = fmt;
   1660     subformats[formatNumber].offset = segments[0].length();
   1661     if (isArgNumeric) {
   1662         subformats[formatNumber].argName = NULL;
   1663         subformats[formatNumber].argNum = argumentNumber;
   1664     }
   1665     else {
   1666         subformats[formatNumber].argName = new UnicodeString(argumentName);
   1667         subformats[formatNumber].argNum = -1;
   1668     }
   1669     subformatCount = formatNumber+1;
   1670 
   1671     // Careful here: argumentNumber may in general arrive out of
   1672     // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
   1673     argTypes[argumentNumber] = argType;
   1674     if (argumentNumber+1 > argTypeCount) {
   1675         argTypeCount = argumentNumber+1;
   1676     }
   1677 }
   1678 
   1679 // -------------------------------------
   1680 // Finds the string, s, in the string array, list.
   1681 int32_t MessageFormat::findKeyword(const UnicodeString& s,
   1682                                    const UChar * const *list)
   1683 {
   1684     if (s.length() == 0)
   1685         return 0; // default
   1686 
   1687     UnicodeString buffer = s;
   1688     // Trims the space characters and turns all characters
   1689     // in s to lower case.
   1690     buffer.trim().toLower("");
   1691     for (int32_t i = 0; list[i]; ++i) {
   1692         if (!buffer.compare(list[i], u_strlen(list[i]))) {
   1693             return i;
   1694         }
   1695     }
   1696     return -1;
   1697 }
   1698 
   1699 // -------------------------------------
   1700 // Checks the range of the source text to quote the special
   1701 // characters, { and ' and copy to target buffer.
   1702 
   1703 void
   1704 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
   1705                                 int32_t start,
   1706                                 int32_t end,
   1707                                 UnicodeString& appendTo)
   1708 {
   1709     UBool gotLB = FALSE;
   1710 
   1711     for (int32_t i = start; i < end; ++i) {
   1712         UChar ch = source[i];
   1713         if (ch == LEFT_CURLY_BRACE) {
   1714             appendTo += SINGLE_QUOTE;
   1715             appendTo += LEFT_CURLY_BRACE;
   1716             appendTo += SINGLE_QUOTE;
   1717             gotLB = TRUE;
   1718         }
   1719         else if (ch == RIGHT_CURLY_BRACE) {
   1720             if(gotLB) {
   1721                 appendTo += RIGHT_CURLY_BRACE;
   1722                 gotLB = FALSE;
   1723             }
   1724             else {
   1725                 // orig code.
   1726                 appendTo += SINGLE_QUOTE;
   1727                 appendTo += RIGHT_CURLY_BRACE;
   1728                 appendTo += SINGLE_QUOTE;
   1729             }
   1730         }
   1731         else if (ch == SINGLE_QUOTE) {
   1732             appendTo += SINGLE_QUOTE;
   1733             appendTo += SINGLE_QUOTE;
   1734         }
   1735         else {
   1736             appendTo += ch;
   1737         }
   1738     }
   1739 }
   1740 
   1741 /**
   1742  * Convenience method that ought to be in NumberFormat
   1743  */
   1744 NumberFormat*
   1745 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
   1746     NumberFormat *temp = NumberFormat::createInstance(locale, status);
   1747     if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
   1748         DecimalFormat *temp2 = (DecimalFormat*) temp;
   1749         temp2->setMaximumFractionDigits(0);
   1750         temp2->setDecimalSeparatorAlwaysShown(FALSE);
   1751         temp2->setParseIntegerOnly(TRUE);
   1752     }
   1753 
   1754     return temp;
   1755 }
   1756 
   1757 /**
   1758  * Return the default number format.  Used to format a numeric
   1759  * argument when subformats[i].format is NULL.  Returns NULL
   1760  * on failure.
   1761  *
   1762  * Semantically const but may modify *this.
   1763  */
   1764 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
   1765     if (defaultNumberFormat == NULL) {
   1766         MessageFormat* t = (MessageFormat*) this;
   1767         t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
   1768         if (U_FAILURE(ec)) {
   1769             delete t->defaultNumberFormat;
   1770             t->defaultNumberFormat = NULL;
   1771         } else if (t->defaultNumberFormat == NULL) {
   1772             ec = U_MEMORY_ALLOCATION_ERROR;
   1773         }
   1774     }
   1775     return defaultNumberFormat;
   1776 }
   1777 
   1778 /**
   1779  * Return the default date format.  Used to format a date
   1780  * argument when subformats[i].format is NULL.  Returns NULL
   1781  * on failure.
   1782  *
   1783  * Semantically const but may modify *this.
   1784  */
   1785 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
   1786     if (defaultDateFormat == NULL) {
   1787         MessageFormat* t = (MessageFormat*) this;
   1788         t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
   1789         if (t->defaultDateFormat == NULL) {
   1790             ec = U_MEMORY_ALLOCATION_ERROR;
   1791         }
   1792     }
   1793     return defaultDateFormat;
   1794 }
   1795 
   1796 UBool
   1797 MessageFormat::usesNamedArguments() const {
   1798     return !isArgNumeric;
   1799 }
   1800 
   1801 UBool
   1802 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
   1803     if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
   1804         return FALSE;
   1805     }
   1806     for (int32_t i=1; i<argName.length(); ++i) {
   1807         if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
   1808             return FALSE;
   1809         }
   1810     }
   1811     return TRUE;
   1812 }
   1813 
   1814 int32_t
   1815 MessageFormat::getArgTypeCount() const {
   1816         return argTypeCount;
   1817 }
   1818 
   1819 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
   1820     pos=0;
   1821     fFormatNames = fNameList;
   1822 }
   1823 
   1824 const UnicodeString*
   1825 FormatNameEnumeration::snext(UErrorCode& status) {
   1826     if (U_SUCCESS(status) && pos < fFormatNames->size()) {
   1827         return (const UnicodeString*)fFormatNames->elementAt(pos++);
   1828     }
   1829     return NULL;
   1830 }
   1831 
   1832 void
   1833 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
   1834     pos=0;
   1835 }
   1836 
   1837 int32_t
   1838 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
   1839        return (fFormatNames==NULL) ? 0 : fFormatNames->size();
   1840 }
   1841 
   1842 FormatNameEnumeration::~FormatNameEnumeration() {
   1843     UnicodeString *s;
   1844     for (int32_t i=0; i<fFormatNames->size(); ++i) {
   1845         if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
   1846             delete s;
   1847         }
   1848     }
   1849     delete fFormatNames;
   1850 }
   1851 U_NAMESPACE_END
   1852 
   1853 #endif /* #if !UCONFIG_NO_FORMATTING */
   1854 
   1855 //eof
   1856