Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2008, International Business Machines Corporation and         *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 *
      7 * File MSGFMT.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   02/19/97    aliu        Converted from java.
     13 *   03/20/97    helena      Finished first cut of implementation.
     14 *   04/10/97    aliu        Made to work on AIX.  Added stoi to replace wtoi.
     15 *   06/11/97    helena      Fixed addPattern to take the pattern correctly.
     16 *   06/17/97    helena      Fixed the getPattern to return the correct pattern.
     17 *   07/09/97    helena      Made ParsePosition into a class.
     18 *   02/22/99    stephen     Removed character literals for EBCDIC safety
     19 ********************************************************************************
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_FORMATTING
     25 
     26 #include "unicode/msgfmt.h"
     27 #include "unicode/decimfmt.h"
     28 #include "unicode/datefmt.h"
     29 #include "unicode/smpdtfmt.h"
     30 #include "unicode/choicfmt.h"
     31 #include "unicode/plurfmt.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/ucnv_err.h"
     34 #include "unicode/uchar.h"
     35 #include "unicode/umsg.h"
     36 #include "unicode/rbnf.h"
     37 #include "cmemory.h"
     38 #include "msgfmt_impl.h"
     39 #include "../common/util.h"
     40 #include "uassert.h"
     41 #include "ustrfmt.h"
     42 #include "uvector.h"
     43 
     44 // *****************************************************************************
     45 // class MessageFormat
     46 // *****************************************************************************
     47 
     48 #define COMMA             ((UChar)0x002C)
     49 #define SINGLE_QUOTE      ((UChar)0x0027)
     50 #define LEFT_CURLY_BRACE  ((UChar)0x007B)
     51 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
     52 
     53 //---------------------------------------
     54 // static data
     55 
     56 static const UChar ID_EMPTY[]     = {
     57     0 /* empty string, used for default so that null can mark end of list */
     58 };
     59 
     60 static const UChar ID_NUMBER[]    = {
     61     0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0  /* "number" */
     62 };
     63 static const UChar ID_DATE[]      = {
     64     0x64, 0x61, 0x74, 0x65, 0              /* "date" */
     65 };
     66 static const UChar ID_TIME[]      = {
     67     0x74, 0x69, 0x6D, 0x65, 0              /* "time" */
     68 };
     69 static const UChar ID_CHOICE[]    = {
     70     0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0  /* "choice" */
     71 };
     72 static const UChar ID_SPELLOUT[]  = {
     73     0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
     74 };
     75 static const UChar ID_ORDINAL[]   = {
     76     0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
     77 };
     78 static const UChar ID_DURATION[]  = {
     79     0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
     80 };
     81 static const UChar ID_PLURAL[]  = {
     82     0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0  /* "plural" */
     83 };
     84 
     85 // MessageFormat Type List: Number, Date, Time, Choice, Spellout, Ordinal, Duration or Plural
     86 static const UChar * const TYPE_IDS[] = {
     87     ID_EMPTY,
     88     ID_NUMBER,
     89     ID_DATE,
     90     ID_TIME,
     91     ID_CHOICE,
     92     ID_SPELLOUT,
     93     ID_ORDINAL,
     94     ID_DURATION,
     95     ID_PLURAL,
     96     NULL,
     97 };
     98 
     99 static const UChar ID_CURRENCY[]  = {
    100     0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0  /* "currency" */
    101 };
    102 static const UChar ID_PERCENT[]   = {
    103     0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0        /* "percent" */
    104 };
    105 static const UChar ID_INTEGER[]   = {
    106     0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0        /* "integer" */
    107 };
    108 
    109 // NumberFormat modifier list, default, currency, percent or integer
    110 static const UChar * const NUMBER_STYLE_IDS[] = {
    111     ID_EMPTY,
    112     ID_CURRENCY,
    113     ID_PERCENT,
    114     ID_INTEGER,
    115     NULL,
    116 };
    117 
    118 static const UChar ID_SHORT[]     = {
    119     0x73, 0x68, 0x6F, 0x72, 0x74, 0        /* "short" */
    120 };
    121 static const UChar ID_MEDIUM[]    = {
    122     0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0  /* "medium" */
    123 };
    124 static const UChar ID_LONG[]      = {
    125     0x6C, 0x6F, 0x6E, 0x67, 0              /* "long" */
    126 };
    127 static const UChar ID_FULL[]      = {
    128     0x66, 0x75, 0x6C, 0x6C, 0              /* "full" */
    129 };
    130 
    131 // DateFormat modifier list, default, short, medium, long or full
    132 static const UChar * const DATE_STYLE_IDS[] = {
    133     ID_EMPTY,
    134     ID_SHORT,
    135     ID_MEDIUM,
    136     ID_LONG,
    137     ID_FULL,
    138     NULL,
    139 };
    140 
    141 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
    142     U_NAMESPACE_QUALIFIER DateFormat::kDefault,
    143     U_NAMESPACE_QUALIFIER DateFormat::kShort,
    144     U_NAMESPACE_QUALIFIER DateFormat::kMedium,
    145     U_NAMESPACE_QUALIFIER DateFormat::kLong,
    146     U_NAMESPACE_QUALIFIER DateFormat::kFull,
    147 };
    148 
    149 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
    150 
    151 U_NAMESPACE_BEGIN
    152 
    153 // -------------------------------------
    154 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
    155 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
    156 
    157 //--------------------------------------------------------------------
    158 
    159 /**
    160  * Convert a string to an unsigned decimal, ignoring rule whitespace.
    161  * @return a non-negative number if successful, or a negative number
    162  *         upon failure.
    163  */
    164 static int32_t stou(const UnicodeString& string) {
    165     int32_t n = 0;
    166     int32_t count = 0;
    167     UChar32 c;
    168     for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
    169         c = string.char32At(i);
    170         if (uprv_isRuleWhiteSpace(c)) {
    171             continue;
    172         }
    173         int32_t d = u_digit(c, 10);
    174         if (d < 0 || ++count > 10) {
    175             return -1;
    176         }
    177         n = 10*n + d;
    178     }
    179     return n;
    180 }
    181 
    182 /**
    183  * Convert an integer value to a string and append the result to
    184  * the given UnicodeString.
    185  */
    186 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
    187     UChar temp[16];
    188     uprv_itou(temp,16,i,10,0); // 10 == radix
    189     appendTo.append(temp);
    190     return appendTo;
    191 }
    192 
    193 /*
    194  * A structure representing one subformat of this MessageFormat.
    195  * Each subformat has a Format object, an offset into the plain
    196  * pattern text fPattern, and an argument number.  The argument
    197  * number corresponds to the array of arguments to be formatted.
    198  * @internal
    199  */
    200 class MessageFormat::Subformat : public UMemory {
    201 public:
    202     /**
    203      * @internal
    204      */
    205     Format* format; // formatter
    206     /**
    207      * @internal
    208      */
    209     int32_t offset; // offset into fPattern
    210     /**
    211      * @internal
    212      */
    213     // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
    214     int32_t argNum;    // 0-based argument number
    215     /**
    216      * @internal
    217      */
    218     UnicodeString* argName; // argument name or number
    219 
    220     /**
    221      * Clone that.format and assign it to this.format
    222      * Do NOT delete this.format
    223      * @internal
    224      */
    225     Subformat& operator=(const Subformat& that) {
    226         if (this != &that) {
    227             format = that.format ? that.format->clone() : NULL;
    228             offset = that.offset;
    229             argNum = that.argNum;
    230             argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
    231         }
    232         return *this;
    233     }
    234 
    235     /**
    236      * @internal
    237      */
    238     UBool operator==(const Subformat& that) const {
    239         // Do cheap comparisons first
    240         return offset == that.offset &&
    241                argNum == that.argNum &&
    242                ((argName == that.argName) ||
    243                 (*argName == *that.argName)) &&
    244                ((format == that.format) || // handles NULL
    245                 (*format == *that.format));
    246     }
    247 
    248     /**
    249      * @internal
    250      */
    251     UBool operator!=(const Subformat& that) const {
    252         return !operator==(that);
    253     }
    254 };
    255 
    256 // -------------------------------------
    257 // Creates a MessageFormat instance based on the pattern.
    258 
    259 MessageFormat::MessageFormat(const UnicodeString& pattern,
    260                              UErrorCode& success)
    261 : fLocale(Locale::getDefault()),  // Uses the default locale
    262   formatAliases(NULL),
    263   formatAliasesCapacity(0),
    264   idStart(UCHAR_ID_START),
    265   idContinue(UCHAR_ID_CONTINUE),
    266   subformats(NULL),
    267   subformatCount(0),
    268   subformatCapacity(0),
    269   argTypes(NULL),
    270   argTypeCount(0),
    271   argTypeCapacity(0),
    272   isArgNumeric(TRUE),
    273   defaultNumberFormat(NULL),
    274   defaultDateFormat(NULL)
    275 {
    276     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    277         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    278         success = U_MEMORY_ALLOCATION_ERROR;
    279         return;
    280     }
    281     applyPattern(pattern, success);
    282     setLocaleIDs(fLocale.getName(), fLocale.getName());
    283 }
    284 
    285 MessageFormat::MessageFormat(const UnicodeString& pattern,
    286                              const Locale& newLocale,
    287                              UErrorCode& success)
    288 : fLocale(newLocale),
    289   formatAliases(NULL),
    290   formatAliasesCapacity(0),
    291   idStart(UCHAR_ID_START),
    292   idContinue(UCHAR_ID_CONTINUE),
    293   subformats(NULL),
    294   subformatCount(0),
    295   subformatCapacity(0),
    296   argTypes(NULL),
    297   argTypeCount(0),
    298   argTypeCapacity(0),
    299   isArgNumeric(TRUE),
    300   defaultNumberFormat(NULL),
    301   defaultDateFormat(NULL)
    302 {
    303     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    304         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    305         success = U_MEMORY_ALLOCATION_ERROR;
    306         return;
    307     }
    308     applyPattern(pattern, success);
    309     setLocaleIDs(fLocale.getName(), fLocale.getName());
    310 }
    311 
    312 MessageFormat::MessageFormat(const UnicodeString& pattern,
    313                              const Locale& newLocale,
    314                              UParseError& parseError,
    315                              UErrorCode& success)
    316 : fLocale(newLocale),
    317   formatAliases(NULL),
    318   formatAliasesCapacity(0),
    319   idStart(UCHAR_ID_START),
    320   idContinue(UCHAR_ID_CONTINUE),
    321   subformats(NULL),
    322   subformatCount(0),
    323   subformatCapacity(0),
    324   argTypes(NULL),
    325   argTypeCount(0),
    326   argTypeCapacity(0),
    327   isArgNumeric(TRUE),
    328   defaultNumberFormat(NULL),
    329   defaultDateFormat(NULL)
    330 {
    331     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
    332         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
    333         success = U_MEMORY_ALLOCATION_ERROR;
    334         return;
    335     }
    336     applyPattern(pattern, parseError, success);
    337     setLocaleIDs(fLocale.getName(), fLocale.getName());
    338 }
    339 
    340 MessageFormat::MessageFormat(const MessageFormat& that)
    341 : Format(that),
    342   formatAliases(NULL),
    343   formatAliasesCapacity(0),
    344   idStart(UCHAR_ID_START),
    345   idContinue(UCHAR_ID_CONTINUE),
    346   subformats(NULL),
    347   subformatCount(0),
    348   subformatCapacity(0),
    349   argTypes(NULL),
    350   argTypeCount(0),
    351   argTypeCapacity(0),
    352   isArgNumeric(TRUE),
    353   defaultNumberFormat(NULL),
    354   defaultDateFormat(NULL)
    355 {
    356     *this = that;
    357 }
    358 
    359 MessageFormat::~MessageFormat()
    360 {
    361     int32_t idx;
    362     for (idx = 0; idx < subformatCount; idx++) {
    363         delete subformats[idx].format;
    364         delete subformats[idx].argName;
    365     }
    366     uprv_free(subformats);
    367     subformats = NULL;
    368     subformatCount = subformatCapacity = 0;
    369 
    370     uprv_free(argTypes);
    371     argTypes = NULL;
    372     argTypeCount = argTypeCapacity = 0;
    373 
    374     uprv_free(formatAliases);
    375 
    376     delete defaultNumberFormat;
    377     delete defaultDateFormat;
    378 }
    379 
    380 //--------------------------------------------------------------------
    381 // Variable-size array management
    382 
    383 /**
    384  * Allocate subformats[] to at least the given capacity and return
    385  * TRUE if successful.  If not, leave subformats[] unchanged.
    386  *
    387  * If subformats is NULL, allocate it.  If it is not NULL, enlarge it
    388  * if necessary to be at least as large as specified.
    389  */
    390 UBool MessageFormat::allocateSubformats(int32_t capacity) {
    391     if (subformats == NULL) {
    392         subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
    393         subformatCapacity = capacity;
    394         subformatCount = 0;
    395         if (subformats == NULL) {
    396             subformatCapacity = 0;
    397             return FALSE;
    398         }
    399     } else if (subformatCapacity < capacity) {
    400         if (capacity < 2*subformatCapacity) {
    401             capacity = 2*subformatCapacity;
    402         }
    403         Subformat* a = (Subformat*)
    404             uprv_realloc(subformats, sizeof(*subformats) * capacity);
    405         if (a == NULL) {
    406             return FALSE; // request failed
    407         }
    408         subformats = a;
    409         subformatCapacity = capacity;
    410     }
    411     return TRUE;
    412 }
    413 
    414 /**
    415  * Allocate argTypes[] to at least the given capacity and return
    416  * TRUE if successful.  If not, leave argTypes[] unchanged.
    417  *
    418  * If argTypes is NULL, allocate it.  If it is not NULL, enlarge it
    419  * if necessary to be at least as large as specified.
    420  */
    421 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
    422     if (argTypes == NULL) {
    423         argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
    424         argTypeCount = 0;
    425         argTypeCapacity = capacity;
    426         if (argTypes == NULL) {
    427             argTypeCapacity = 0;
    428             return FALSE;
    429         }
    430         for (int32_t i=0; i<capacity; ++i) {
    431             argTypes[i] = Formattable::kString;
    432         }
    433     } else if (argTypeCapacity < capacity) {
    434         if (capacity < 2*argTypeCapacity) {
    435             capacity = 2*argTypeCapacity;
    436         }
    437         Formattable::Type* a = (Formattable::Type*)
    438             uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
    439         if (a == NULL) {
    440             return FALSE; // request failed
    441         }
    442         for (int32_t i=argTypeCapacity; i<capacity; ++i) {
    443             a[i] = Formattable::kString;
    444         }
    445         argTypes = a;
    446         argTypeCapacity = capacity;
    447     }
    448     return TRUE;
    449 }
    450 
    451 // -------------------------------------
    452 // assignment operator
    453 
    454 const MessageFormat&
    455 MessageFormat::operator=(const MessageFormat& that)
    456 {
    457     // Reallocate the arrays BEFORE changing this object
    458     if (this != &that &&
    459         allocateSubformats(that.subformatCount) &&
    460         allocateArgTypes(that.argTypeCount)) {
    461 
    462         // Calls the super class for assignment first.
    463         Format::operator=(that);
    464 
    465         fPattern = that.fPattern;
    466         setLocale(that.fLocale);
    467         isArgNumeric = that.isArgNumeric;
    468         int32_t j;
    469         for (j=0; j<subformatCount; ++j) {
    470             delete subformats[j].format;
    471         }
    472         subformatCount = 0;
    473 
    474         for (j=0; j<that.subformatCount; ++j) {
    475             // Subformat::operator= does NOT delete this.format
    476             subformats[j] = that.subformats[j];
    477         }
    478         subformatCount = that.subformatCount;
    479 
    480         for (j=0; j<that.argTypeCount; ++j) {
    481             argTypes[j] = that.argTypes[j];
    482         }
    483         argTypeCount = that.argTypeCount;
    484     }
    485     return *this;
    486 }
    487 
    488 UBool
    489 MessageFormat::operator==(const Format& rhs) const
    490 {
    491     if (this == &rhs) return TRUE;
    492 
    493     MessageFormat& that = (MessageFormat&)rhs;
    494 
    495     // Check class ID before checking MessageFormat members
    496     if (!Format::operator==(rhs) ||
    497         fPattern != that.fPattern ||
    498         fLocale != that.fLocale ||
    499         isArgNumeric != that.isArgNumeric) {
    500         return FALSE;
    501     }
    502 
    503     int32_t j;
    504     for (j=0; j<subformatCount; ++j) {
    505         if (subformats[j] != that.subformats[j]) {
    506             return FALSE;
    507         }
    508     }
    509 
    510     return TRUE;
    511 }
    512 
    513 // -------------------------------------
    514 // Creates a copy of this MessageFormat, the caller owns the copy.
    515 
    516 Format*
    517 MessageFormat::clone() const
    518 {
    519     return new MessageFormat(*this);
    520 }
    521 
    522 // -------------------------------------
    523 // Sets the locale of this MessageFormat object to theLocale.
    524 
    525 void
    526 MessageFormat::setLocale(const Locale& theLocale)
    527 {
    528     if (fLocale != theLocale) {
    529         delete defaultNumberFormat;
    530         defaultNumberFormat = NULL;
    531         delete defaultDateFormat;
    532         defaultDateFormat = NULL;
    533     }
    534     fLocale = theLocale;
    535     setLocaleIDs(fLocale.getName(), fLocale.getName());
    536 }
    537 
    538 // -------------------------------------
    539 // Gets the locale of this MessageFormat object.
    540 
    541 const Locale&
    542 MessageFormat::getLocale() const
    543 {
    544     return fLocale;
    545 }
    546 
    547 
    548 
    549 
    550 void
    551 MessageFormat::applyPattern(const UnicodeString& newPattern,
    552                             UErrorCode& status)
    553 {
    554     UParseError parseError;
    555     applyPattern(newPattern,parseError,status);
    556 }
    557 
    558 
    559 // -------------------------------------
    560 // Applies the new pattern and returns an error if the pattern
    561 // is not correct.
    562 void
    563 MessageFormat::applyPattern(const UnicodeString& pattern,
    564                             UParseError& parseError,
    565                             UErrorCode& ec)
    566 {
    567     if(U_FAILURE(ec)) {
    568         return;
    569     }
    570     // The pattern is broken up into segments.  Each time a subformat
    571     // is encountered, 4 segments are recorded.  For example, consider
    572     // the pattern:
    573     //  "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
    574     // The first set of segments is:
    575     //  segments[0] = "There "
    576     //  segments[1] = "0"
    577     //  segments[2] = "choice"
    578     //  segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
    579 
    580     // During parsing, the plain text is accumulated into segments[0].
    581     // Segments 1..3 are used to parse each subpattern.  Each time a
    582     // subpattern is parsed, it creates a format object that is stored
    583     // in the subformats array, together with an offset and argument
    584     // number.  The offset into the plain text stored in
    585     // segments[0].
    586 
    587     // Quotes in segment 0 are handled normally.  They are removed.
    588     // Quotes may not occur in segments 1 or 2.
    589     // Quotes in segment 3 are parsed and _copied_.  This makes
    590     //  subformat patterns work, e.g., {1,number,'#'.##} passes
    591     //  the pattern "'#'.##" to DecimalFormat.
    592 
    593     UnicodeString segments[4];
    594     int32_t part = 0; // segment we are in, 0..3
    595     // Record the highest argument number in the pattern.  (In the
    596     // subpattern {3,number} the argument number is 3.)
    597     int32_t formatNumber = 0;
    598     UBool inQuote = FALSE;
    599     int32_t braceStack = 0;
    600     // Clear error struct
    601     parseError.offset = -1;
    602     parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
    603     int32_t patLen = pattern.length();
    604     int32_t i;
    605 
    606     for (i=0; i<subformatCount; ++i) {
    607         delete subformats[i].format;
    608     }
    609     subformatCount = 0;
    610     argTypeCount = 0;
    611 
    612     for (i=0; i<patLen; ++i) {
    613         UChar ch = pattern[i];
    614         if (part == 0) {
    615             // In segment 0, recognize and remove quotes
    616             if (ch == SINGLE_QUOTE) {
    617                 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
    618                     segments[0] += ch;
    619                     ++i;
    620                 } else {
    621                     inQuote = !inQuote;
    622                 }
    623             } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
    624                 // The only way we get from segment 0 to 1 is via an
    625                 // unquoted '{'.
    626                 part = 1;
    627             } else {
    628                 segments[0] += ch;
    629             }
    630         } else if (inQuote) {
    631             // In segments 1..3, recognize quoted matter, and copy it
    632             // into the segment, together with the quotes.  This takes
    633             // care of '' as well.
    634             segments[part] += ch;
    635             if (ch == SINGLE_QUOTE) {
    636                 inQuote = FALSE;
    637             }
    638         } else {
    639             // We have an unquoted character in segment 1..3
    640             switch (ch) {
    641             case COMMA:
    642                 // Commas bump us to the next segment, except for segment 3,
    643                 // which can contain commas.  See example above.
    644                 if (part < 3)
    645                     part += 1;
    646                 else
    647                     segments[3] += ch;
    648                 break;
    649             case LEFT_CURLY_BRACE:
    650                 // Handle '{' within segment 3.  The initial '{'
    651                 // before segment 1 is handled above.
    652                 if (part != 3) {
    653                     ec = U_PATTERN_SYNTAX_ERROR;
    654                     goto SYNTAX_ERROR;
    655                 }
    656                 ++braceStack;
    657                 segments[part] += ch;
    658                 break;
    659             case RIGHT_CURLY_BRACE:
    660                 if (braceStack == 0) {
    661                     makeFormat(formatNumber, segments, parseError,ec);
    662                     if (U_FAILURE(ec)){
    663                         goto SYNTAX_ERROR;
    664                     }
    665                     formatNumber++;
    666                     segments[1].remove();
    667                     segments[2].remove();
    668                     segments[3].remove();
    669                     part = 0;
    670                 } else {
    671                     --braceStack;
    672                     segments[part] += ch;
    673                 }
    674                 break;
    675             case SINGLE_QUOTE:
    676                 inQuote = TRUE;
    677                 // fall through (copy quote chars in segments 1..3)
    678             default:
    679                 segments[part] += ch;
    680                 break;
    681             }
    682         }
    683     }
    684     if (braceStack != 0 || part != 0) {
    685         // Unmatched braces in the pattern
    686         ec = U_UNMATCHED_BRACES;
    687         goto SYNTAX_ERROR;
    688     }
    689     fPattern = segments[0];
    690     return;
    691 
    692  SYNTAX_ERROR:
    693     syntaxError(pattern, i, parseError);
    694     for (i=0; i<subformatCount; ++i) {
    695         delete subformats[i].format;
    696     }
    697     argTypeCount = subformatCount = 0;
    698 }
    699 // -------------------------------------
    700 // Converts this MessageFormat instance to a pattern.
    701 
    702 UnicodeString&
    703 MessageFormat::toPattern(UnicodeString& appendTo) const {
    704     // later, make this more extensible
    705     int32_t lastOffset = 0;
    706     int32_t i;
    707     for (i=0; i<subformatCount; ++i) {
    708         copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
    709         lastOffset = subformats[i].offset;
    710         appendTo += LEFT_CURLY_BRACE;
    711         if (isArgNumeric) {
    712             itos(subformats[i].argNum, appendTo);
    713         }
    714         else {
    715             appendTo += *subformats[i].argName;
    716         }
    717         Format* fmt = subformats[i].format;
    718         if (fmt == NULL) {
    719             // do nothing, string format
    720         }
    721         else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
    722 
    723             UErrorCode ec = U_ZERO_ERROR;
    724             NumberFormat& formatAlias = *(NumberFormat*)fmt;
    725             NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
    726             NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
    727             NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
    728             NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
    729 
    730             appendTo += COMMA;
    731             appendTo += ID_NUMBER;
    732             if (formatAlias != *defaultTemplate) {
    733                 appendTo += COMMA;
    734                 if (formatAlias == *currencyTemplate) {
    735                     appendTo += ID_CURRENCY;
    736                 }
    737                 else if (formatAlias == *percentTemplate) {
    738                     appendTo += ID_PERCENT;
    739                 }
    740                 else if (formatAlias == *integerTemplate) {
    741                     appendTo += ID_INTEGER;
    742                 }
    743                 else {
    744                     UnicodeString buffer;
    745                     appendTo += ((DecimalFormat*)fmt)->toPattern(buffer);
    746                 }
    747             }
    748 
    749             delete defaultTemplate;
    750             delete currencyTemplate;
    751             delete percentTemplate;
    752             delete integerTemplate;
    753         }
    754         else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
    755             DateFormat& formatAlias = *(DateFormat*)fmt;
    756             DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
    757             DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
    758             DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
    759             DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
    760             DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
    761             DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
    762             DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
    763             DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
    764 
    765 
    766             appendTo += COMMA;
    767             if (formatAlias == *defaultDateTemplate) {
    768                 appendTo += ID_DATE;
    769             }
    770             else if (formatAlias == *shortDateTemplate) {
    771                 appendTo += ID_DATE;
    772                 appendTo += COMMA;
    773                 appendTo += ID_SHORT;
    774             }
    775             else if (formatAlias == *defaultDateTemplate) {
    776                 appendTo += ID_DATE;
    777                 appendTo += COMMA;
    778                 appendTo += ID_MEDIUM;
    779             }
    780             else if (formatAlias == *longDateTemplate) {
    781                 appendTo += ID_DATE;
    782                 appendTo += COMMA;
    783                 appendTo += ID_LONG;
    784             }
    785             else if (formatAlias == *fullDateTemplate) {
    786                 appendTo += ID_DATE;
    787                 appendTo += COMMA;
    788                 appendTo += ID_FULL;
    789             }
    790             else if (formatAlias == *defaultTimeTemplate) {
    791                 appendTo += ID_TIME;
    792             }
    793             else if (formatAlias == *shortTimeTemplate) {
    794                 appendTo += ID_TIME;
    795                 appendTo += COMMA;
    796                 appendTo += ID_SHORT;
    797             }
    798             else if (formatAlias == *defaultTimeTemplate) {
    799                 appendTo += ID_TIME;
    800                 appendTo += COMMA;
    801                 appendTo += ID_MEDIUM;
    802             }
    803             else if (formatAlias == *longTimeTemplate) {
    804                 appendTo += ID_TIME;
    805                 appendTo += COMMA;
    806                 appendTo += ID_LONG;
    807             }
    808             else if (formatAlias == *fullTimeTemplate) {
    809                 appendTo += ID_TIME;
    810                 appendTo += COMMA;
    811                 appendTo += ID_FULL;
    812             }
    813             else {
    814                 UnicodeString buffer;
    815                 appendTo += ID_DATE;
    816                 appendTo += COMMA;
    817                 appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer);
    818             }
    819 
    820             delete defaultDateTemplate;
    821             delete shortDateTemplate;
    822             delete longDateTemplate;
    823             delete fullDateTemplate;
    824             delete defaultTimeTemplate;
    825             delete shortTimeTemplate;
    826             delete longTimeTemplate;
    827             delete fullTimeTemplate;
    828             // {sfb} there should be a more efficient way to do this!
    829         }
    830         else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
    831             UnicodeString buffer;
    832             appendTo += COMMA;
    833             appendTo += ID_CHOICE;
    834             appendTo += COMMA;
    835             appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
    836         }
    837         else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) {
    838             UnicodeString buffer;
    839             appendTo += ((PluralFormat*)fmt)->toPattern(buffer);
    840         }
    841         else {
    842             //appendTo += ", unknown";
    843         }
    844         appendTo += RIGHT_CURLY_BRACE;
    845     }
    846     copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
    847     return appendTo;
    848 }
    849 
    850 // -------------------------------------
    851 // Adopts the new formats array and updates the array count.
    852 // This MessageFormat instance owns the new formats.
    853 
    854 void
    855 MessageFormat::adoptFormats(Format** newFormats,
    856                             int32_t count) {
    857     if (newFormats == NULL || count < 0) {
    858         return;
    859     }
    860 
    861     int32_t i;
    862     if (allocateSubformats(count)) {
    863         for (i=0; i<subformatCount; ++i) {
    864             delete subformats[i].format;
    865         }
    866         for (i=0; i<count; ++i) {
    867             subformats[i].format = newFormats[i];
    868         }
    869         subformatCount = count;
    870     } else {
    871         // An adopt method must always take ownership.  Delete
    872         // the incoming format objects and return unchanged.
    873         for (i=0; i<count; ++i) {
    874             delete newFormats[i];
    875         }
    876     }
    877 
    878     // TODO: What about the .offset and .argNum fields?
    879 }
    880 
    881 // -------------------------------------
    882 // Sets the new formats array and updates the array count.
    883 // This MessageFormat instance maks a copy of the new formats.
    884 
    885 void
    886 MessageFormat::setFormats(const Format** newFormats,
    887                           int32_t count) {
    888     if (newFormats == NULL || count < 0) {
    889         return;
    890     }
    891 
    892     if (allocateSubformats(count)) {
    893         int32_t i;
    894         for (i=0; i<subformatCount; ++i) {
    895             delete subformats[i].format;
    896         }
    897         subformatCount = 0;
    898 
    899         for (i=0; i<count; ++i) {
    900             subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
    901         }
    902         subformatCount = count;
    903     }
    904 
    905     // TODO: What about the .offset and .arg fields?
    906 }
    907 
    908 // -------------------------------------
    909 // Adopt a single format by format number.
    910 // Do nothing if the format number is not less than the array count.
    911 
    912 void
    913 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
    914     if (n < 0 || n >= subformatCount) {
    915         delete newFormat;
    916     } else {
    917         delete subformats[n].format;
    918         subformats[n].format = newFormat;
    919     }
    920 }
    921 
    922 // -------------------------------------
    923 // Adopt a single format by format name.
    924 // Do nothing if there is no match of formatName.
    925 void
    926 MessageFormat::adoptFormat(const UnicodeString& formatName,
    927                            Format* formatToAdopt,
    928                            UErrorCode& status) {
    929     if (isArgNumeric ) {
    930         int32_t argumentNumber = stou(formatName);
    931         if (argumentNumber<0) {
    932             status = U_ARGUMENT_TYPE_MISMATCH;
    933             return;
    934         }
    935         adoptFormat(argumentNumber, formatToAdopt);
    936         return;
    937     }
    938     for (int32_t i=0; i<subformatCount; ++i) {
    939         if (formatName==*subformats[i].argName) {
    940             delete subformats[i].format;
    941             if ( formatToAdopt== NULL) {
    942                 // This should never happen -- but we'll be nice if it does
    943                 subformats[i].format = NULL;
    944             } else {
    945                 subformats[i].format = formatToAdopt;
    946             }
    947         }
    948     }
    949 }
    950 
    951 // -------------------------------------
    952 // Set a single format.
    953 // Do nothing if the variable is not less than the array count.
    954 
    955 void
    956 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
    957     if (n >= 0 && n < subformatCount) {
    958         delete subformats[n].format;
    959         if (&newFormat == NULL) {
    960             // This should never happen -- but we'll be nice if it does
    961             subformats[n].format = NULL;
    962         } else {
    963             subformats[n].format = newFormat.clone();
    964         }
    965     }
    966 }
    967 
    968 // -------------------------------------
    969 // Get a single format by format name.
    970 // Do nothing if the variable is not less than the array count.
    971 Format *
    972 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
    973 
    974     if (U_FAILURE(status)) return NULL;
    975 
    976     if (isArgNumeric ) {
    977         int32_t argumentNumber = stou(formatName);
    978         if (argumentNumber<0) {
    979             status = U_ARGUMENT_TYPE_MISMATCH;
    980             return NULL;
    981         }
    982         if (argumentNumber < 0 || argumentNumber >= subformatCount) {
    983             return subformats[argumentNumber].format;
    984         }
    985         else {
    986             return NULL;
    987         }
    988     }
    989 
    990     for (int32_t i=0; i<subformatCount; ++i) {
    991         if (formatName==*subformats[i].argName)
    992         {
    993             return subformats[i].format;
    994         }
    995     }
    996     return NULL;
    997 }
    998 
    999 // -------------------------------------
   1000 // Set a single format by format name
   1001 // Do nothing if the variable is not less than the array count.
   1002 void
   1003 MessageFormat::setFormat(const UnicodeString& formatName,
   1004                          const Format& newFormat,
   1005                          UErrorCode& status) {
   1006     if (isArgNumeric) {
   1007         status = U_ARGUMENT_TYPE_MISMATCH;
   1008         return;
   1009     }
   1010     for (int32_t i=0; i<subformatCount; ++i) {
   1011         if (formatName==*subformats[i].argName)
   1012         {
   1013             delete subformats[i].format;
   1014             if (&newFormat == NULL) {
   1015                 // This should never happen -- but we'll be nice if it does
   1016                 subformats[i].format = NULL;
   1017             } else {
   1018                 subformats[i].format = newFormat.clone();
   1019             }
   1020             break;
   1021         }
   1022     }
   1023 }
   1024 
   1025 // -------------------------------------
   1026 // Gets the format array.
   1027 
   1028 const Format**
   1029 MessageFormat::getFormats(int32_t& cnt) const
   1030 {
   1031     // This old API returns an array (which we hold) of Format*
   1032     // pointers.  The array is valid up to the next call to any
   1033     // method on this object.  We construct and resize an array
   1034     // on demand that contains aliases to the subformats[i].format
   1035     // pointers.
   1036     MessageFormat* t = (MessageFormat*) this;
   1037     cnt = 0;
   1038     if (formatAliases == NULL) {
   1039         t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
   1040         Format** a = (Format**)
   1041             uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
   1042         if (a == NULL) {
   1043             return NULL;
   1044         }
   1045         t->formatAliases = a;
   1046     } else if (subformatCount > formatAliasesCapacity) {
   1047         Format** a = (Format**)
   1048             uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
   1049         if (a == NULL) {
   1050             return NULL;
   1051         }
   1052         t->formatAliases = a;
   1053         t->formatAliasesCapacity = subformatCount;
   1054     }
   1055     for (int32_t i=0; i<subformatCount; ++i) {
   1056         t->formatAliases[i] = subformats[i].format;
   1057     }
   1058     cnt = subformatCount;
   1059     return (const Format**)formatAliases;
   1060 }
   1061 
   1062 
   1063 StringEnumeration*
   1064 MessageFormat::getFormatNames(UErrorCode& status) {
   1065     if (U_FAILURE(status))  return NULL;
   1066 
   1067     if (isArgNumeric) {
   1068         status = U_ARGUMENT_TYPE_MISMATCH;
   1069         return NULL;
   1070     }
   1071     UVector *fFormatNames = new UVector(status);
   1072     if (U_FAILURE(status)) {
   1073         status = U_MEMORY_ALLOCATION_ERROR;
   1074         return NULL;
   1075     }
   1076     for (int32_t i=0; i<subformatCount; ++i) {
   1077         fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
   1078     }
   1079 
   1080     StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
   1081     return nameEnumerator;
   1082 }
   1083 
   1084 // -------------------------------------
   1085 // Formats the source Formattable array and copy into the result buffer.
   1086 // Ignore the FieldPosition result for error checking.
   1087 
   1088 UnicodeString&
   1089 MessageFormat::format(const Formattable* source,
   1090                       int32_t cnt,
   1091                       UnicodeString& appendTo,
   1092                       FieldPosition& ignore,
   1093                       UErrorCode& success) const
   1094 {
   1095     if (U_FAILURE(success))
   1096         return appendTo;
   1097 
   1098     return format(source, cnt, appendTo, ignore, 0, success);
   1099 }
   1100 
   1101 // -------------------------------------
   1102 // Internally creates a MessageFormat instance based on the
   1103 // pattern and formats the arguments Formattable array and
   1104 // copy into the appendTo buffer.
   1105 
   1106 UnicodeString&
   1107 MessageFormat::format(  const UnicodeString& pattern,
   1108                         const Formattable* arguments,
   1109                         int32_t cnt,
   1110                         UnicodeString& appendTo,
   1111                         UErrorCode& success)
   1112 {
   1113     MessageFormat temp(pattern, success);
   1114     FieldPosition ignore(0);
   1115     temp.format(arguments, cnt, appendTo, ignore, success);
   1116     return appendTo;
   1117 }
   1118 
   1119 // -------------------------------------
   1120 // Formats the source Formattable object and copy into the
   1121 // appendTo buffer.  The Formattable object must be an array
   1122 // of Formattable instances, returns error otherwise.
   1123 
   1124 UnicodeString&
   1125 MessageFormat::format(const Formattable& source,
   1126                       UnicodeString& appendTo,
   1127                       FieldPosition& ignore,
   1128                       UErrorCode& success) const
   1129 {
   1130     int32_t cnt;
   1131 
   1132     if (U_FAILURE(success))
   1133         return appendTo;
   1134     if (source.getType() != Formattable::kArray) {
   1135         success = U_ILLEGAL_ARGUMENT_ERROR;
   1136         return appendTo;
   1137     }
   1138     const Formattable* tmpPtr = source.getArray(cnt);
   1139 
   1140     return format(tmpPtr, cnt, appendTo, ignore, 0, success);
   1141 }
   1142 
   1143 
   1144 UnicodeString&
   1145 MessageFormat::format(const UnicodeString* argumentNames,
   1146                       const Formattable* arguments,
   1147                       int32_t count,
   1148                       UnicodeString& appendTo,
   1149                       UErrorCode& success) const {
   1150     FieldPosition ignore(0);
   1151     return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
   1152 }
   1153 
   1154 UnicodeString&
   1155 MessageFormat::format(const Formattable* arguments,
   1156                       int32_t cnt,
   1157                       UnicodeString& appendTo,
   1158                       FieldPosition& status,
   1159                       int32_t recursionProtection,
   1160                       UErrorCode& success) const
   1161 {
   1162     return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
   1163 }
   1164 
   1165 // -------------------------------------
   1166 // Formats the arguments Formattable array and copy into the appendTo buffer.
   1167 // Ignore the FieldPosition result for error checking.
   1168 
   1169 UnicodeString&
   1170 MessageFormat::format(const Formattable* arguments,
   1171                       const UnicodeString *argumentNames,
   1172                       int32_t cnt,
   1173                       UnicodeString& appendTo,
   1174                       FieldPosition& status,
   1175                       int32_t recursionProtection,
   1176                       UErrorCode& success) const
   1177 {
   1178     int32_t lastOffset = 0;
   1179     int32_t argumentNumber=0;
   1180     if (cnt < 0 || (cnt && arguments == NULL)) {
   1181         success = U_ILLEGAL_ARGUMENT_ERROR;
   1182         return appendTo;
   1183     }
   1184 
   1185     if ( !isArgNumeric && argumentNames== NULL ) {
   1186         success = U_ILLEGAL_ARGUMENT_ERROR;
   1187         return appendTo;
   1188     }
   1189 
   1190     const Formattable *obj=NULL;
   1191     for (int32_t i=0; i<subformatCount; ++i) {
   1192         // Append the prefix of current format element.
   1193         appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
   1194         lastOffset = subformats[i].offset;
   1195         obj = NULL;
   1196         if (isArgNumeric) {
   1197             argumentNumber = subformats[i].argNum;
   1198 
   1199             // Checks the scope of the argument number.
   1200             if (argumentNumber >= cnt) {
   1201                 appendTo += LEFT_CURLY_BRACE;
   1202                 itos(argumentNumber, appendTo);
   1203                 appendTo += RIGHT_CURLY_BRACE;
   1204                 continue;
   1205             }
   1206             obj = arguments+argumentNumber;
   1207         }
   1208         else {
   1209             for (int32_t j=0; j<cnt; ++j) {
   1210                 if (argumentNames[j]== *subformats[i].argName ) {
   1211                     obj = arguments+j;
   1212                     break;
   1213                 }
   1214             }
   1215             if (obj == NULL ) {
   1216                 appendTo += LEFT_CURLY_BRACE;
   1217                 appendTo += *subformats[i].argName;
   1218                 appendTo += RIGHT_CURLY_BRACE;
   1219                 continue;
   1220 
   1221             }
   1222         }
   1223         Formattable::Type type = obj->getType();
   1224 
   1225         // Recursively calling the format process only if the current
   1226         // format argument refers to a ChoiceFormat object.
   1227         Format* fmt = subformats[i].format;
   1228         if (fmt != NULL) {
   1229             UnicodeString argNum;
   1230             fmt->format(*obj, argNum, success);
   1231 
   1232             // Needs to reprocess the ChoiceFormat option by using the
   1233             // MessageFormat pattern application.
   1234             if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() ||
   1235                  fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) &&
   1236                 argNum.indexOf(LEFT_CURLY_BRACE) >= 0) {
   1237                 MessageFormat temp(argNum, fLocale, success);
   1238                 // TODO: Implement recursion protection
   1239                 if ( isArgNumeric ) {
   1240                     temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
   1241                 }
   1242                 else {
   1243                     temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
   1244                 }
   1245                 if (U_FAILURE(success)) {
   1246                     return appendTo;
   1247                 }
   1248             }
   1249             else {
   1250                 appendTo += argNum;
   1251             }
   1252         }
   1253         // If the obj data type is a number, use a NumberFormat instance.
   1254         else if ((type == Formattable::kDouble) ||
   1255                  (type == Formattable::kLong) ||
   1256                  (type == Formattable::kInt64)) {
   1257 
   1258             const NumberFormat* nf = getDefaultNumberFormat(success);
   1259             if (nf == NULL) {
   1260                 return appendTo;
   1261             }
   1262             if (type == Formattable::kDouble) {
   1263                 nf->format(obj->getDouble(), appendTo);
   1264             } else if (type == Formattable::kLong) {
   1265                 nf->format(obj->getLong(), appendTo);
   1266             } else {
   1267                 nf->format(obj->getInt64(), appendTo);
   1268             }
   1269         }
   1270         // If the obj data type is a Date instance, use a DateFormat instance.
   1271         else if (type == Formattable::kDate) {
   1272             const DateFormat* df = getDefaultDateFormat(success);
   1273             if (df == NULL) {
   1274                 return appendTo;
   1275             }
   1276             df->format(obj->getDate(), appendTo);
   1277         }
   1278         else if (type == Formattable::kString) {
   1279             appendTo += obj->getString();
   1280         }
   1281         else {
   1282             success = U_ILLEGAL_ARGUMENT_ERROR;
   1283             return appendTo;
   1284         }
   1285     }
   1286     // Appends the rest of the pattern characters after the real last offset.
   1287     appendTo.append(fPattern, lastOffset, 0x7fffffff);
   1288     return appendTo;
   1289 }
   1290 
   1291 
   1292 // -------------------------------------
   1293 // Parses the source pattern and returns the Formattable objects array,
   1294 // the array count and the ending parse position.  The caller of this method
   1295 // owns the array.
   1296 
   1297 Formattable*
   1298 MessageFormat::parse(const UnicodeString& source,
   1299                      ParsePosition& pos,
   1300                      int32_t& count) const
   1301 {
   1302     // Allocate at least one element.  Allocating an array of length
   1303     // zero causes problems on some platforms (e.g. Win32).
   1304     Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
   1305     int32_t patternOffset = 0;
   1306     int32_t sourceOffset = pos.getIndex();
   1307     ParsePosition tempPos(0);
   1308     count = 0; // {sfb} reset to zero
   1309     int32_t len;
   1310     // If resultArray could not be created, exit out.
   1311     // Avoid crossing initialization of variables above.
   1312     if (resultArray == NULL) {
   1313         goto PARSE_ERROR;
   1314     }
   1315     for (int32_t i = 0; i < subformatCount; ++i) {
   1316         // match up to format
   1317         len = subformats[i].offset - patternOffset;
   1318         if (len == 0 ||
   1319             fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
   1320             sourceOffset += len;
   1321             patternOffset += len;
   1322         }
   1323         else {
   1324             goto PARSE_ERROR;
   1325         }
   1326 
   1327         // now use format
   1328         Format* fmt = subformats[i].format;
   1329         int32_t argNum = subformats[i].argNum;
   1330         if (fmt == NULL) {   // string format
   1331             // if at end, use longest possible match
   1332             // otherwise uses first match to intervening string
   1333             // does NOT recursively try all possibilities
   1334             int32_t tempLength = (i+1<subformatCount) ?
   1335                 subformats[i+1].offset : fPattern.length();
   1336 
   1337             int32_t next;
   1338             if (patternOffset >= tempLength) {
   1339                 next = source.length();
   1340             }
   1341             else {
   1342                 UnicodeString buffer;
   1343                 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
   1344                 next = source.indexOf(buffer, sourceOffset);
   1345             }
   1346 
   1347             if (next < 0) {
   1348                 goto PARSE_ERROR;
   1349             }
   1350             else {
   1351                 UnicodeString buffer;
   1352                 source.extract(sourceOffset,next - sourceOffset, buffer);
   1353                 UnicodeString strValue = buffer;
   1354                 UnicodeString temp(LEFT_CURLY_BRACE);
   1355                 // {sfb} check this later
   1356                 if (isArgNumeric) {
   1357                     itos(argNum, temp);
   1358                 }
   1359                 else {
   1360                     temp+=(*subformats[i].argName);
   1361                 }
   1362                 temp += RIGHT_CURLY_BRACE;
   1363                 if (strValue != temp) {
   1364                     source.extract(sourceOffset,next - sourceOffset, buffer);
   1365                     resultArray[argNum].setString(buffer);
   1366                     // {sfb} not sure about this
   1367                     if ((argNum + 1) > count) {
   1368                         count = argNum + 1;
   1369                     }
   1370                 }
   1371                 sourceOffset = next;
   1372             }
   1373         }
   1374         else {
   1375             tempPos.setIndex(sourceOffset);
   1376             fmt->parseObject(source, resultArray[argNum], tempPos);
   1377             if (tempPos.getIndex() == sourceOffset) {
   1378                 goto PARSE_ERROR;
   1379             }
   1380 
   1381             if ((argNum + 1) > count) {
   1382                 count = argNum + 1;
   1383             }
   1384             sourceOffset = tempPos.getIndex(); // update
   1385         }
   1386     }
   1387     len = fPattern.length() - patternOffset;
   1388     if (len == 0 ||
   1389         fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
   1390         pos.setIndex(sourceOffset + len);
   1391         return resultArray;
   1392     }
   1393     // else fall through...
   1394 
   1395  PARSE_ERROR:
   1396     pos.setErrorIndex(sourceOffset);
   1397     delete [] resultArray;
   1398     count = 0;
   1399     return NULL; // leave index as is to signal error
   1400 }
   1401 
   1402 // -------------------------------------
   1403 // Parses the source string and returns the array of
   1404 // Formattable objects and the array count.  The caller
   1405 // owns the returned array.
   1406 
   1407 Formattable*
   1408 MessageFormat::parse(const UnicodeString& source,
   1409                      int32_t& cnt,
   1410                      UErrorCode& success) const
   1411 {
   1412     if (!isArgNumeric ) {
   1413         success = U_ARGUMENT_TYPE_MISMATCH;
   1414         return NULL;
   1415     }
   1416     ParsePosition status(0);
   1417     // Calls the actual implementation method and starts
   1418     // from zero offset of the source text.
   1419     Formattable* result = parse(source, status, cnt);
   1420     if (status.getIndex() == 0) {
   1421         success = U_MESSAGE_PARSE_ERROR;
   1422         delete[] result;
   1423         return NULL;
   1424     }
   1425     return result;
   1426 }
   1427 
   1428 // -------------------------------------
   1429 // Parses the source text and copy into the result buffer.
   1430 
   1431 void
   1432 MessageFormat::parseObject( const UnicodeString& source,
   1433                             Formattable& result,
   1434                             ParsePosition& status) const
   1435 {
   1436     int32_t cnt = 0;
   1437     Formattable* tmpResult = parse(source, status, cnt);
   1438     if (tmpResult != NULL)
   1439         result.adoptArray(tmpResult, cnt);
   1440 }
   1441 
   1442 UnicodeString
   1443 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
   1444   UnicodeString result;
   1445   if (U_SUCCESS(status)) {
   1446     int32_t plen = pattern.length();
   1447     const UChar* pat = pattern.getBuffer();
   1448     int32_t blen = plen * 2 + 1; // space for null termination, convenience
   1449     UChar* buf = result.getBuffer(blen);
   1450     if (buf == NULL) {
   1451       status = U_MEMORY_ALLOCATION_ERROR;
   1452     } else {
   1453       int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
   1454       result.releaseBuffer(U_SUCCESS(status) ? len : 0);
   1455     }
   1456   }
   1457   if (U_FAILURE(status)) {
   1458     result.setToBogus();
   1459   }
   1460   return result;
   1461 }
   1462 
   1463 // -------------------------------------
   1464 
   1465 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
   1466     RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
   1467     if (fmt == NULL) {
   1468         ec = U_MEMORY_ALLOCATION_ERROR;
   1469     } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
   1470         UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
   1471         fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
   1472     }
   1473     return fmt;
   1474 }
   1475 
   1476 /**
   1477  * Reads the segments[] array (see applyPattern()) and parses the
   1478  * segments[1..3] into a Format* object.  Stores the format object in
   1479  * the subformats[] array.  Updates the argTypes[] array type
   1480  * information for the corresponding argument.
   1481  *
   1482  * @param formatNumber index into subformats[] for this format
   1483  * @param segments array of strings with the parsed pattern segments
   1484  * @param parseError parse error data (output param)
   1485  * @param ec error code
   1486  */
   1487 void
   1488 MessageFormat::makeFormat(int32_t formatNumber,
   1489                           UnicodeString* segments,
   1490                           UParseError& parseError,
   1491                           UErrorCode& ec) {
   1492     if (U_FAILURE(ec)) {
   1493         return;
   1494     }
   1495 
   1496     // Parse the argument number
   1497     int32_t argumentNumber = stou(segments[1]); // always unlocalized!
   1498     UnicodeString argumentName;
   1499     if (argumentNumber < 0) {
   1500         if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
   1501             ec = U_INVALID_FORMAT_ERROR;
   1502             return;
   1503         }
   1504         isArgNumeric = FALSE;
   1505         argumentNumber=formatNumber;
   1506     }
   1507     if (!isArgNumeric) {
   1508         if ( !isLegalArgName(segments[1]) ) {
   1509             ec = U_INVALID_FORMAT_ERROR;
   1510             return;
   1511         }
   1512         argumentName = segments[1];
   1513     }
   1514 
   1515     // Parse the format, recording the argument type and creating a
   1516     // new Format object (except for string arguments).
   1517     Formattable::Type argType;
   1518     Format *fmt = NULL;
   1519     int32_t typeID, styleID;
   1520     DateFormat::EStyle style;
   1521     UnicodeString unquotedPattern, quotedPattern;
   1522     UBool inQuote = FALSE;
   1523 
   1524     switch (typeID = findKeyword(segments[2], TYPE_IDS)) {
   1525 
   1526     case 0: // string
   1527         argType = Formattable::kString;
   1528         break;
   1529 
   1530     case 1: // number
   1531         argType = Formattable::kDouble;
   1532 
   1533         switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
   1534         case 0: // default
   1535             fmt = NumberFormat::createInstance(fLocale, ec);
   1536             break;
   1537         case 1: // currency
   1538             fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
   1539             break;
   1540         case 2: // percent
   1541             fmt = NumberFormat::createPercentInstance(fLocale, ec);
   1542             break;
   1543         case 3: // integer
   1544             argType = Formattable::kLong;
   1545             fmt = createIntegerFormat(fLocale, ec);
   1546             break;
   1547         default: // pattern
   1548             fmt = NumberFormat::createInstance(fLocale, ec);
   1549             if (fmt &&
   1550                 fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
   1551                 ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec);
   1552             }
   1553             break;
   1554         }
   1555         break;
   1556 
   1557     case 2: // date
   1558     case 3: // time
   1559         argType = Formattable::kDate;
   1560         styleID = findKeyword(segments[3], DATE_STYLE_IDS);
   1561         style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
   1562 
   1563         if (typeID == 2) {
   1564             fmt = DateFormat::createDateInstance(style, fLocale);
   1565         } else {
   1566             fmt = DateFormat::createTimeInstance(style, fLocale);
   1567         }
   1568 
   1569         if (styleID < 0 &&
   1570             fmt != NULL &&
   1571             fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
   1572             ((SimpleDateFormat*)fmt)->applyPattern(segments[3]);
   1573         }
   1574         break;
   1575 
   1576     case 4: // choice
   1577         argType = Formattable::kDouble;
   1578 
   1579         fmt = new ChoiceFormat(segments[3], parseError, ec);
   1580         break;
   1581 
   1582     case 5: // spellout
   1583         argType = Formattable::kDouble;
   1584         fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
   1585         break;
   1586     case 6: // ordinal
   1587         argType = Formattable::kDouble;
   1588         fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
   1589         break;
   1590     case 7: // duration
   1591         argType = Formattable::kDouble;
   1592         fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
   1593         break;
   1594     case 8: // plural
   1595         argType = Formattable::kDouble;
   1596         quotedPattern = segments[3];
   1597         for (int32_t i = 0; i < quotedPattern.length(); ++i) {
   1598             UChar ch = quotedPattern.charAt(i);
   1599             if (ch == SINGLE_QUOTE) {
   1600                 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
   1601                     unquotedPattern+=ch;
   1602                     ++i;
   1603                 }
   1604                 else {
   1605                     inQuote = !inQuote;
   1606                 }
   1607             }
   1608             else {
   1609                 unquotedPattern += ch;
   1610             }
   1611         }
   1612         fmt = new PluralFormat(fLocale, unquotedPattern, ec);
   1613         break;
   1614     default:
   1615         argType = Formattable::kString;
   1616         ec = U_ILLEGAL_ARGUMENT_ERROR;
   1617         break;
   1618     }
   1619 
   1620     if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
   1621         ec = U_MEMORY_ALLOCATION_ERROR;
   1622     }
   1623 
   1624     if (!allocateSubformats(formatNumber+1) ||
   1625         !allocateArgTypes(argumentNumber+1)) {
   1626         ec = U_MEMORY_ALLOCATION_ERROR;
   1627     }
   1628 
   1629     if (U_FAILURE(ec)) {
   1630         delete fmt;
   1631         return;
   1632     }
   1633 
   1634     // Parse succeeded; record results in our arrays
   1635     subformats[formatNumber].format = fmt;
   1636     subformats[formatNumber].offset = segments[0].length();
   1637     if (isArgNumeric) {
   1638         subformats[formatNumber].argName = NULL;
   1639         subformats[formatNumber].argNum = argumentNumber;
   1640     }
   1641     else {
   1642         subformats[formatNumber].argName = new UnicodeString(argumentName);
   1643         subformats[formatNumber].argNum = -1;
   1644     }
   1645     subformatCount = formatNumber+1;
   1646 
   1647     // Careful here: argumentNumber may in general arrive out of
   1648     // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
   1649     argTypes[argumentNumber] = argType;
   1650     if (argumentNumber+1 > argTypeCount) {
   1651         argTypeCount = argumentNumber+1;
   1652     }
   1653 }
   1654 
   1655 // -------------------------------------
   1656 // Finds the string, s, in the string array, list.
   1657 int32_t MessageFormat::findKeyword(const UnicodeString& s,
   1658                                    const UChar * const *list)
   1659 {
   1660     if (s.length() == 0)
   1661         return 0; // default
   1662 
   1663     UnicodeString buffer = s;
   1664     // Trims the space characters and turns all characters
   1665     // in s to lower case.
   1666     buffer.trim().toLower("");
   1667     for (int32_t i = 0; list[i]; ++i) {
   1668         if (!buffer.compare(list[i], u_strlen(list[i]))) {
   1669             return i;
   1670         }
   1671     }
   1672     return -1;
   1673 }
   1674 
   1675 // -------------------------------------
   1676 // Checks the range of the source text to quote the special
   1677 // characters, { and ' and copy to target buffer.
   1678 
   1679 void
   1680 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
   1681                                 int32_t start,
   1682                                 int32_t end,
   1683                                 UnicodeString& appendTo)
   1684 {
   1685     UBool gotLB = FALSE;
   1686 
   1687     for (int32_t i = start; i < end; ++i) {
   1688         UChar ch = source[i];
   1689         if (ch == LEFT_CURLY_BRACE) {
   1690             appendTo += SINGLE_QUOTE;
   1691             appendTo += LEFT_CURLY_BRACE;
   1692             appendTo += SINGLE_QUOTE;
   1693             gotLB = TRUE;
   1694         }
   1695         else if (ch == RIGHT_CURLY_BRACE) {
   1696             if(gotLB) {
   1697                 appendTo += RIGHT_CURLY_BRACE;
   1698                 gotLB = FALSE;
   1699             }
   1700             else {
   1701                 // orig code.
   1702                 appendTo += SINGLE_QUOTE;
   1703                 appendTo += RIGHT_CURLY_BRACE;
   1704                 appendTo += SINGLE_QUOTE;
   1705             }
   1706         }
   1707         else if (ch == SINGLE_QUOTE) {
   1708             appendTo += SINGLE_QUOTE;
   1709             appendTo += SINGLE_QUOTE;
   1710         }
   1711         else {
   1712             appendTo += ch;
   1713         }
   1714     }
   1715 }
   1716 
   1717 /**
   1718  * Convenience method that ought to be in NumberFormat
   1719  */
   1720 NumberFormat*
   1721 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
   1722     NumberFormat *temp = NumberFormat::createInstance(locale, status);
   1723     if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
   1724         DecimalFormat *temp2 = (DecimalFormat*) temp;
   1725         temp2->setMaximumFractionDigits(0);
   1726         temp2->setDecimalSeparatorAlwaysShown(FALSE);
   1727         temp2->setParseIntegerOnly(TRUE);
   1728     }
   1729 
   1730     return temp;
   1731 }
   1732 
   1733 /**
   1734  * Return the default number format.  Used to format a numeric
   1735  * argument when subformats[i].format is NULL.  Returns NULL
   1736  * on failure.
   1737  *
   1738  * Semantically const but may modify *this.
   1739  */
   1740 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
   1741     if (defaultNumberFormat == NULL) {
   1742         MessageFormat* t = (MessageFormat*) this;
   1743         t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
   1744         if (U_FAILURE(ec)) {
   1745             delete t->defaultNumberFormat;
   1746             t->defaultNumberFormat = NULL;
   1747         } else if (t->defaultNumberFormat == NULL) {
   1748             ec = U_MEMORY_ALLOCATION_ERROR;
   1749         }
   1750     }
   1751     return defaultNumberFormat;
   1752 }
   1753 
   1754 /**
   1755  * Return the default date format.  Used to format a date
   1756  * argument when subformats[i].format is NULL.  Returns NULL
   1757  * on failure.
   1758  *
   1759  * Semantically const but may modify *this.
   1760  */
   1761 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
   1762     if (defaultDateFormat == NULL) {
   1763         MessageFormat* t = (MessageFormat*) this;
   1764         t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
   1765         if (t->defaultDateFormat == NULL) {
   1766             ec = U_MEMORY_ALLOCATION_ERROR;
   1767         }
   1768     }
   1769     return defaultDateFormat;
   1770 }
   1771 
   1772 UBool
   1773 MessageFormat::usesNamedArguments() const {
   1774     return !isArgNumeric;
   1775 }
   1776 
   1777 UBool
   1778 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
   1779     if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
   1780         return FALSE;
   1781     }
   1782     for (int32_t i=1; i<argName.length(); ++i) {
   1783         if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
   1784             return FALSE;
   1785         }
   1786     }
   1787     return TRUE;
   1788 }
   1789 
   1790 int32_t
   1791 MessageFormat::getArgTypeCount() const {
   1792         return argTypeCount;
   1793 }
   1794 
   1795 
   1796 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
   1797     pos=0;
   1798     fFormatNames = fNameList;
   1799 }
   1800 
   1801 const UnicodeString*
   1802 FormatNameEnumeration::snext(UErrorCode& status) {
   1803     if (U_SUCCESS(status) && pos < fFormatNames->size()) {
   1804         return (const UnicodeString*)fFormatNames->elementAt(pos++);
   1805     }
   1806     return NULL;
   1807 }
   1808 
   1809 void
   1810 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
   1811     pos=0;
   1812 }
   1813 
   1814 int32_t
   1815 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
   1816        return (fFormatNames==NULL) ? 0 : fFormatNames->size();
   1817 }
   1818 
   1819 FormatNameEnumeration::~FormatNameEnumeration() {
   1820     UnicodeString *s;
   1821     for (int32_t i=0; i<fFormatNames->size(); ++i) {
   1822         if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
   1823             delete s;
   1824         }
   1825     }
   1826     delete fFormatNames;
   1827 }
   1828 
   1829 
   1830 U_NAMESPACE_END
   1831 
   1832 #endif /* #if !UCONFIG_NO_FORMATTING */
   1833 
   1834 //eof
   1835