Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2012, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 *
      7 * File CHOICFMT.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   02/19/97    aliu        Converted from java.
     13 *   03/20/97    helena      Finished first cut of implementation and got rid
     14 *                           of nextDouble/previousDouble and replaced with
     15 *                           boolean array.
     16 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
     17 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
     18 *                           wchar.h.
     19 *   07/09/97    helena      Made ParsePosition into a class.
     20 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
     21 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
     22 *   02/22/99    stephen     Removed character literals for EBCDIC safety
     23 ********************************************************************************
     24 */
     25 
     26 #include "unicode/utypes.h"
     27 
     28 #if !UCONFIG_NO_FORMATTING
     29 
     30 #include "unicode/choicfmt.h"
     31 #include "unicode/numfmt.h"
     32 #include "unicode/locid.h"
     33 #include "cpputils.h"
     34 #include "cstring.h"
     35 #include "messageimpl.h"
     36 #include "putilimp.h"
     37 #include "uassert.h"
     38 #include <stdio.h>
     39 #include <float.h>
     40 
     41 // *****************************************************************************
     42 // class ChoiceFormat
     43 // *****************************************************************************
     44 
     45 U_NAMESPACE_BEGIN
     46 
     47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
     48 
     49 // Special characters used by ChoiceFormat.  There are two characters
     50 // used interchangeably to indicate <=.  Either is parsed, but only
     51 // LESS_EQUAL is generated by toPattern().
     52 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
     53 #define LESS_THAN    ((UChar)0x003C)   /*<*/
     54 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
     55 #define LESS_EQUAL2  ((UChar)0x2264)
     56 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
     57 #define MINUS        ((UChar)0x002D)   /*-*/
     58 
     59 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
     60 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
     61 
     62 #ifdef INFINITY
     63 #undef INFINITY
     64 #endif
     65 #define INFINITY     ((UChar)0x221E)
     66 
     67 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
     68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
     69 #define POSITIVE_INF_STRLEN 1
     70 #define NEGATIVE_INF_STRLEN 2
     71 
     72 // -------------------------------------
     73 // Creates a ChoiceFormat instance based on the pattern.
     74 
     75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
     76                            UErrorCode& status)
     77 : constructorErrorCode(status),
     78   msgPattern(status)
     79 {
     80     applyPattern(newPattern, status);
     81 }
     82 
     83 // -------------------------------------
     84 // Creates a ChoiceFormat instance with the limit array and
     85 // format strings for each limit.
     86 
     87 ChoiceFormat::ChoiceFormat(const double* limits,
     88                            const UnicodeString* formats,
     89                            int32_t cnt )
     90 : constructorErrorCode(U_ZERO_ERROR),
     91   msgPattern(constructorErrorCode)
     92 {
     93     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
     94 }
     95 
     96 // -------------------------------------
     97 
     98 ChoiceFormat::ChoiceFormat(const double* limits,
     99                            const UBool* closures,
    100                            const UnicodeString* formats,
    101                            int32_t cnt )
    102 : constructorErrorCode(U_ZERO_ERROR),
    103   msgPattern(constructorErrorCode)
    104 {
    105     setChoices(limits, closures, formats, cnt, constructorErrorCode);
    106 }
    107 
    108 // -------------------------------------
    109 // copy constructor
    110 
    111 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
    112 : NumberFormat(that),
    113   constructorErrorCode(that.constructorErrorCode),
    114   msgPattern(that.msgPattern)
    115 {
    116 }
    117 
    118 // -------------------------------------
    119 // Private constructor that creates a
    120 // ChoiceFormat instance based on the
    121 // pattern and populates UParseError
    122 
    123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
    124                            UParseError& parseError,
    125                            UErrorCode& status)
    126 : constructorErrorCode(status),
    127   msgPattern(status)
    128 {
    129     applyPattern(newPattern,parseError, status);
    130 }
    131 // -------------------------------------
    132 
    133 UBool
    134 ChoiceFormat::operator==(const Format& that) const
    135 {
    136     if (this == &that) return TRUE;
    137     if (!NumberFormat::operator==(that)) return FALSE;
    138     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
    139     return msgPattern == thatAlias.msgPattern;
    140 }
    141 
    142 // -------------------------------------
    143 // copy constructor
    144 
    145 const ChoiceFormat&
    146 ChoiceFormat::operator=(const   ChoiceFormat& that)
    147 {
    148     if (this != &that) {
    149         NumberFormat::operator=(that);
    150         constructorErrorCode = that.constructorErrorCode;
    151         msgPattern = that.msgPattern;
    152     }
    153     return *this;
    154 }
    155 
    156 // -------------------------------------
    157 
    158 ChoiceFormat::~ChoiceFormat()
    159 {
    160 }
    161 
    162 // -------------------------------------
    163 
    164 /**
    165  * Convert a double value to a string without the overhead of NumberFormat.
    166  */
    167 UnicodeString&
    168 ChoiceFormat::dtos(double value,
    169                    UnicodeString& string)
    170 {
    171     /* Buffer to contain the digits and any extra formatting stuff. */
    172     char temp[DBL_DIG + 16];
    173     char *itrPtr = temp;
    174     char *expPtr;
    175 
    176     sprintf(temp, "%.*g", DBL_DIG, value);
    177 
    178     /* Find and convert the decimal point.
    179        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
    180     */
    181     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
    182         itrPtr++;
    183     }
    184     if (*itrPtr != 0 && *itrPtr != 'e') {
    185         /* We reached something that looks like a decimal point.
    186         In case someone used setlocale(), which changes the decimal point. */
    187         *itrPtr = '.';
    188         itrPtr++;
    189     }
    190     /* Search for the exponent */
    191     while (*itrPtr && *itrPtr != 'e') {
    192         itrPtr++;
    193     }
    194     if (*itrPtr == 'e') {
    195         itrPtr++;
    196         /* Verify the exponent sign */
    197         if (*itrPtr == '+' || *itrPtr == '-') {
    198             itrPtr++;
    199         }
    200         /* Remove leading zeros. You will see this on Windows machines. */
    201         expPtr = itrPtr;
    202         while (*itrPtr == '0') {
    203             itrPtr++;
    204         }
    205         if (*itrPtr && expPtr != itrPtr) {
    206             /* Shift the exponent without zeros. */
    207             while (*itrPtr) {
    208                 *(expPtr++)  = *(itrPtr++);
    209             }
    210             // NULL terminate
    211             *expPtr = 0;
    212         }
    213     }
    214 
    215     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
    216     return string;
    217 }
    218 
    219 // -------------------------------------
    220 // calls the overloaded applyPattern method.
    221 
    222 void
    223 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    224                            UErrorCode& status)
    225 {
    226     msgPattern.parseChoiceStyle(pattern, NULL, status);
    227     constructorErrorCode = status;
    228 }
    229 
    230 // -------------------------------------
    231 // Applies the pattern to this ChoiceFormat instance.
    232 
    233 void
    234 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    235                            UParseError& parseError,
    236                            UErrorCode& status)
    237 {
    238     msgPattern.parseChoiceStyle(pattern, &parseError, status);
    239     constructorErrorCode = status;
    240 }
    241 // -------------------------------------
    242 // Returns the input pattern string.
    243 
    244 UnicodeString&
    245 ChoiceFormat::toPattern(UnicodeString& result) const
    246 {
    247     return result = msgPattern.getPatternString();
    248 }
    249 
    250 // -------------------------------------
    251 // Sets the limit and format arrays.
    252 void
    253 ChoiceFormat::setChoices(  const double* limits,
    254                            const UnicodeString* formats,
    255                            int32_t cnt )
    256 {
    257     UErrorCode errorCode = U_ZERO_ERROR;
    258     setChoices(limits, NULL, formats, cnt, errorCode);
    259 }
    260 
    261 // -------------------------------------
    262 // Sets the limit and format arrays.
    263 void
    264 ChoiceFormat::setChoices(  const double* limits,
    265                            const UBool* closures,
    266                            const UnicodeString* formats,
    267                            int32_t cnt )
    268 {
    269     UErrorCode errorCode = U_ZERO_ERROR;
    270     setChoices(limits, closures, formats, cnt, errorCode);
    271 }
    272 
    273 void
    274 ChoiceFormat::setChoices(const double* limits,
    275                          const UBool* closures,
    276                          const UnicodeString* formats,
    277                          int32_t count,
    278                          UErrorCode &errorCode) {
    279     if (U_FAILURE(errorCode)) {
    280         return;
    281     }
    282     if (limits == NULL || formats == NULL) {
    283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    284         return;
    285     }
    286     // Reconstruct the original input pattern.
    287     // Modified version of the pre-ICU 4.8 toPattern() implementation.
    288     UnicodeString result;
    289     for (int32_t i = 0; i < count; ++i) {
    290         if (i != 0) {
    291             result += VERTICAL_BAR;
    292         }
    293         UnicodeString buf;
    294         if (uprv_isPositiveInfinity(limits[i])) {
    295             result += INFINITY;
    296         } else if (uprv_isNegativeInfinity(limits[i])) {
    297             result += MINUS;
    298             result += INFINITY;
    299         } else {
    300             result += dtos(limits[i], buf);
    301         }
    302         if (closures != NULL && closures[i]) {
    303             result += LESS_THAN;
    304         } else {
    305             result += LESS_EQUAL;
    306         }
    307         // Append formats[i], using quotes if there are special
    308         // characters.  Single quotes themselves must be escaped in
    309         // either case.
    310         const UnicodeString& text = formats[i];
    311         int32_t textLength = text.length();
    312         int32_t nestingLevel = 0;
    313         for (int32_t j = 0; j < textLength; ++j) {
    314             UChar c = text[j];
    315             if (c == SINGLE_QUOTE && nestingLevel == 0) {
    316                 // Double each top-level apostrophe.
    317                 result.append(c);
    318             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
    319                 // Surround each pipe symbol with apostrophes for quoting.
    320                 // If the next character is an apostrophe, then that will be doubled,
    321                 // and although the parser will see the apostrophe pairs beginning
    322                 // and ending one character earlier than our doubling, the result
    323                 // is as desired.
    324                 //   | -> '|'
    325                 //   |' -> '|'''
    326                 //   |'' -> '|''''' etc.
    327                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
    328                 continue;  // Skip the append(c) at the end of the loop body.
    329             } else if (c == LEFT_CURLY_BRACE) {
    330                 ++nestingLevel;
    331             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
    332                 --nestingLevel;
    333             }
    334             result.append(c);
    335         }
    336     }
    337     // Apply the reconstructed pattern.
    338     applyPattern(result, errorCode);
    339 }
    340 
    341 // -------------------------------------
    342 // Gets the limit array.
    343 
    344 const double*
    345 ChoiceFormat::getLimits(int32_t& cnt) const
    346 {
    347     cnt = 0;
    348     return NULL;
    349 }
    350 
    351 // -------------------------------------
    352 // Gets the closures array.
    353 
    354 const UBool*
    355 ChoiceFormat::getClosures(int32_t& cnt) const
    356 {
    357     cnt = 0;
    358     return NULL;
    359 }
    360 
    361 // -------------------------------------
    362 // Gets the format array.
    363 
    364 const UnicodeString*
    365 ChoiceFormat::getFormats(int32_t& cnt) const
    366 {
    367     cnt = 0;
    368     return NULL;
    369 }
    370 
    371 // -------------------------------------
    372 // Formats an int64 number, it's actually formatted as
    373 // a double.  The returned format string may differ
    374 // from the input number because of this.
    375 
    376 UnicodeString&
    377 ChoiceFormat::format(int64_t number,
    378                      UnicodeString& appendTo,
    379                      FieldPosition& status) const
    380 {
    381     return format((double) number, appendTo, status);
    382 }
    383 
    384 // -------------------------------------
    385 // Formats an int32_t number, it's actually formatted as
    386 // a double.
    387 
    388 UnicodeString&
    389 ChoiceFormat::format(int32_t number,
    390                      UnicodeString& appendTo,
    391                      FieldPosition& status) const
    392 {
    393     return format((double) number, appendTo, status);
    394 }
    395 
    396 // -------------------------------------
    397 // Formats a double number.
    398 
    399 UnicodeString&
    400 ChoiceFormat::format(double number,
    401                      UnicodeString& appendTo,
    402                      FieldPosition& /*pos*/) const
    403 {
    404     if (msgPattern.countParts() == 0) {
    405         // No pattern was applied, or it failed.
    406         return appendTo;
    407     }
    408     // Get the appropriate sub-message.
    409     int32_t msgStart = findSubMessage(msgPattern, 0, number);
    410     if (!MessageImpl::jdkAposMode(msgPattern)) {
    411         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
    412         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
    413         appendTo.append(msgPattern.getPatternString(),
    414                         patternStart,
    415                         msgPattern.getPatternIndex(msgLimit) - patternStart);
    416         return appendTo;
    417     }
    418     // JDK compatibility mode: Remove SKIP_SYNTAX.
    419     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
    420 }
    421 
    422 int32_t
    423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
    424     int32_t count = pattern.countParts();
    425     int32_t msgStart;
    426     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
    427     // until ARG_LIMIT or end of choice-only pattern.
    428     // Ignore the first number and selector and start the loop on the first message.
    429     partIndex += 2;
    430     for (;;) {
    431         // Skip but remember the current sub-message.
    432         msgStart = partIndex;
    433         partIndex = pattern.getLimitPartIndex(partIndex);
    434         if (++partIndex >= count) {
    435             // Reached the end of the choice-only pattern.
    436             // Return with the last sub-message.
    437             break;
    438         }
    439         const MessagePattern::Part &part = pattern.getPart(partIndex++);
    440         UMessagePatternPartType type = part.getType();
    441         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
    442             // Reached the end of the ChoiceFormat style.
    443             // Return with the last sub-message.
    444             break;
    445         }
    446         // part is an ARG_INT or ARG_DOUBLE
    447         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
    448         double boundary = pattern.getNumericValue(part);
    449         // Fetch the ARG_SELECTOR character.
    450         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
    451         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
    452         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
    453             // The number is in the interval between the previous boundary and the current one.
    454             // Return with the sub-message between them.
    455             // The !(a>b) and !(a>=b) comparisons are equivalent to
    456             // (a<=b) and (a<b) except they "catch" NaN.
    457             break;
    458         }
    459     }
    460     return msgStart;
    461 }
    462 
    463 // -------------------------------------
    464 // Formats an array of objects. Checks if the data type of the objects
    465 // to get the right value for formatting.
    466 
    467 UnicodeString&
    468 ChoiceFormat::format(const Formattable* objs,
    469                      int32_t cnt,
    470                      UnicodeString& appendTo,
    471                      FieldPosition& pos,
    472                      UErrorCode& status) const
    473 {
    474     if(cnt < 0) {
    475         status = U_ILLEGAL_ARGUMENT_ERROR;
    476         return appendTo;
    477     }
    478     if (msgPattern.countParts() == 0) {
    479         status = U_INVALID_STATE_ERROR;
    480         return appendTo;
    481     }
    482 
    483     for (int32_t i = 0; i < cnt; i++) {
    484         double objDouble = objs[i].getDouble(status);
    485         if (U_SUCCESS(status)) {
    486             format(objDouble, appendTo, pos);
    487         }
    488     }
    489 
    490     return appendTo;
    491 }
    492 
    493 // -------------------------------------
    494 // Formats an array of objects. Checks if the data type of the objects
    495 // to get the right value for formatting.
    496 
    497 UnicodeString&
    498 ChoiceFormat::format(const Formattable& obj,
    499                      UnicodeString& appendTo,
    500                      FieldPosition& pos,
    501                      UErrorCode& status) const
    502 {
    503     return NumberFormat::format(obj, appendTo, pos, status);
    504 }
    505 // -------------------------------------
    506 
    507 void
    508 ChoiceFormat::parse(const UnicodeString& text,
    509                     Formattable& result,
    510                     ParsePosition& pos) const
    511 {
    512     result.setDouble(parseArgument(msgPattern, 0, text, pos));
    513 }
    514 
    515 double
    516 ChoiceFormat::parseArgument(
    517         const MessagePattern &pattern, int32_t partIndex,
    518         const UnicodeString &source, ParsePosition &pos) {
    519     // find the best number (defined as the one with the longest parse)
    520     int32_t start = pos.getIndex();
    521     int32_t furthest = start;
    522     double bestNumber = uprv_getNaN();
    523     double tempNumber = 0.0;
    524     int32_t count = pattern.countParts();
    525     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
    526         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
    527         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
    528         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
    529         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
    530         if (len >= 0) {
    531             int32_t newIndex = start + len;
    532             if (newIndex > furthest) {
    533                 furthest = newIndex;
    534                 bestNumber = tempNumber;
    535                 if (furthest == source.length()) {
    536                     break;
    537                 }
    538             }
    539         }
    540         partIndex = msgLimit + 1;
    541     }
    542     if (furthest == start) {
    543         pos.setErrorIndex(start);
    544     } else {
    545         pos.setIndex(furthest);
    546     }
    547     return bestNumber;
    548 }
    549 
    550 int32_t
    551 ChoiceFormat::matchStringUntilLimitPart(
    552         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
    553         const UnicodeString &source, int32_t sourceOffset) {
    554     int32_t matchingSourceLength = 0;
    555     const UnicodeString &msgString = pattern.getPatternString();
    556     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
    557     for (;;) {
    558         const MessagePattern::Part &part = pattern.getPart(++partIndex);
    559         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
    560             int32_t index = part.getIndex();
    561             int32_t length = index - prevIndex;
    562             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
    563                 return -1;  // mismatch
    564             }
    565             matchingSourceLength += length;
    566             if (partIndex == limitPartIndex) {
    567                 return matchingSourceLength;
    568             }
    569             prevIndex = part.getLimit();  // SKIP_SYNTAX
    570         }
    571     }
    572 }
    573 
    574 // -------------------------------------
    575 // Parses the text and return the Formattable object.
    576 
    577 void
    578 ChoiceFormat::parse(const UnicodeString& text,
    579                     Formattable& result,
    580                     UErrorCode& status) const
    581 {
    582     NumberFormat::parse(text, result, status);
    583 }
    584 
    585 // -------------------------------------
    586 
    587 Format*
    588 ChoiceFormat::clone() const
    589 {
    590     ChoiceFormat *aCopy = new ChoiceFormat(*this);
    591     return aCopy;
    592 }
    593 
    594 U_NAMESPACE_END
    595 
    596 #endif /* #if !UCONFIG_NO_FORMATTING */
    597 
    598 //eof
    599