Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
      6 * others. All Rights Reserved.                                                *
      7 *******************************************************************************
      8 *
      9 * File CHOICFMT.CPP
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   02/19/97    aliu        Converted from java.
     15 *   03/20/97    helena      Finished first cut of implementation and got rid
     16 *                           of nextDouble/previousDouble and replaced with
     17 *                           boolean array.
     18 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
     19 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
     20 *                           wchar.h.
     21 *   07/09/97    helena      Made ParsePosition into a class.
     22 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
     23 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
     24 *   02/22/99    stephen     Removed character literals for EBCDIC safety
     25 ********************************************************************************
     26 */
     27 
     28 #include "unicode/utypes.h"
     29 
     30 #if !UCONFIG_NO_FORMATTING
     31 
     32 #include "unicode/choicfmt.h"
     33 #include "unicode/numfmt.h"
     34 #include "unicode/locid.h"
     35 #include "cpputils.h"
     36 #include "cstring.h"
     37 #include "messageimpl.h"
     38 #include "putilimp.h"
     39 #include "uassert.h"
     40 #include <stdio.h>
     41 #include <float.h>
     42 
     43 // *****************************************************************************
     44 // class ChoiceFormat
     45 // *****************************************************************************
     46 
     47 U_NAMESPACE_BEGIN
     48 
     49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
     50 
     51 // Special characters used by ChoiceFormat.  There are two characters
     52 // used interchangeably to indicate <=.  Either is parsed, but only
     53 // LESS_EQUAL is generated by toPattern().
     54 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
     55 #define LESS_THAN    ((UChar)0x003C)   /*<*/
     56 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
     57 #define LESS_EQUAL2  ((UChar)0x2264)
     58 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
     59 #define MINUS        ((UChar)0x002D)   /*-*/
     60 
     61 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
     62 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
     63 
     64 #ifdef INFINITY
     65 #undef INFINITY
     66 #endif
     67 #define INFINITY     ((UChar)0x221E)
     68 
     69 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
     70 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
     71 #define POSITIVE_INF_STRLEN 1
     72 #define NEGATIVE_INF_STRLEN 2
     73 
     74 // -------------------------------------
     75 // Creates a ChoiceFormat instance based on the pattern.
     76 
     77 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
     78                            UErrorCode& status)
     79 : constructorErrorCode(status),
     80   msgPattern(status)
     81 {
     82     applyPattern(newPattern, status);
     83 }
     84 
     85 // -------------------------------------
     86 // Creates a ChoiceFormat instance with the limit array and
     87 // format strings for each limit.
     88 
     89 ChoiceFormat::ChoiceFormat(const double* limits,
     90                            const UnicodeString* formats,
     91                            int32_t cnt )
     92 : constructorErrorCode(U_ZERO_ERROR),
     93   msgPattern(constructorErrorCode)
     94 {
     95     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
     96 }
     97 
     98 // -------------------------------------
     99 
    100 ChoiceFormat::ChoiceFormat(const double* limits,
    101                            const UBool* closures,
    102                            const UnicodeString* formats,
    103                            int32_t cnt )
    104 : constructorErrorCode(U_ZERO_ERROR),
    105   msgPattern(constructorErrorCode)
    106 {
    107     setChoices(limits, closures, formats, cnt, constructorErrorCode);
    108 }
    109 
    110 // -------------------------------------
    111 // copy constructor
    112 
    113 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
    114 : NumberFormat(that),
    115   constructorErrorCode(that.constructorErrorCode),
    116   msgPattern(that.msgPattern)
    117 {
    118 }
    119 
    120 // -------------------------------------
    121 // Private constructor that creates a
    122 // ChoiceFormat instance based on the
    123 // pattern and populates UParseError
    124 
    125 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
    126                            UParseError& parseError,
    127                            UErrorCode& status)
    128 : constructorErrorCode(status),
    129   msgPattern(status)
    130 {
    131     applyPattern(newPattern,parseError, status);
    132 }
    133 // -------------------------------------
    134 
    135 UBool
    136 ChoiceFormat::operator==(const Format& that) const
    137 {
    138     if (this == &that) return TRUE;
    139     if (!NumberFormat::operator==(that)) return FALSE;
    140     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
    141     return msgPattern == thatAlias.msgPattern;
    142 }
    143 
    144 // -------------------------------------
    145 // copy constructor
    146 
    147 const ChoiceFormat&
    148 ChoiceFormat::operator=(const   ChoiceFormat& that)
    149 {
    150     if (this != &that) {
    151         NumberFormat::operator=(that);
    152         constructorErrorCode = that.constructorErrorCode;
    153         msgPattern = that.msgPattern;
    154     }
    155     return *this;
    156 }
    157 
    158 // -------------------------------------
    159 
    160 ChoiceFormat::~ChoiceFormat()
    161 {
    162 }
    163 
    164 // -------------------------------------
    165 
    166 /**
    167  * Convert a double value to a string without the overhead of NumberFormat.
    168  */
    169 UnicodeString&
    170 ChoiceFormat::dtos(double value,
    171                    UnicodeString& string)
    172 {
    173     /* Buffer to contain the digits and any extra formatting stuff. */
    174     char temp[DBL_DIG + 16];
    175     char *itrPtr = temp;
    176     char *expPtr;
    177 
    178     sprintf(temp, "%.*g", DBL_DIG, value);
    179 
    180     /* Find and convert the decimal point.
    181        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
    182     */
    183     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
    184         itrPtr++;
    185     }
    186     if (*itrPtr != 0 && *itrPtr != 'e') {
    187         /* We reached something that looks like a decimal point.
    188         In case someone used setlocale(), which changes the decimal point. */
    189         *itrPtr = '.';
    190         itrPtr++;
    191     }
    192     /* Search for the exponent */
    193     while (*itrPtr && *itrPtr != 'e') {
    194         itrPtr++;
    195     }
    196     if (*itrPtr == 'e') {
    197         itrPtr++;
    198         /* Verify the exponent sign */
    199         if (*itrPtr == '+' || *itrPtr == '-') {
    200             itrPtr++;
    201         }
    202         /* Remove leading zeros. You will see this on Windows machines. */
    203         expPtr = itrPtr;
    204         while (*itrPtr == '0') {
    205             itrPtr++;
    206         }
    207         if (*itrPtr && expPtr != itrPtr) {
    208             /* Shift the exponent without zeros. */
    209             while (*itrPtr) {
    210                 *(expPtr++)  = *(itrPtr++);
    211             }
    212             // NULL terminate
    213             *expPtr = 0;
    214         }
    215     }
    216 
    217     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
    218     return string;
    219 }
    220 
    221 // -------------------------------------
    222 // calls the overloaded applyPattern method.
    223 
    224 void
    225 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    226                            UErrorCode& status)
    227 {
    228     msgPattern.parseChoiceStyle(pattern, NULL, status);
    229     constructorErrorCode = status;
    230 }
    231 
    232 // -------------------------------------
    233 // Applies the pattern to this ChoiceFormat instance.
    234 
    235 void
    236 ChoiceFormat::applyPattern(const UnicodeString& pattern,
    237                            UParseError& parseError,
    238                            UErrorCode& status)
    239 {
    240     msgPattern.parseChoiceStyle(pattern, &parseError, status);
    241     constructorErrorCode = status;
    242 }
    243 // -------------------------------------
    244 // Returns the input pattern string.
    245 
    246 UnicodeString&
    247 ChoiceFormat::toPattern(UnicodeString& result) const
    248 {
    249     return result = msgPattern.getPatternString();
    250 }
    251 
    252 // -------------------------------------
    253 // Sets the limit and format arrays.
    254 void
    255 ChoiceFormat::setChoices(  const double* limits,
    256                            const UnicodeString* formats,
    257                            int32_t cnt )
    258 {
    259     UErrorCode errorCode = U_ZERO_ERROR;
    260     setChoices(limits, NULL, formats, cnt, errorCode);
    261 }
    262 
    263 // -------------------------------------
    264 // Sets the limit and format arrays.
    265 void
    266 ChoiceFormat::setChoices(  const double* limits,
    267                            const UBool* closures,
    268                            const UnicodeString* formats,
    269                            int32_t cnt )
    270 {
    271     UErrorCode errorCode = U_ZERO_ERROR;
    272     setChoices(limits, closures, formats, cnt, errorCode);
    273 }
    274 
    275 void
    276 ChoiceFormat::setChoices(const double* limits,
    277                          const UBool* closures,
    278                          const UnicodeString* formats,
    279                          int32_t count,
    280                          UErrorCode &errorCode) {
    281     if (U_FAILURE(errorCode)) {
    282         return;
    283     }
    284     if (limits == NULL || formats == NULL) {
    285         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    286         return;
    287     }
    288     // Reconstruct the original input pattern.
    289     // Modified version of the pre-ICU 4.8 toPattern() implementation.
    290     UnicodeString result;
    291     for (int32_t i = 0; i < count; ++i) {
    292         if (i != 0) {
    293             result += VERTICAL_BAR;
    294         }
    295         UnicodeString buf;
    296         if (uprv_isPositiveInfinity(limits[i])) {
    297             result += INFINITY;
    298         } else if (uprv_isNegativeInfinity(limits[i])) {
    299             result += MINUS;
    300             result += INFINITY;
    301         } else {
    302             result += dtos(limits[i], buf);
    303         }
    304         if (closures != NULL && closures[i]) {
    305             result += LESS_THAN;
    306         } else {
    307             result += LESS_EQUAL;
    308         }
    309         // Append formats[i], using quotes if there are special
    310         // characters.  Single quotes themselves must be escaped in
    311         // either case.
    312         const UnicodeString& text = formats[i];
    313         int32_t textLength = text.length();
    314         int32_t nestingLevel = 0;
    315         for (int32_t j = 0; j < textLength; ++j) {
    316             UChar c = text[j];
    317             if (c == SINGLE_QUOTE && nestingLevel == 0) {
    318                 // Double each top-level apostrophe.
    319                 result.append(c);
    320             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
    321                 // Surround each pipe symbol with apostrophes for quoting.
    322                 // If the next character is an apostrophe, then that will be doubled,
    323                 // and although the parser will see the apostrophe pairs beginning
    324                 // and ending one character earlier than our doubling, the result
    325                 // is as desired.
    326                 //   | -> '|'
    327                 //   |' -> '|'''
    328                 //   |'' -> '|''''' etc.
    329                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
    330                 continue;  // Skip the append(c) at the end of the loop body.
    331             } else if (c == LEFT_CURLY_BRACE) {
    332                 ++nestingLevel;
    333             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
    334                 --nestingLevel;
    335             }
    336             result.append(c);
    337         }
    338     }
    339     // Apply the reconstructed pattern.
    340     applyPattern(result, errorCode);
    341 }
    342 
    343 // -------------------------------------
    344 // Gets the limit array.
    345 
    346 const double*
    347 ChoiceFormat::getLimits(int32_t& cnt) const
    348 {
    349     cnt = 0;
    350     return NULL;
    351 }
    352 
    353 // -------------------------------------
    354 // Gets the closures array.
    355 
    356 const UBool*
    357 ChoiceFormat::getClosures(int32_t& cnt) const
    358 {
    359     cnt = 0;
    360     return NULL;
    361 }
    362 
    363 // -------------------------------------
    364 // Gets the format array.
    365 
    366 const UnicodeString*
    367 ChoiceFormat::getFormats(int32_t& cnt) const
    368 {
    369     cnt = 0;
    370     return NULL;
    371 }
    372 
    373 // -------------------------------------
    374 // Formats an int64 number, it's actually formatted as
    375 // a double.  The returned format string may differ
    376 // from the input number because of this.
    377 
    378 UnicodeString&
    379 ChoiceFormat::format(int64_t number,
    380                      UnicodeString& appendTo,
    381                      FieldPosition& status) const
    382 {
    383     return format((double) number, appendTo, status);
    384 }
    385 
    386 // -------------------------------------
    387 // Formats an int32_t number, it's actually formatted as
    388 // a double.
    389 
    390 UnicodeString&
    391 ChoiceFormat::format(int32_t number,
    392                      UnicodeString& appendTo,
    393                      FieldPosition& status) const
    394 {
    395     return format((double) number, appendTo, status);
    396 }
    397 
    398 // -------------------------------------
    399 // Formats a double number.
    400 
    401 UnicodeString&
    402 ChoiceFormat::format(double number,
    403                      UnicodeString& appendTo,
    404                      FieldPosition& /*pos*/) const
    405 {
    406     if (msgPattern.countParts() == 0) {
    407         // No pattern was applied, or it failed.
    408         return appendTo;
    409     }
    410     // Get the appropriate sub-message.
    411     int32_t msgStart = findSubMessage(msgPattern, 0, number);
    412     if (!MessageImpl::jdkAposMode(msgPattern)) {
    413         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
    414         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
    415         appendTo.append(msgPattern.getPatternString(),
    416                         patternStart,
    417                         msgPattern.getPatternIndex(msgLimit) - patternStart);
    418         return appendTo;
    419     }
    420     // JDK compatibility mode: Remove SKIP_SYNTAX.
    421     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
    422 }
    423 
    424 int32_t
    425 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
    426     int32_t count = pattern.countParts();
    427     int32_t msgStart;
    428     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
    429     // until ARG_LIMIT or end of choice-only pattern.
    430     // Ignore the first number and selector and start the loop on the first message.
    431     partIndex += 2;
    432     for (;;) {
    433         // Skip but remember the current sub-message.
    434         msgStart = partIndex;
    435         partIndex = pattern.getLimitPartIndex(partIndex);
    436         if (++partIndex >= count) {
    437             // Reached the end of the choice-only pattern.
    438             // Return with the last sub-message.
    439             break;
    440         }
    441         const MessagePattern::Part &part = pattern.getPart(partIndex++);
    442         UMessagePatternPartType type = part.getType();
    443         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
    444             // Reached the end of the ChoiceFormat style.
    445             // Return with the last sub-message.
    446             break;
    447         }
    448         // part is an ARG_INT or ARG_DOUBLE
    449         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
    450         double boundary = pattern.getNumericValue(part);
    451         // Fetch the ARG_SELECTOR character.
    452         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
    453         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
    454         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
    455             // The number is in the interval between the previous boundary and the current one.
    456             // Return with the sub-message between them.
    457             // The !(a>b) and !(a>=b) comparisons are equivalent to
    458             // (a<=b) and (a<b) except they "catch" NaN.
    459             break;
    460         }
    461     }
    462     return msgStart;
    463 }
    464 
    465 // -------------------------------------
    466 // Formats an array of objects. Checks if the data type of the objects
    467 // to get the right value for formatting.
    468 
    469 UnicodeString&
    470 ChoiceFormat::format(const Formattable* objs,
    471                      int32_t cnt,
    472                      UnicodeString& appendTo,
    473                      FieldPosition& pos,
    474                      UErrorCode& status) const
    475 {
    476     if(cnt < 0) {
    477         status = U_ILLEGAL_ARGUMENT_ERROR;
    478         return appendTo;
    479     }
    480     if (msgPattern.countParts() == 0) {
    481         status = U_INVALID_STATE_ERROR;
    482         return appendTo;
    483     }
    484 
    485     for (int32_t i = 0; i < cnt; i++) {
    486         double objDouble = objs[i].getDouble(status);
    487         if (U_SUCCESS(status)) {
    488             format(objDouble, appendTo, pos);
    489         }
    490     }
    491 
    492     return appendTo;
    493 }
    494 
    495 // -------------------------------------
    496 
    497 void
    498 ChoiceFormat::parse(const UnicodeString& text,
    499                     Formattable& result,
    500                     ParsePosition& pos) const
    501 {
    502     result.setDouble(parseArgument(msgPattern, 0, text, pos));
    503 }
    504 
    505 double
    506 ChoiceFormat::parseArgument(
    507         const MessagePattern &pattern, int32_t partIndex,
    508         const UnicodeString &source, ParsePosition &pos) {
    509     // find the best number (defined as the one with the longest parse)
    510     int32_t start = pos.getIndex();
    511     int32_t furthest = start;
    512     double bestNumber = uprv_getNaN();
    513     double tempNumber = 0.0;
    514     int32_t count = pattern.countParts();
    515     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
    516         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
    517         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
    518         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
    519         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
    520         if (len >= 0) {
    521             int32_t newIndex = start + len;
    522             if (newIndex > furthest) {
    523                 furthest = newIndex;
    524                 bestNumber = tempNumber;
    525                 if (furthest == source.length()) {
    526                     break;
    527                 }
    528             }
    529         }
    530         partIndex = msgLimit + 1;
    531     }
    532     if (furthest == start) {
    533         pos.setErrorIndex(start);
    534     } else {
    535         pos.setIndex(furthest);
    536     }
    537     return bestNumber;
    538 }
    539 
    540 int32_t
    541 ChoiceFormat::matchStringUntilLimitPart(
    542         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
    543         const UnicodeString &source, int32_t sourceOffset) {
    544     int32_t matchingSourceLength = 0;
    545     const UnicodeString &msgString = pattern.getPatternString();
    546     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
    547     for (;;) {
    548         const MessagePattern::Part &part = pattern.getPart(++partIndex);
    549         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
    550             int32_t index = part.getIndex();
    551             int32_t length = index - prevIndex;
    552             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
    553                 return -1;  // mismatch
    554             }
    555             matchingSourceLength += length;
    556             if (partIndex == limitPartIndex) {
    557                 return matchingSourceLength;
    558             }
    559             prevIndex = part.getLimit();  // SKIP_SYNTAX
    560         }
    561     }
    562 }
    563 
    564 // -------------------------------------
    565 
    566 Format*
    567 ChoiceFormat::clone() const
    568 {
    569     ChoiceFormat *aCopy = new ChoiceFormat(*this);
    570     return aCopy;
    571 }
    572 
    573 U_NAMESPACE_END
    574 
    575 #endif /* #if !UCONFIG_NO_FORMATTING */
    576 
    577 //eof
    578