Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2015, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURFMT.CPP
      8 *******************************************************************************
      9 */
     10 
     11 #include "unicode/decimfmt.h"
     12 #include "unicode/messagepattern.h"
     13 #include "unicode/plurfmt.h"
     14 #include "unicode/plurrule.h"
     15 #include "unicode/utypes.h"
     16 #include "cmemory.h"
     17 #include "messageimpl.h"
     18 #include "nfrule.h"
     19 #include "plurrule_impl.h"
     20 #include "uassert.h"
     21 #include "uhash.h"
     22 #include "precision.h"
     23 #include "visibledigits.h"
     24 
     25 #if !UCONFIG_NO_FORMATTING
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 static const UChar OTHER_STRING[] = {
     30     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     31 };
     32 
     33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     34 
     35 PluralFormat::PluralFormat(UErrorCode& status)
     36         : locale(Locale::getDefault()),
     37           msgPattern(status),
     38           numberFormat(NULL),
     39           offset(0) {
     40     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     41 }
     42 
     43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     44         : locale(loc),
     45           msgPattern(status),
     46           numberFormat(NULL),
     47           offset(0) {
     48     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     49 }
     50 
     51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     52         : locale(Locale::getDefault()),
     53           msgPattern(status),
     54           numberFormat(NULL),
     55           offset(0) {
     56     init(&rules, UPLURAL_TYPE_COUNT, status);
     57 }
     58 
     59 PluralFormat::PluralFormat(const Locale& loc,
     60                            const PluralRules& rules,
     61                            UErrorCode& status)
     62         : locale(loc),
     63           msgPattern(status),
     64           numberFormat(NULL),
     65           offset(0) {
     66     init(&rules, UPLURAL_TYPE_COUNT, status);
     67 }
     68 
     69 PluralFormat::PluralFormat(const Locale& loc,
     70                            UPluralType type,
     71                            UErrorCode& status)
     72         : locale(loc),
     73           msgPattern(status),
     74           numberFormat(NULL),
     75           offset(0) {
     76     init(NULL, type, status);
     77 }
     78 
     79 PluralFormat::PluralFormat(const UnicodeString& pat,
     80                            UErrorCode& status)
     81         : locale(Locale::getDefault()),
     82           msgPattern(status),
     83           numberFormat(NULL),
     84           offset(0) {
     85     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     86     applyPattern(pat, status);
     87 }
     88 
     89 PluralFormat::PluralFormat(const Locale& loc,
     90                            const UnicodeString& pat,
     91                            UErrorCode& status)
     92         : locale(loc),
     93           msgPattern(status),
     94           numberFormat(NULL),
     95           offset(0) {
     96     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     97     applyPattern(pat, status);
     98 }
     99 
    100 PluralFormat::PluralFormat(const PluralRules& rules,
    101                            const UnicodeString& pat,
    102                            UErrorCode& status)
    103         : locale(Locale::getDefault()),
    104           msgPattern(status),
    105           numberFormat(NULL),
    106           offset(0) {
    107     init(&rules, UPLURAL_TYPE_COUNT, status);
    108     applyPattern(pat, status);
    109 }
    110 
    111 PluralFormat::PluralFormat(const Locale& loc,
    112                            const PluralRules& rules,
    113                            const UnicodeString& pat,
    114                            UErrorCode& status)
    115         : locale(loc),
    116           msgPattern(status),
    117           numberFormat(NULL),
    118           offset(0) {
    119     init(&rules, UPLURAL_TYPE_COUNT, status);
    120     applyPattern(pat, status);
    121 }
    122 
    123 PluralFormat::PluralFormat(const Locale& loc,
    124                            UPluralType type,
    125                            const UnicodeString& pat,
    126                            UErrorCode& status)
    127         : locale(loc),
    128           msgPattern(status),
    129           numberFormat(NULL),
    130           offset(0) {
    131     init(NULL, type, status);
    132     applyPattern(pat, status);
    133 }
    134 
    135 PluralFormat::PluralFormat(const PluralFormat& other)
    136         : Format(other),
    137           locale(other.locale),
    138           msgPattern(other.msgPattern),
    139           numberFormat(NULL),
    140           offset(other.offset) {
    141     copyObjects(other);
    142 }
    143 
    144 void
    145 PluralFormat::copyObjects(const PluralFormat& other) {
    146     UErrorCode status = U_ZERO_ERROR;
    147     if (numberFormat != NULL) {
    148         delete numberFormat;
    149     }
    150     if (pluralRulesWrapper.pluralRules != NULL) {
    151         delete pluralRulesWrapper.pluralRules;
    152     }
    153 
    154     if (other.numberFormat == NULL) {
    155         numberFormat = NumberFormat::createInstance(locale, status);
    156     } else {
    157         numberFormat = (NumberFormat*)other.numberFormat->clone();
    158     }
    159     if (other.pluralRulesWrapper.pluralRules == NULL) {
    160         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    161     } else {
    162         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    163     }
    164 }
    165 
    166 
    167 PluralFormat::~PluralFormat() {
    168     delete numberFormat;
    169 }
    170 
    171 void
    172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
    173     if (U_FAILURE(status)) {
    174         return;
    175     }
    176 
    177     if (rules==NULL) {
    178         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
    179     } else {
    180         pluralRulesWrapper.pluralRules = rules->clone();
    181         if (pluralRulesWrapper.pluralRules == NULL) {
    182             status = U_MEMORY_ALLOCATION_ERROR;
    183             return;
    184         }
    185     }
    186 
    187     numberFormat= NumberFormat::createInstance(locale, status);
    188 }
    189 
    190 void
    191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    192     msgPattern.parsePluralStyle(newPattern, NULL, status);
    193     if (U_FAILURE(status)) {
    194         msgPattern.clear();
    195         offset = 0;
    196         return;
    197     }
    198     offset = msgPattern.getPluralOffset(0);
    199 }
    200 
    201 UnicodeString&
    202 PluralFormat::format(const Formattable& obj,
    203                    UnicodeString& appendTo,
    204                    FieldPosition& pos,
    205                    UErrorCode& status) const
    206 {
    207     if (U_FAILURE(status)) return appendTo;
    208 
    209     if (obj.isNumeric()) {
    210         return format(obj, obj.getDouble(), appendTo, pos, status);
    211     } else {
    212         status = U_ILLEGAL_ARGUMENT_ERROR;
    213         return appendTo;
    214     }
    215 }
    216 
    217 UnicodeString
    218 PluralFormat::format(int32_t number, UErrorCode& status) const {
    219     FieldPosition fpos(0);
    220     UnicodeString result;
    221     return format(Formattable(number), number, result, fpos, status);
    222 }
    223 
    224 UnicodeString
    225 PluralFormat::format(double number, UErrorCode& status) const {
    226     FieldPosition fpos(0);
    227     UnicodeString result;
    228     return format(Formattable(number), number, result, fpos, status);
    229 }
    230 
    231 
    232 UnicodeString&
    233 PluralFormat::format(int32_t number,
    234                      UnicodeString& appendTo,
    235                      FieldPosition& pos,
    236                      UErrorCode& status) const {
    237     return format(Formattable(number), (double)number, appendTo, pos, status);
    238 }
    239 
    240 UnicodeString&
    241 PluralFormat::format(double number,
    242                      UnicodeString& appendTo,
    243                      FieldPosition& pos,
    244                      UErrorCode& status) const {
    245     return format(Formattable(number), (double)number, appendTo, pos, status);
    246 }
    247 
    248 UnicodeString&
    249 PluralFormat::format(const Formattable& numberObject, double number,
    250                      UnicodeString& appendTo,
    251                      FieldPosition& pos,
    252                      UErrorCode& status) const {
    253     if (U_FAILURE(status)) {
    254         return appendTo;
    255     }
    256     if (msgPattern.countParts() == 0) {
    257         return numberFormat->format(numberObject, appendTo, pos, status);
    258     }
    259     // Get the appropriate sub-message.
    260     // Select it based on the formatted number-offset.
    261     double numberMinusOffset = number - offset;
    262     UnicodeString numberString;
    263     FieldPosition ignorePos;
    264     FixedPrecision fp;
    265     VisibleDigitsWithExponent dec;
    266     fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status);
    267     if (U_FAILURE(status)) {
    268         return appendTo;
    269     }
    270     if (offset == 0) {
    271         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    272         if(decFmt != NULL) {
    273             decFmt->initVisibleDigitsWithExponent(
    274                     numberObject, dec, status);
    275             if (U_FAILURE(status)) {
    276                 return appendTo;
    277             }
    278             decFmt->format(dec, numberString, ignorePos, status);
    279         } else {
    280             numberFormat->format(
    281                     numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
    282         }
    283     } else {
    284         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    285         if(decFmt != NULL) {
    286             decFmt->initVisibleDigitsWithExponent(
    287                     numberMinusOffset, dec, status);
    288             if (U_FAILURE(status)) {
    289                 return appendTo;
    290             }
    291             decFmt->format(dec, numberString, ignorePos, status);
    292         } else {
    293             numberFormat->format(
    294                     numberMinusOffset, numberString, ignorePos, status);
    295         }
    296     }
    297     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
    298     if (U_FAILURE(status)) { return appendTo; }
    299     // Replace syntactic # signs in the top level of this sub-message
    300     // (not in nested arguments) with the formatted number-offset.
    301     const UnicodeString& pattern = msgPattern.getPatternString();
    302     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    303     for (;;) {
    304         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    305         const UMessagePatternPartType type = part.getType();
    306         int32_t index = part.getIndex();
    307         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    308             return appendTo.append(pattern, prevIndex, index - prevIndex);
    309         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    310             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    311             appendTo.append(pattern, prevIndex, index - prevIndex);
    312             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    313                 appendTo.append(numberString);
    314             }
    315             prevIndex = part.getLimit();
    316         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    317             appendTo.append(pattern, prevIndex, index - prevIndex);
    318             prevIndex = index;
    319             partIndex = msgPattern.getLimitPartIndex(partIndex);
    320             index = msgPattern.getPart(partIndex).getLimit();
    321             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    322             prevIndex = index;
    323         }
    324     }
    325 }
    326 
    327 UnicodeString&
    328 PluralFormat::toPattern(UnicodeString& appendTo) {
    329     if (0 == msgPattern.countParts()) {
    330         appendTo.setToBogus();
    331     } else {
    332         appendTo.append(msgPattern.getPatternString());
    333     }
    334     return appendTo;
    335 }
    336 
    337 void
    338 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    339     if (U_FAILURE(status)) {
    340         return;
    341     }
    342     locale = loc;
    343     msgPattern.clear();
    344     delete numberFormat;
    345     offset = 0;
    346     numberFormat = NULL;
    347     pluralRulesWrapper.reset();
    348     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    349 }
    350 
    351 void
    352 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    353     if (U_FAILURE(status)) {
    354         return;
    355     }
    356     NumberFormat* nf = (NumberFormat*)format->clone();
    357     if (nf != NULL) {
    358         delete numberFormat;
    359         numberFormat = nf;
    360     } else {
    361         status = U_MEMORY_ALLOCATION_ERROR;
    362     }
    363 }
    364 
    365 Format*
    366 PluralFormat::clone() const
    367 {
    368     return new PluralFormat(*this);
    369 }
    370 
    371 
    372 PluralFormat&
    373 PluralFormat::operator=(const PluralFormat& other) {
    374     if (this != &other) {
    375         locale = other.locale;
    376         msgPattern = other.msgPattern;
    377         offset = other.offset;
    378         copyObjects(other);
    379     }
    380 
    381     return *this;
    382 }
    383 
    384 UBool
    385 PluralFormat::operator==(const Format& other) const {
    386     if (this == &other) {
    387         return TRUE;
    388     }
    389     if (!Format::operator==(other)) {
    390         return FALSE;
    391     }
    392     const PluralFormat& o = (const PluralFormat&)other;
    393     return
    394         locale == o.locale &&
    395         msgPattern == o.msgPattern &&  // implies same offset
    396         (numberFormat == NULL) == (o.numberFormat == NULL) &&
    397         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
    398         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
    399         (pluralRulesWrapper.pluralRules == NULL ||
    400             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    401 }
    402 
    403 UBool
    404 PluralFormat::operator!=(const Format& other) const {
    405     return  !operator==(other);
    406 }
    407 
    408 void
    409 PluralFormat::parseObject(const UnicodeString& /*source*/,
    410                         Formattable& /*result*/,
    411                         ParsePosition& pos) const
    412 {
    413     // Parsing not supported.
    414     pos.setErrorIndex(pos.getIndex());
    415 }
    416 
    417 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    418                                      const PluralSelector& selector, void *context,
    419                                      double number, UErrorCode& ec) {
    420     if (U_FAILURE(ec)) {
    421         return 0;
    422     }
    423     int32_t count=pattern.countParts();
    424     double offset;
    425     const MessagePattern::Part* part=&pattern.getPart(partIndex);
    426     if (MessagePattern::Part::hasNumericValue(part->getType())) {
    427         offset=pattern.getNumericValue(*part);
    428         ++partIndex;
    429     } else {
    430         offset=0;
    431     }
    432     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
    433     // Then we get the keyword from the selector.
    434     // (In other words, we never call the selector if we match against an explicit value,
    435     // or if the only non-explicit keyword is "other".)
    436     UnicodeString keyword;
    437     UnicodeString other(FALSE, OTHER_STRING, 5);
    438     // When we find a match, we set msgStart>0 and also set this boolean to true
    439     // to avoid matching the keyword again (duplicates are allowed)
    440     // while we continue to look for an explicit-value match.
    441     UBool haveKeywordMatch=FALSE;
    442     // msgStart is 0 until we find any appropriate sub-message.
    443     // We remember the first "other" sub-message if we have not seen any
    444     // appropriate sub-message before.
    445     // We remember the first matching-keyword sub-message if we have not seen
    446     // one of those before.
    447     // (The parser allows [does not check for] duplicate keywords.
    448     // We just have to make sure to take the first one.)
    449     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    450     // at the first keyword match.
    451     // We keep going until we find an explicit-value match or reach the end of the plural style.
    452     int32_t msgStart=0;
    453     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    454     // until ARG_LIMIT or end of plural-only pattern.
    455     do {
    456         part=&pattern.getPart(partIndex++);
    457         const UMessagePatternPartType type = part->getType();
    458         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    459             break;
    460         }
    461         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    462         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    463         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    464             // explicit value like "=2"
    465             part=&pattern.getPart(partIndex++);
    466             if(number==pattern.getNumericValue(*part)) {
    467                 // matches explicit value
    468                 return partIndex;
    469             }
    470         } else if(!haveKeywordMatch) {
    471             // plural keyword like "few" or "other"
    472             // Compare "other" first and call the selector if this is not "other".
    473             if(pattern.partSubstringMatches(*part, other)) {
    474                 if(msgStart==0) {
    475                     msgStart=partIndex;
    476                     if(0 == keyword.compare(other)) {
    477                         // This is the first "other" sub-message,
    478                         // and the selected keyword is also "other".
    479                         // Do not match "other" again.
    480                         haveKeywordMatch=TRUE;
    481                     }
    482                 }
    483             } else {
    484                 if(keyword.isEmpty()) {
    485                     keyword=selector.select(context, number-offset, ec);
    486                     if(msgStart!=0 && (0 == keyword.compare(other))) {
    487                         // We have already seen an "other" sub-message.
    488                         // Do not match "other" again.
    489                         haveKeywordMatch=TRUE;
    490                         // Skip keyword matching but do getLimitPartIndex().
    491                     }
    492                 }
    493                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    494                     // keyword matches
    495                     msgStart=partIndex;
    496                     // Do not match this keyword again.
    497                     haveKeywordMatch=TRUE;
    498                 }
    499             }
    500         }
    501         partIndex=pattern.getLimitPartIndex(partIndex);
    502     } while(++partIndex<count);
    503     return msgStart;
    504 }
    505 
    506 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
    507     // If no pattern was applied, return null.
    508     if (msgPattern.countParts() == 0) {
    509         pos.setBeginIndex(-1);
    510         pos.setEndIndex(-1);
    511         return;
    512     }
    513     int partIndex = 0;
    514     int currMatchIndex;
    515     int count=msgPattern.countParts();
    516     int startingAt = pos.getBeginIndex();
    517     if (startingAt < 0) {
    518         startingAt = 0;
    519     }
    520 
    521     // The keyword is null until we need to match against a non-explicit, not-"other" value.
    522     // Then we get the keyword from the selector.
    523     // (In other words, we never call the selector if we match against an explicit value,
    524     // or if the only non-explicit keyword is "other".)
    525     UnicodeString keyword;
    526     UnicodeString matchedWord;
    527     const UnicodeString& pattern = msgPattern.getPatternString();
    528     int matchedIndex = -1;
    529     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
    530     // until the end of the plural-only pattern.
    531     while (partIndex < count) {
    532         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
    533         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
    534             // Bad format
    535             continue;
    536         }
    537 
    538         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
    539         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
    540             // Bad format
    541             continue;
    542         }
    543 
    544         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
    545         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
    546             // Bad format
    547             continue;
    548         }
    549 
    550         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    551         if (rbnfLenientScanner != NULL) {
    552             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
    553             int32_t length = -1;
    554             currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
    555         }
    556         else {
    557             currMatchIndex = source.indexOf(currArg, startingAt);
    558         }
    559         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
    560             matchedIndex = currMatchIndex;
    561             matchedWord = currArg;
    562             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    563         }
    564     }
    565     if (matchedIndex >= 0) {
    566         pos.setBeginIndex(matchedIndex);
    567         pos.setEndIndex(matchedIndex + matchedWord.length());
    568         result.setString(keyword);
    569         return;
    570     }
    571 
    572     // Not found!
    573     pos.setBeginIndex(-1);
    574     pos.setEndIndex(-1);
    575 }
    576 
    577 PluralFormat::PluralSelector::~PluralSelector() {}
    578 
    579 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    580     delete pluralRules;
    581 }
    582 
    583 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
    584                                                           UErrorCode& /*ec*/) const {
    585     (void)number;  // unused except in the assertion
    586     VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context);
    587     return pluralRules->select(*dec);
    588 }
    589 
    590 void PluralFormat::PluralSelectorAdapter::reset() {
    591     delete pluralRules;
    592     pluralRules = NULL;
    593 }
    594 
    595 
    596 U_NAMESPACE_END
    597 
    598 
    599 #endif /* #if !UCONFIG_NO_FORMATTING */
    600 
    601 //eof
    602