Home | History | Annotate | Download | only in i18n
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2009-2015, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 *
      9 * File PLURFMT.CPP
     10 *******************************************************************************
     11 */
     12 
     13 #include "unicode/decimfmt.h"
     14 #include "unicode/messagepattern.h"
     15 #include "unicode/plurfmt.h"
     16 #include "unicode/plurrule.h"
     17 #include "unicode/utypes.h"
     18 #include "cmemory.h"
     19 #include "messageimpl.h"
     20 #include "nfrule.h"
     21 #include "plurrule_impl.h"
     22 #include "uassert.h"
     23 #include "uhash.h"
     24 #include "number_decimalquantity.h"
     25 #include "number_utils.h"
     26 #include "number_utypes.h"
     27 
     28 #if !UCONFIG_NO_FORMATTING
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 using number::impl::DecimalQuantity;
     33 
     34 static const UChar OTHER_STRING[] = {
     35     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     36 };
     37 
     38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     39 
     40 PluralFormat::PluralFormat(UErrorCode& status)
     41         : locale(Locale::getDefault()),
     42           msgPattern(status),
     43           numberFormat(NULL),
     44           offset(0) {
     45     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     46 }
     47 
     48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     49         : locale(loc),
     50           msgPattern(status),
     51           numberFormat(NULL),
     52           offset(0) {
     53     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     54 }
     55 
     56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     57         : locale(Locale::getDefault()),
     58           msgPattern(status),
     59           numberFormat(NULL),
     60           offset(0) {
     61     init(&rules, UPLURAL_TYPE_COUNT, status);
     62 }
     63 
     64 PluralFormat::PluralFormat(const Locale& loc,
     65                            const PluralRules& rules,
     66                            UErrorCode& status)
     67         : locale(loc),
     68           msgPattern(status),
     69           numberFormat(NULL),
     70           offset(0) {
     71     init(&rules, UPLURAL_TYPE_COUNT, status);
     72 }
     73 
     74 PluralFormat::PluralFormat(const Locale& loc,
     75                            UPluralType type,
     76                            UErrorCode& status)
     77         : locale(loc),
     78           msgPattern(status),
     79           numberFormat(NULL),
     80           offset(0) {
     81     init(NULL, type, status);
     82 }
     83 
     84 PluralFormat::PluralFormat(const UnicodeString& pat,
     85                            UErrorCode& status)
     86         : locale(Locale::getDefault()),
     87           msgPattern(status),
     88           numberFormat(NULL),
     89           offset(0) {
     90     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     91     applyPattern(pat, status);
     92 }
     93 
     94 PluralFormat::PluralFormat(const Locale& loc,
     95                            const UnicodeString& pat,
     96                            UErrorCode& status)
     97         : locale(loc),
     98           msgPattern(status),
     99           numberFormat(NULL),
    100           offset(0) {
    101     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    102     applyPattern(pat, status);
    103 }
    104 
    105 PluralFormat::PluralFormat(const PluralRules& rules,
    106                            const UnicodeString& pat,
    107                            UErrorCode& status)
    108         : locale(Locale::getDefault()),
    109           msgPattern(status),
    110           numberFormat(NULL),
    111           offset(0) {
    112     init(&rules, UPLURAL_TYPE_COUNT, status);
    113     applyPattern(pat, status);
    114 }
    115 
    116 PluralFormat::PluralFormat(const Locale& loc,
    117                            const PluralRules& rules,
    118                            const UnicodeString& pat,
    119                            UErrorCode& status)
    120         : locale(loc),
    121           msgPattern(status),
    122           numberFormat(NULL),
    123           offset(0) {
    124     init(&rules, UPLURAL_TYPE_COUNT, status);
    125     applyPattern(pat, status);
    126 }
    127 
    128 PluralFormat::PluralFormat(const Locale& loc,
    129                            UPluralType type,
    130                            const UnicodeString& pat,
    131                            UErrorCode& status)
    132         : locale(loc),
    133           msgPattern(status),
    134           numberFormat(NULL),
    135           offset(0) {
    136     init(NULL, type, status);
    137     applyPattern(pat, status);
    138 }
    139 
    140 PluralFormat::PluralFormat(const PluralFormat& other)
    141         : Format(other),
    142           locale(other.locale),
    143           msgPattern(other.msgPattern),
    144           numberFormat(NULL),
    145           offset(other.offset) {
    146     copyObjects(other);
    147 }
    148 
    149 void
    150 PluralFormat::copyObjects(const PluralFormat& other) {
    151     UErrorCode status = U_ZERO_ERROR;
    152     if (numberFormat != NULL) {
    153         delete numberFormat;
    154     }
    155     if (pluralRulesWrapper.pluralRules != NULL) {
    156         delete pluralRulesWrapper.pluralRules;
    157     }
    158 
    159     if (other.numberFormat == NULL) {
    160         numberFormat = NumberFormat::createInstance(locale, status);
    161     } else {
    162         numberFormat = (NumberFormat*)other.numberFormat->clone();
    163     }
    164     if (other.pluralRulesWrapper.pluralRules == NULL) {
    165         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    166     } else {
    167         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    168     }
    169 }
    170 
    171 
// Releases the owned number format. The plural rules are owned by
// pluralRulesWrapper, whose own destructor deletes them.
PluralFormat::~PluralFormat() {
    delete numberFormat;
}
    175 
    176 void
    177 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
    178     if (U_FAILURE(status)) {
    179         return;
    180     }
    181 
    182     if (rules==NULL) {
    183         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
    184     } else {
    185         pluralRulesWrapper.pluralRules = rules->clone();
    186         if (pluralRulesWrapper.pluralRules == NULL) {
    187             status = U_MEMORY_ALLOCATION_ERROR;
    188             return;
    189         }
    190     }
    191 
    192     numberFormat= NumberFormat::createInstance(locale, status);
    193 }
    194 
    195 void
    196 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    197     msgPattern.parsePluralStyle(newPattern, NULL, status);
    198     if (U_FAILURE(status)) {
    199         msgPattern.clear();
    200         offset = 0;
    201         return;
    202     }
    203     offset = msgPattern.getPluralOffset(0);
    204 }
    205 
    206 UnicodeString&
    207 PluralFormat::format(const Formattable& obj,
    208                    UnicodeString& appendTo,
    209                    FieldPosition& pos,
    210                    UErrorCode& status) const
    211 {
    212     if (U_FAILURE(status)) return appendTo;
    213 
    214     if (obj.isNumeric()) {
    215         return format(obj, obj.getDouble(), appendTo, pos, status);
    216     } else {
    217         status = U_ILLEGAL_ARGUMENT_ERROR;
    218         return appendTo;
    219     }
    220 }
    221 
    222 UnicodeString
    223 PluralFormat::format(int32_t number, UErrorCode& status) const {
    224     FieldPosition fpos(FieldPosition::DONT_CARE);
    225     UnicodeString result;
    226     return format(Formattable(number), number, result, fpos, status);
    227 }
    228 
    229 UnicodeString
    230 PluralFormat::format(double number, UErrorCode& status) const {
    231     FieldPosition fpos(FieldPosition::DONT_CARE);
    232     UnicodeString result;
    233     return format(Formattable(number), number, result, fpos, status);
    234 }
    235 
    236 
    237 UnicodeString&
    238 PluralFormat::format(int32_t number,
    239                      UnicodeString& appendTo,
    240                      FieldPosition& pos,
    241                      UErrorCode& status) const {
    242     return format(Formattable(number), (double)number, appendTo, pos, status);
    243 }
    244 
    245 UnicodeString&
    246 PluralFormat::format(double number,
    247                      UnicodeString& appendTo,
    248                      FieldPosition& pos,
    249                      UErrorCode& status) const {
    250     return format(Formattable(number), (double)number, appendTo, pos, status);
    251 }
    252 
    253 UnicodeString&
    254 PluralFormat::format(const Formattable& numberObject, double number,
    255                      UnicodeString& appendTo,
    256                      FieldPosition& pos,
    257                      UErrorCode& status) const {
    258     if (U_FAILURE(status)) {
    259         return appendTo;
    260     }
    261     if (msgPattern.countParts() == 0) {
    262         return numberFormat->format(numberObject, appendTo, pos, status);
    263     }
    264 
    265     // Get the appropriate sub-message.
    266     // Select it based on the formatted number-offset.
    267     double numberMinusOffset = number - offset;
    268     // Call NumberFormatter to get both the DecimalQuantity and the string.
    269     // This call site needs to use more internal APIs than the Java equivalent.
    270     number::impl::UFormattedNumberData data;
    271     if (offset == 0) {
    272         // could be BigDecimal etc.
    273         numberObject.populateDecimalQuantity(data.quantity, status);
    274     } else {
    275         data.quantity.setToDouble(numberMinusOffset);
    276     }
    277     UnicodeString numberString;
    278     auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    279     if(decFmt != nullptr) {
    280         decFmt->toNumberFormatter().formatImpl(&data, status); // mutates &data
    281         numberString = data.string.toUnicodeString();
    282     } else {
    283         if (offset == 0) {
    284             numberFormat->format(numberObject, numberString, status);
    285         } else {
    286             numberFormat->format(numberMinusOffset, numberString, status);
    287         }
    288     }
    289 
    290     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
    291     if (U_FAILURE(status)) { return appendTo; }
    292     // Replace syntactic # signs in the top level of this sub-message
    293     // (not in nested arguments) with the formatted number-offset.
    294     const UnicodeString& pattern = msgPattern.getPatternString();
    295     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    296     for (;;) {
    297         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    298         const UMessagePatternPartType type = part.getType();
    299         int32_t index = part.getIndex();
    300         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    301             return appendTo.append(pattern, prevIndex, index - prevIndex);
    302         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    303             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    304             appendTo.append(pattern, prevIndex, index - prevIndex);
    305             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    306                 appendTo.append(numberString);
    307             }
    308             prevIndex = part.getLimit();
    309         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    310             appendTo.append(pattern, prevIndex, index - prevIndex);
    311             prevIndex = index;
    312             partIndex = msgPattern.getLimitPartIndex(partIndex);
    313             index = msgPattern.getPart(partIndex).getLimit();
    314             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    315             prevIndex = index;
    316         }
    317     }
    318 }
    319 
    320 UnicodeString&
    321 PluralFormat::toPattern(UnicodeString& appendTo) {
    322     if (0 == msgPattern.countParts()) {
    323         appendTo.setToBogus();
    324     } else {
    325         appendTo.append(msgPattern.getPatternString());
    326     }
    327     return appendTo;
    328 }
    329 
    330 void
    331 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    332     if (U_FAILURE(status)) {
    333         return;
    334     }
    335     locale = loc;
    336     msgPattern.clear();
    337     delete numberFormat;
    338     offset = 0;
    339     numberFormat = NULL;
    340     pluralRulesWrapper.reset();
    341     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    342 }
    343 
    344 void
    345 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    346     if (U_FAILURE(status)) {
    347         return;
    348     }
    349     NumberFormat* nf = (NumberFormat*)format->clone();
    350     if (nf != NULL) {
    351         delete numberFormat;
    352         numberFormat = nf;
    353     } else {
    354         status = U_MEMORY_ALLOCATION_ERROR;
    355     }
    356 }
    357 
    358 Format*
    359 PluralFormat::clone() const
    360 {
    361     return new PluralFormat(*this);
    362 }
    363 
    364 
    365 PluralFormat&
    366 PluralFormat::operator=(const PluralFormat& other) {
    367     if (this != &other) {
    368         locale = other.locale;
    369         msgPattern = other.msgPattern;
    370         offset = other.offset;
    371         copyObjects(other);
    372     }
    373 
    374     return *this;
    375 }
    376 
    377 UBool
    378 PluralFormat::operator==(const Format& other) const {
    379     if (this == &other) {
    380         return TRUE;
    381     }
    382     if (!Format::operator==(other)) {
    383         return FALSE;
    384     }
    385     const PluralFormat& o = (const PluralFormat&)other;
    386     return
    387         locale == o.locale &&
    388         msgPattern == o.msgPattern &&  // implies same offset
    389         (numberFormat == NULL) == (o.numberFormat == NULL) &&
    390         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
    391         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
    392         (pluralRulesWrapper.pluralRules == NULL ||
    393             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    394 }
    395 
    396 UBool
    397 PluralFormat::operator!=(const Format& other) const {
    398     return  !operator==(other);
    399 }
    400 
    401 void
    402 PluralFormat::parseObject(const UnicodeString& /*source*/,
    403                         Formattable& /*result*/,
    404                         ParsePosition& pos) const
    405 {
    406     // Parsing not supported.
    407     pos.setErrorIndex(pos.getIndex());
    408 }
    409 
    410 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    411                                      const PluralSelector& selector, void *context,
    412                                      double number, UErrorCode& ec) {
    413     if (U_FAILURE(ec)) {
    414         return 0;
    415     }
    416     int32_t count=pattern.countParts();
    417     double offset;
    418     const MessagePattern::Part* part=&pattern.getPart(partIndex);
    419     if (MessagePattern::Part::hasNumericValue(part->getType())) {
    420         offset=pattern.getNumericValue(*part);
    421         ++partIndex;
    422     } else {
    423         offset=0;
    424     }
    425     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
    426     // Then we get the keyword from the selector.
    427     // (In other words, we never call the selector if we match against an explicit value,
    428     // or if the only non-explicit keyword is "other".)
    429     UnicodeString keyword;
    430     UnicodeString other(FALSE, OTHER_STRING, 5);
    431     // When we find a match, we set msgStart>0 and also set this boolean to true
    432     // to avoid matching the keyword again (duplicates are allowed)
    433     // while we continue to look for an explicit-value match.
    434     UBool haveKeywordMatch=FALSE;
    435     // msgStart is 0 until we find any appropriate sub-message.
    436     // We remember the first "other" sub-message if we have not seen any
    437     // appropriate sub-message before.
    438     // We remember the first matching-keyword sub-message if we have not seen
    439     // one of those before.
    440     // (The parser allows [does not check for] duplicate keywords.
    441     // We just have to make sure to take the first one.)
    442     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    443     // at the first keyword match.
    444     // We keep going until we find an explicit-value match or reach the end of the plural style.
    445     int32_t msgStart=0;
    446     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    447     // until ARG_LIMIT or end of plural-only pattern.
    448     do {
    449         part=&pattern.getPart(partIndex++);
    450         const UMessagePatternPartType type = part->getType();
    451         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    452             break;
    453         }
    454         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    455         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    456         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    457             // explicit value like "=2"
    458             part=&pattern.getPart(partIndex++);
    459             if(number==pattern.getNumericValue(*part)) {
    460                 // matches explicit value
    461                 return partIndex;
    462             }
    463         } else if(!haveKeywordMatch) {
    464             // plural keyword like "few" or "other"
    465             // Compare "other" first and call the selector if this is not "other".
    466             if(pattern.partSubstringMatches(*part, other)) {
    467                 if(msgStart==0) {
    468                     msgStart=partIndex;
    469                     if(0 == keyword.compare(other)) {
    470                         // This is the first "other" sub-message,
    471                         // and the selected keyword is also "other".
    472                         // Do not match "other" again.
    473                         haveKeywordMatch=TRUE;
    474                     }
    475                 }
    476             } else {
    477                 if(keyword.isEmpty()) {
    478                     keyword=selector.select(context, number-offset, ec);
    479                     if(msgStart!=0 && (0 == keyword.compare(other))) {
    480                         // We have already seen an "other" sub-message.
    481                         // Do not match "other" again.
    482                         haveKeywordMatch=TRUE;
    483                         // Skip keyword matching but do getLimitPartIndex().
    484                     }
    485                 }
    486                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    487                     // keyword matches
    488                     msgStart=partIndex;
    489                     // Do not match this keyword again.
    490                     haveKeywordMatch=TRUE;
    491                 }
    492             }
    493         }
    494         partIndex=pattern.getLimitPartIndex(partIndex);
    495     } while(++partIndex<count);
    496     return msgStart;
    497 }
    498 
    499 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
    500     // If no pattern was applied, return null.
    501     if (msgPattern.countParts() == 0) {
    502         pos.setBeginIndex(-1);
    503         pos.setEndIndex(-1);
    504         return;
    505     }
    506     int partIndex = 0;
    507     int currMatchIndex;
    508     int count=msgPattern.countParts();
    509     int startingAt = pos.getBeginIndex();
    510     if (startingAt < 0) {
    511         startingAt = 0;
    512     }
    513 
    514     // The keyword is null until we need to match against a non-explicit, not-"other" value.
    515     // Then we get the keyword from the selector.
    516     // (In other words, we never call the selector if we match against an explicit value,
    517     // or if the only non-explicit keyword is "other".)
    518     UnicodeString keyword;
    519     UnicodeString matchedWord;
    520     const UnicodeString& pattern = msgPattern.getPatternString();
    521     int matchedIndex = -1;
    522     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
    523     // until the end of the plural-only pattern.
    524     while (partIndex < count) {
    525         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
    526         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
    527             // Bad format
    528             continue;
    529         }
    530 
    531         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
    532         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
    533             // Bad format
    534             continue;
    535         }
    536 
    537         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
    538         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
    539             // Bad format
    540             continue;
    541         }
    542 
    543         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    544         if (rbnfLenientScanner != NULL) {
    545             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
    546             int32_t length = -1;
    547             currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
    548         }
    549         else {
    550             currMatchIndex = source.indexOf(currArg, startingAt);
    551         }
    552         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
    553             matchedIndex = currMatchIndex;
    554             matchedWord = currArg;
    555             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    556         }
    557     }
    558     if (matchedIndex >= 0) {
    559         pos.setBeginIndex(matchedIndex);
    560         pos.setEndIndex(matchedIndex + matchedWord.length());
    561         result.setString(keyword);
    562         return;
    563     }
    564 
    565     // Not found!
    566     pos.setBeginIndex(-1);
    567     pos.setEndIndex(-1);
    568 }
    569 
// Out-of-line definition of the (empty) PluralSelector destructor.
PluralFormat::PluralSelector::~PluralSelector() {}
    571 
// The adapter owns its PluralRules object and releases it here.
PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    delete pluralRules;
}
    575 
    576 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
    577                                                           UErrorCode& /*ec*/) const {
    578     (void)number;  // unused except in the assertion
    579     IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
    580     return pluralRules->select(*dec);
    581 }
    582 
    583 void PluralFormat::PluralSelectorAdapter::reset() {
    584     delete pluralRules;
    585     pluralRules = NULL;
    586 }
    587 
    588 
    589 U_NAMESPACE_END
    590 
    591 
    592 #endif /* #if !UCONFIG_NO_FORMATTING */
    593 
    594 //eof
    595