Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2015, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURFMT.CPP
      8 *******************************************************************************
      9 */
     10 
     11 #include "unicode/decimfmt.h"
     12 #include "unicode/messagepattern.h"
     13 #include "unicode/plurfmt.h"
     14 #include "unicode/plurrule.h"
     15 #include "unicode/utypes.h"
     16 #include "cmemory.h"
     17 #include "messageimpl.h"
     18 #include "nfrule.h"
     19 #include "plurrule_impl.h"
     20 #include "uassert.h"
     21 #include "uhash.h"
     22 
     23 #if !UCONFIG_NO_FORMATTING
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 static const UChar OTHER_STRING[] = {
     28     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     29 };
     30 
     31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     32 
     33 PluralFormat::PluralFormat(UErrorCode& status)
     34         : locale(Locale::getDefault()),
     35           msgPattern(status),
     36           numberFormat(NULL),
     37           offset(0) {
     38     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     39 }
     40 
     41 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     42         : locale(loc),
     43           msgPattern(status),
     44           numberFormat(NULL),
     45           offset(0) {
     46     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     47 }
     48 
     49 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     50         : locale(Locale::getDefault()),
     51           msgPattern(status),
     52           numberFormat(NULL),
     53           offset(0) {
     54     init(&rules, UPLURAL_TYPE_COUNT, status);
     55 }
     56 
     57 PluralFormat::PluralFormat(const Locale& loc,
     58                            const PluralRules& rules,
     59                            UErrorCode& status)
     60         : locale(loc),
     61           msgPattern(status),
     62           numberFormat(NULL),
     63           offset(0) {
     64     init(&rules, UPLURAL_TYPE_COUNT, status);
     65 }
     66 
     67 PluralFormat::PluralFormat(const Locale& loc,
     68                            UPluralType type,
     69                            UErrorCode& status)
     70         : locale(loc),
     71           msgPattern(status),
     72           numberFormat(NULL),
     73           offset(0) {
     74     init(NULL, type, status);
     75 }
     76 
     77 PluralFormat::PluralFormat(const UnicodeString& pat,
     78                            UErrorCode& status)
     79         : locale(Locale::getDefault()),
     80           msgPattern(status),
     81           numberFormat(NULL),
     82           offset(0) {
     83     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     84     applyPattern(pat, status);
     85 }
     86 
     87 PluralFormat::PluralFormat(const Locale& loc,
     88                            const UnicodeString& pat,
     89                            UErrorCode& status)
     90         : locale(loc),
     91           msgPattern(status),
     92           numberFormat(NULL),
     93           offset(0) {
     94     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     95     applyPattern(pat, status);
     96 }
     97 
     98 PluralFormat::PluralFormat(const PluralRules& rules,
     99                            const UnicodeString& pat,
    100                            UErrorCode& status)
    101         : locale(Locale::getDefault()),
    102           msgPattern(status),
    103           numberFormat(NULL),
    104           offset(0) {
    105     init(&rules, UPLURAL_TYPE_COUNT, status);
    106     applyPattern(pat, status);
    107 }
    108 
    109 PluralFormat::PluralFormat(const Locale& loc,
    110                            const PluralRules& rules,
    111                            const UnicodeString& pat,
    112                            UErrorCode& status)
    113         : locale(loc),
    114           msgPattern(status),
    115           numberFormat(NULL),
    116           offset(0) {
    117     init(&rules, UPLURAL_TYPE_COUNT, status);
    118     applyPattern(pat, status);
    119 }
    120 
    121 PluralFormat::PluralFormat(const Locale& loc,
    122                            UPluralType type,
    123                            const UnicodeString& pat,
    124                            UErrorCode& status)
    125         : locale(loc),
    126           msgPattern(status),
    127           numberFormat(NULL),
    128           offset(0) {
    129     init(NULL, type, status);
    130     applyPattern(pat, status);
    131 }
    132 
    133 PluralFormat::PluralFormat(const PluralFormat& other)
    134         : Format(other),
    135           locale(other.locale),
    136           msgPattern(other.msgPattern),
    137           numberFormat(NULL),
    138           offset(other.offset) {
    139     copyObjects(other);
    140 }
    141 
    142 void
    143 PluralFormat::copyObjects(const PluralFormat& other) {
    144     UErrorCode status = U_ZERO_ERROR;
    145     if (numberFormat != NULL) {
    146         delete numberFormat;
    147     }
    148     if (pluralRulesWrapper.pluralRules != NULL) {
    149         delete pluralRulesWrapper.pluralRules;
    150     }
    151 
    152     if (other.numberFormat == NULL) {
    153         numberFormat = NumberFormat::createInstance(locale, status);
    154     } else {
    155         numberFormat = (NumberFormat*)other.numberFormat->clone();
    156     }
    157     if (other.pluralRulesWrapper.pluralRules == NULL) {
    158         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    159     } else {
    160         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    161     }
    162 }
    163 
    164 
    165 PluralFormat::~PluralFormat() {
    166     delete numberFormat;
    167 }
    168 
    169 void
    170 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
    171     if (U_FAILURE(status)) {
    172         return;
    173     }
    174 
    175     if (rules==NULL) {
    176         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
    177     } else {
    178         pluralRulesWrapper.pluralRules = rules->clone();
    179         if (pluralRulesWrapper.pluralRules == NULL) {
    180             status = U_MEMORY_ALLOCATION_ERROR;
    181             return;
    182         }
    183     }
    184 
    185     numberFormat= NumberFormat::createInstance(locale, status);
    186 }
    187 
    188 void
    189 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    190     msgPattern.parsePluralStyle(newPattern, NULL, status);
    191     if (U_FAILURE(status)) {
    192         msgPattern.clear();
    193         offset = 0;
    194         return;
    195     }
    196     offset = msgPattern.getPluralOffset(0);
    197 }
    198 
    199 UnicodeString&
    200 PluralFormat::format(const Formattable& obj,
    201                    UnicodeString& appendTo,
    202                    FieldPosition& pos,
    203                    UErrorCode& status) const
    204 {
    205     if (U_FAILURE(status)) return appendTo;
    206 
    207     if (obj.isNumeric()) {
    208         return format(obj, obj.getDouble(), appendTo, pos, status);
    209     } else {
    210         status = U_ILLEGAL_ARGUMENT_ERROR;
    211         return appendTo;
    212     }
    213 }
    214 
    215 UnicodeString
    216 PluralFormat::format(int32_t number, UErrorCode& status) const {
    217     FieldPosition fpos(0);
    218     UnicodeString result;
    219     return format(Formattable(number), number, result, fpos, status);
    220 }
    221 
    222 UnicodeString
    223 PluralFormat::format(double number, UErrorCode& status) const {
    224     FieldPosition fpos(0);
    225     UnicodeString result;
    226     return format(Formattable(number), number, result, fpos, status);
    227 }
    228 
    229 
    230 UnicodeString&
    231 PluralFormat::format(int32_t number,
    232                      UnicodeString& appendTo,
    233                      FieldPosition& pos,
    234                      UErrorCode& status) const {
    235     return format(Formattable(number), (double)number, appendTo, pos, status);
    236 }
    237 
    238 UnicodeString&
    239 PluralFormat::format(double number,
    240                      UnicodeString& appendTo,
    241                      FieldPosition& pos,
    242                      UErrorCode& status) const {
    243     return format(Formattable(number), (double)number, appendTo, pos, status);
    244 }
    245 
    246 UnicodeString&
    247 PluralFormat::format(const Formattable& numberObject, double number,
    248                      UnicodeString& appendTo,
    249                      FieldPosition& pos,
    250                      UErrorCode& status) const {
    251     if (U_FAILURE(status)) {
    252         return appendTo;
    253     }
    254     if (msgPattern.countParts() == 0) {
    255         return numberFormat->format(numberObject, appendTo, pos, status);
    256     }
    257     // Get the appropriate sub-message.
    258     // Select it based on the formatted number-offset.
    259     double numberMinusOffset = number - offset;
    260     UnicodeString numberString;
    261     FieldPosition ignorePos;
    262     FixedDecimal dec(numberMinusOffset);
    263     if (offset == 0) {
    264         numberFormat->format(numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
    265         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    266         if(decFmt != NULL) {
    267             dec = decFmt->getFixedDecimal(numberObject, status);
    268         }
    269     } else {
    270         numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
    271         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
    272         if(decFmt != NULL) {
    273             dec = decFmt->getFixedDecimal(numberMinusOffset, status);
    274         }
    275     }
    276     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
    277     if (U_FAILURE(status)) { return appendTo; }
    278     // Replace syntactic # signs in the top level of this sub-message
    279     // (not in nested arguments) with the formatted number-offset.
    280     const UnicodeString& pattern = msgPattern.getPatternString();
    281     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    282     for (;;) {
    283         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    284         const UMessagePatternPartType type = part.getType();
    285         int32_t index = part.getIndex();
    286         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    287             return appendTo.append(pattern, prevIndex, index - prevIndex);
    288         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    289             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    290             appendTo.append(pattern, prevIndex, index - prevIndex);
    291             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    292                 appendTo.append(numberString);
    293             }
    294             prevIndex = part.getLimit();
    295         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    296             appendTo.append(pattern, prevIndex, index - prevIndex);
    297             prevIndex = index;
    298             partIndex = msgPattern.getLimitPartIndex(partIndex);
    299             index = msgPattern.getPart(partIndex).getLimit();
    300             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    301             prevIndex = index;
    302         }
    303     }
    304 }
    305 
    306 UnicodeString&
    307 PluralFormat::toPattern(UnicodeString& appendTo) {
    308     if (0 == msgPattern.countParts()) {
    309         appendTo.setToBogus();
    310     } else {
    311         appendTo.append(msgPattern.getPatternString());
    312     }
    313     return appendTo;
    314 }
    315 
    316 void
    317 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    318     if (U_FAILURE(status)) {
    319         return;
    320     }
    321     locale = loc;
    322     msgPattern.clear();
    323     delete numberFormat;
    324     offset = 0;
    325     numberFormat = NULL;
    326     pluralRulesWrapper.reset();
    327     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    328 }
    329 
    330 void
    331 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    332     if (U_FAILURE(status)) {
    333         return;
    334     }
    335     NumberFormat* nf = (NumberFormat*)format->clone();
    336     if (nf != NULL) {
    337         delete numberFormat;
    338         numberFormat = nf;
    339     } else {
    340         status = U_MEMORY_ALLOCATION_ERROR;
    341     }
    342 }
    343 
    344 Format*
    345 PluralFormat::clone() const
    346 {
    347     return new PluralFormat(*this);
    348 }
    349 
    350 
    351 PluralFormat&
    352 PluralFormat::operator=(const PluralFormat& other) {
    353     if (this != &other) {
    354         locale = other.locale;
    355         msgPattern = other.msgPattern;
    356         offset = other.offset;
    357         copyObjects(other);
    358     }
    359 
    360     return *this;
    361 }
    362 
    363 UBool
    364 PluralFormat::operator==(const Format& other) const {
    365     if (this == &other) {
    366         return TRUE;
    367     }
    368     if (!Format::operator==(other)) {
    369         return FALSE;
    370     }
    371     const PluralFormat& o = (const PluralFormat&)other;
    372     return
    373         locale == o.locale &&
    374         msgPattern == o.msgPattern &&  // implies same offset
    375         (numberFormat == NULL) == (o.numberFormat == NULL) &&
    376         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
    377         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
    378         (pluralRulesWrapper.pluralRules == NULL ||
    379             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    380 }
    381 
    382 UBool
    383 PluralFormat::operator!=(const Format& other) const {
    384     return  !operator==(other);
    385 }
    386 
    387 void
    388 PluralFormat::parseObject(const UnicodeString& /*source*/,
    389                         Formattable& /*result*/,
    390                         ParsePosition& pos) const
    391 {
    392     // Parsing not supported.
    393     pos.setErrorIndex(pos.getIndex());
    394 }
    395 
    396 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    397                                      const PluralSelector& selector, void *context,
    398                                      double number, UErrorCode& ec) {
    399     if (U_FAILURE(ec)) {
    400         return 0;
    401     }
    402     int32_t count=pattern.countParts();
    403     double offset;
    404     const MessagePattern::Part* part=&pattern.getPart(partIndex);
    405     if (MessagePattern::Part::hasNumericValue(part->getType())) {
    406         offset=pattern.getNumericValue(*part);
    407         ++partIndex;
    408     } else {
    409         offset=0;
    410     }
    411     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
    412     // Then we get the keyword from the selector.
    413     // (In other words, we never call the selector if we match against an explicit value,
    414     // or if the only non-explicit keyword is "other".)
    415     UnicodeString keyword;
    416     UnicodeString other(FALSE, OTHER_STRING, 5);
    417     // When we find a match, we set msgStart>0 and also set this boolean to true
    418     // to avoid matching the keyword again (duplicates are allowed)
    419     // while we continue to look for an explicit-value match.
    420     UBool haveKeywordMatch=FALSE;
    421     // msgStart is 0 until we find any appropriate sub-message.
    422     // We remember the first "other" sub-message if we have not seen any
    423     // appropriate sub-message before.
    424     // We remember the first matching-keyword sub-message if we have not seen
    425     // one of those before.
    426     // (The parser allows [does not check for] duplicate keywords.
    427     // We just have to make sure to take the first one.)
    428     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    429     // at the first keyword match.
    430     // We keep going until we find an explicit-value match or reach the end of the plural style.
    431     int32_t msgStart=0;
    432     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    433     // until ARG_LIMIT or end of plural-only pattern.
    434     do {
    435         part=&pattern.getPart(partIndex++);
    436         const UMessagePatternPartType type = part->getType();
    437         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    438             break;
    439         }
    440         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    441         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    442         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    443             // explicit value like "=2"
    444             part=&pattern.getPart(partIndex++);
    445             if(number==pattern.getNumericValue(*part)) {
    446                 // matches explicit value
    447                 return partIndex;
    448             }
    449         } else if(!haveKeywordMatch) {
    450             // plural keyword like "few" or "other"
    451             // Compare "other" first and call the selector if this is not "other".
    452             if(pattern.partSubstringMatches(*part, other)) {
    453                 if(msgStart==0) {
    454                     msgStart=partIndex;
    455                     if(0 == keyword.compare(other)) {
    456                         // This is the first "other" sub-message,
    457                         // and the selected keyword is also "other".
    458                         // Do not match "other" again.
    459                         haveKeywordMatch=TRUE;
    460                     }
    461                 }
    462             } else {
    463                 if(keyword.isEmpty()) {
    464                     keyword=selector.select(context, number-offset, ec);
    465                     if(msgStart!=0 && (0 == keyword.compare(other))) {
    466                         // We have already seen an "other" sub-message.
    467                         // Do not match "other" again.
    468                         haveKeywordMatch=TRUE;
    469                         // Skip keyword matching but do getLimitPartIndex().
    470                     }
    471                 }
    472                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    473                     // keyword matches
    474                     msgStart=partIndex;
    475                     // Do not match this keyword again.
    476                     haveKeywordMatch=TRUE;
    477                 }
    478             }
    479         }
    480         partIndex=pattern.getLimitPartIndex(partIndex);
    481     } while(++partIndex<count);
    482     return msgStart;
    483 }
    484 
    485 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
    486     // If no pattern was applied, return null.
    487     if (msgPattern.countParts() == 0) {
    488         pos.setBeginIndex(-1);
    489         pos.setEndIndex(-1);
    490         return;
    491     }
    492     int partIndex = 0;
    493     int currMatchIndex;
    494     int count=msgPattern.countParts();
    495     int startingAt = pos.getBeginIndex();
    496     if (startingAt < 0) {
    497         startingAt = 0;
    498     }
    499 
    500     // The keyword is null until we need to match against a non-explicit, not-"other" value.
    501     // Then we get the keyword from the selector.
    502     // (In other words, we never call the selector if we match against an explicit value,
    503     // or if the only non-explicit keyword is "other".)
    504     UnicodeString keyword;
    505     UnicodeString matchedWord;
    506     const UnicodeString& pattern = msgPattern.getPatternString();
    507     int matchedIndex = -1;
    508     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
    509     // until the end of the plural-only pattern.
    510     while (partIndex < count) {
    511         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
    512         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
    513             // Bad format
    514             continue;
    515         }
    516 
    517         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
    518         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
    519             // Bad format
    520             continue;
    521         }
    522 
    523         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
    524         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
    525             // Bad format
    526             continue;
    527         }
    528 
    529         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    530         if (rbnfLenientScanner != NULL) {
    531             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
    532             int32_t length = -1;
    533             currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
    534         }
    535         else {
    536             currMatchIndex = source.indexOf(currArg, startingAt);
    537         }
    538         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
    539             matchedIndex = currMatchIndex;
    540             matchedWord = currArg;
    541             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
    542         }
    543     }
    544     if (matchedIndex >= 0) {
    545         pos.setBeginIndex(matchedIndex);
    546         pos.setEndIndex(matchedIndex + matchedWord.length());
    547         result.setString(keyword);
    548         return;
    549     }
    550 
    551     // Not found!
    552     pos.setBeginIndex(-1);
    553     pos.setEndIndex(-1);
    554 }
    555 
    556 PluralFormat::PluralSelector::~PluralSelector() {}
    557 
    558 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    559     delete pluralRules;
    560 }
    561 
    562 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
    563                                                           UErrorCode& /*ec*/) const {
    564     (void)number;  // unused except in the assertion
    565     FixedDecimal *dec=static_cast<FixedDecimal *>(context);
    566     U_ASSERT(dec->source==number);
    567     return pluralRules->select(*dec);
    568 }
    569 
    570 void PluralFormat::PluralSelectorAdapter::reset() {
    571     delete pluralRules;
    572     pluralRules = NULL;
    573 }
    574 
    575 
    576 U_NAMESPACE_END
    577 
    578 
    579 #endif /* #if !UCONFIG_NO_FORMATTING */
    580 
    581 //eof
    582