Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2012, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURFMT.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *******************************************************************************
     13 */
     14 
     15 #include "unicode/messagepattern.h"
     16 #include "unicode/plurfmt.h"
     17 #include "unicode/plurrule.h"
     18 #include "unicode/utypes.h"
     19 #include "cmemory.h"
     20 #include "messageimpl.h"
     21 #include "plurrule_impl.h"
     22 #include "uassert.h"
     23 #include "uhash.h"
     24 
     25 #if !UCONFIG_NO_FORMATTING
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 static const UChar OTHER_STRING[] = {
     30     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     31 };
     32 
     33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     34 
     35 PluralFormat::PluralFormat(UErrorCode& status)
     36         : locale(Locale::getDefault()),
     37           msgPattern(status),
     38           numberFormat(NULL),
     39           offset(0) {
     40     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     41 }
     42 
     43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     44         : locale(loc),
     45           msgPattern(status),
     46           numberFormat(NULL),
     47           offset(0) {
     48     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     49 }
     50 
     51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     52         : locale(Locale::getDefault()),
     53           msgPattern(status),
     54           numberFormat(NULL),
     55           offset(0) {
     56     init(&rules, UPLURAL_TYPE_COUNT, status);
     57 }
     58 
     59 PluralFormat::PluralFormat(const Locale& loc,
     60                            const PluralRules& rules,
     61                            UErrorCode& status)
     62         : locale(loc),
     63           msgPattern(status),
     64           numberFormat(NULL),
     65           offset(0) {
     66     init(&rules, UPLURAL_TYPE_COUNT, status);
     67 }
     68 
     69 PluralFormat::PluralFormat(const Locale& loc,
     70                            UPluralType type,
     71                            UErrorCode& status)
     72         : locale(loc),
     73           msgPattern(status),
     74           numberFormat(NULL),
     75           offset(0) {
     76     init(NULL, type, status);
     77 }
     78 
     79 PluralFormat::PluralFormat(const UnicodeString& pat,
     80                            UErrorCode& status)
     81         : locale(Locale::getDefault()),
     82           msgPattern(status),
     83           numberFormat(NULL),
     84           offset(0) {
     85     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     86     applyPattern(pat, status);
     87 }
     88 
     89 PluralFormat::PluralFormat(const Locale& loc,
     90                            const UnicodeString& pat,
     91                            UErrorCode& status)
     92         : locale(loc),
     93           msgPattern(status),
     94           numberFormat(NULL),
     95           offset(0) {
     96     init(NULL, UPLURAL_TYPE_CARDINAL, status);
     97     applyPattern(pat, status);
     98 }
     99 
    100 PluralFormat::PluralFormat(const PluralRules& rules,
    101                            const UnicodeString& pat,
    102                            UErrorCode& status)
    103         : locale(Locale::getDefault()),
    104           msgPattern(status),
    105           numberFormat(NULL),
    106           offset(0) {
    107     init(&rules, UPLURAL_TYPE_COUNT, status);
    108     applyPattern(pat, status);
    109 }
    110 
    111 PluralFormat::PluralFormat(const Locale& loc,
    112                            const PluralRules& rules,
    113                            const UnicodeString& pat,
    114                            UErrorCode& status)
    115         : locale(loc),
    116           msgPattern(status),
    117           numberFormat(NULL),
    118           offset(0) {
    119     init(&rules, UPLURAL_TYPE_COUNT, status);
    120     applyPattern(pat, status);
    121 }
    122 
    123 PluralFormat::PluralFormat(const Locale& loc,
    124                            UPluralType type,
    125                            const UnicodeString& pat,
    126                            UErrorCode& status)
    127         : locale(loc),
    128           msgPattern(status),
    129           numberFormat(NULL),
    130           offset(0) {
    131     init(NULL, type, status);
    132     applyPattern(pat, status);
    133 }
    134 
    135 PluralFormat::PluralFormat(const PluralFormat& other)
    136         : Format(other),
    137           locale(other.locale),
    138           msgPattern(other.msgPattern),
    139           numberFormat(NULL),
    140           offset(other.offset) {
    141     copyObjects(other);
    142 }
    143 
    144 void
    145 PluralFormat::copyObjects(const PluralFormat& other) {
    146     UErrorCode status = U_ZERO_ERROR;
    147     if (numberFormat != NULL) {
    148         delete numberFormat;
    149     }
    150     if (pluralRulesWrapper.pluralRules != NULL) {
    151         delete pluralRulesWrapper.pluralRules;
    152     }
    153 
    154     if (other.numberFormat == NULL) {
    155         numberFormat = NumberFormat::createInstance(locale, status);
    156     } else {
    157         numberFormat = (NumberFormat*)other.numberFormat->clone();
    158     }
    159     if (other.pluralRulesWrapper.pluralRules == NULL) {
    160         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    161     } else {
    162         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    163     }
    164 }
    165 
    166 
    167 PluralFormat::~PluralFormat() {
    168     delete numberFormat;
    169 }
    170 
    171 void
    172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
    173     if (U_FAILURE(status)) {
    174         return;
    175     }
    176 
    177     if (rules==NULL) {
    178         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
    179     } else {
    180         pluralRulesWrapper.pluralRules = rules->clone();
    181         if (pluralRulesWrapper.pluralRules == NULL) {
    182             status = U_MEMORY_ALLOCATION_ERROR;
    183             return;
    184         }
    185     }
    186 
    187     numberFormat= NumberFormat::createInstance(locale, status);
    188 }
    189 
    190 void
    191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    192     msgPattern.parsePluralStyle(newPattern, NULL, status);
    193     if (U_FAILURE(status)) {
    194         msgPattern.clear();
    195         offset = 0;
    196         return;
    197     }
    198     offset = msgPattern.getPluralOffset(0);
    199 }
    200 
    201 UnicodeString&
    202 PluralFormat::format(const Formattable& obj,
    203                    UnicodeString& appendTo,
    204                    FieldPosition& pos,
    205                    UErrorCode& status) const
    206 {
    207     if (U_FAILURE(status)) return appendTo;
    208 
    209     if (obj.isNumeric()) {
    210         return format(obj.getDouble(), appendTo, pos, status);
    211     } else {
    212         status = U_ILLEGAL_ARGUMENT_ERROR;
    213         return appendTo;
    214     }
    215 }
    216 
    217 UnicodeString
    218 PluralFormat::format(int32_t number, UErrorCode& status) const {
    219     FieldPosition fpos(0);
    220     UnicodeString result;
    221     return format(number, result, fpos, status);
    222 }
    223 
    224 UnicodeString
    225 PluralFormat::format(double number, UErrorCode& status) const {
    226     FieldPosition fpos(0);
    227     UnicodeString result;
    228     return format(number, result, fpos, status);
    229 }
    230 
    231 
    232 UnicodeString&
    233 PluralFormat::format(int32_t number,
    234                      UnicodeString& appendTo,
    235                      FieldPosition& pos,
    236                      UErrorCode& status) const {
    237     return format((double)number, appendTo, pos, status);
    238 }
    239 
    240 UnicodeString&
    241 PluralFormat::format(double number,
    242                      UnicodeString& appendTo,
    243                      FieldPosition& pos,
    244                      UErrorCode& status) const {
    245     if (U_FAILURE(status)) {
    246         return appendTo;
    247     }
    248     if (msgPattern.countParts() == 0) {
    249         return numberFormat->format(number, appendTo, pos);
    250     }
    251     // Get the appropriate sub-message.
    252     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
    253     // Replace syntactic # signs in the top level of this sub-message
    254     // (not in nested arguments) with the formatted number-offset.
    255     const UnicodeString& pattern = msgPattern.getPatternString();
    256     number -= offset;
    257     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    258     for (;;) {
    259         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    260         const UMessagePatternPartType type = part.getType();
    261         int32_t index = part.getIndex();
    262         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    263             return appendTo.append(pattern, prevIndex, index - prevIndex);
    264         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    265             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    266             appendTo.append(pattern, prevIndex, index - prevIndex);
    267             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    268                 numberFormat->format(number, appendTo);
    269             }
    270             prevIndex = part.getLimit();
    271         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    272             appendTo.append(pattern, prevIndex, index - prevIndex);
    273             prevIndex = index;
    274             partIndex = msgPattern.getLimitPartIndex(partIndex);
    275             index = msgPattern.getPart(partIndex).getLimit();
    276             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    277             prevIndex = index;
    278         }
    279     }
    280 }
    281 
    282 UnicodeString&
    283 PluralFormat::toPattern(UnicodeString& appendTo) {
    284     if (0 == msgPattern.countParts()) {
    285         appendTo.setToBogus();
    286     } else {
    287         appendTo.append(msgPattern.getPatternString());
    288     }
    289     return appendTo;
    290 }
    291 
    292 void
    293 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    294     if (U_FAILURE(status)) {
    295         return;
    296     }
    297     locale = loc;
    298     msgPattern.clear();
    299     delete numberFormat;
    300     offset = 0;
    301     numberFormat = NULL;
    302     pluralRulesWrapper.reset();
    303     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    304 }
    305 
    306 void
    307 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    308     if (U_FAILURE(status)) {
    309         return;
    310     }
    311     NumberFormat* nf = (NumberFormat*)format->clone();
    312     if (nf != NULL) {
    313         delete numberFormat;
    314         numberFormat = nf;
    315     } else {
    316         status = U_MEMORY_ALLOCATION_ERROR;
    317     }
    318 }
    319 
    320 Format*
    321 PluralFormat::clone() const
    322 {
    323     return new PluralFormat(*this);
    324 }
    325 
    326 
    327 PluralFormat&
    328 PluralFormat::operator=(const PluralFormat& other) {
    329     if (this != &other) {
    330         locale = other.locale;
    331         msgPattern = other.msgPattern;
    332         offset = other.offset;
    333         copyObjects(other);
    334     }
    335 
    336     return *this;
    337 }
    338 
    339 UBool
    340 PluralFormat::operator==(const Format& other) const {
    341     if (this == &other) {
    342         return TRUE;
    343     }
    344     if (!Format::operator==(other)) {
    345         return FALSE;
    346     }
    347     const PluralFormat& o = (const PluralFormat&)other;
    348     return
    349         locale == o.locale &&
    350         msgPattern == o.msgPattern &&  // implies same offset
    351         (numberFormat == NULL) == (o.numberFormat == NULL) &&
    352         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
    353         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
    354         (pluralRulesWrapper.pluralRules == NULL ||
    355             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    356 }
    357 
    358 UBool
    359 PluralFormat::operator!=(const Format& other) const {
    360     return  !operator==(other);
    361 }
    362 
    363 void
    364 PluralFormat::parseObject(const UnicodeString& /*source*/,
    365                         Formattable& /*result*/,
    366                         ParsePosition& pos) const
    367 {
    368     // Parsing not supported.
    369     pos.setErrorIndex(pos.getIndex());
    370 }
    371 
    372 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    373                                      const PluralSelector& selector, double number, UErrorCode& ec) {
    374     if (U_FAILURE(ec)) {
    375         return 0;
    376     }
    377     int32_t count=pattern.countParts();
    378     double offset;
    379     const MessagePattern::Part* part=&pattern.getPart(partIndex);
    380     if (MessagePattern::Part::hasNumericValue(part->getType())) {
    381         offset=pattern.getNumericValue(*part);
    382         ++partIndex;
    383     } else {
    384         offset=0;
    385     }
    386     // The keyword is empty until we need to match against non-explicit, not-"other" value.
    387     // Then we get the keyword from the selector.
    388     // (In other words, we never call the selector if we match against an explicit value,
    389     // or if the only non-explicit keyword is "other".)
    390     UnicodeString keyword;
    391     UnicodeString other(FALSE, OTHER_STRING, 5);
    392     // When we find a match, we set msgStart>0 and also set this boolean to true
    393     // to avoid matching the keyword again (duplicates are allowed)
    394     // while we continue to look for an explicit-value match.
    395     UBool haveKeywordMatch=FALSE;
    396     // msgStart is 0 until we find any appropriate sub-message.
    397     // We remember the first "other" sub-message if we have not seen any
    398     // appropriate sub-message before.
    399     // We remember the first matching-keyword sub-message if we have not seen
    400     // one of those before.
    401     // (The parser allows [does not check for] duplicate keywords.
    402     // We just have to make sure to take the first one.)
    403     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    404     // at the first keyword match.
    405     // We keep going until we find an explicit-value match or reach the end of the plural style.
    406     int32_t msgStart=0;
    407     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    408     // until ARG_LIMIT or end of plural-only pattern.
    409     do {
    410         part=&pattern.getPart(partIndex++);
    411         const UMessagePatternPartType type = part->getType();
    412         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    413             break;
    414         }
    415         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    416         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    417         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    418             // explicit value like "=2"
    419             part=&pattern.getPart(partIndex++);
    420             if(number==pattern.getNumericValue(*part)) {
    421                 // matches explicit value
    422                 return partIndex;
    423             }
    424         } else if(!haveKeywordMatch) {
    425             // plural keyword like "few" or "other"
    426             // Compare "other" first and call the selector if this is not "other".
    427             if(pattern.partSubstringMatches(*part, other)) {
    428                 if(msgStart==0) {
    429                     msgStart=partIndex;
    430                     if(0 == keyword.compare(other)) {
    431                         // This is the first "other" sub-message,
    432                         // and the selected keyword is also "other".
    433                         // Do not match "other" again.
    434                         haveKeywordMatch=TRUE;
    435                     }
    436                 }
    437             } else {
    438                 if(keyword.isEmpty()) {
    439                     keyword=selector.select(number-offset, ec);
    440                     if(msgStart!=0 && (0 == keyword.compare(other))) {
    441                         // We have already seen an "other" sub-message.
    442                         // Do not match "other" again.
    443                         haveKeywordMatch=TRUE;
    444                         // Skip keyword matching but do getLimitPartIndex().
    445                     }
    446                 }
    447                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    448                     // keyword matches
    449                     msgStart=partIndex;
    450                     // Do not match this keyword again.
    451                     haveKeywordMatch=TRUE;
    452                 }
    453             }
    454         }
    455         partIndex=pattern.getLimitPartIndex(partIndex);
    456     } while(++partIndex<count);
    457     return msgStart;
    458 }
    459 
    460 PluralFormat::PluralSelector::~PluralSelector() {}
    461 
    462 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    463     delete pluralRules;
    464 }
    465 
    466 UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
    467                                                           UErrorCode& /*ec*/) const {
    468     return pluralRules->select(number);
    469 }
    470 
    471 void PluralFormat::PluralSelectorAdapter::reset() {
    472     delete pluralRules;
    473     pluralRules = NULL;
    474 }
    475 
    476 
    477 U_NAMESPACE_END
    478 
    479 
    480 #endif /* #if !UCONFIG_NO_FORMATTING */
    481 
    482 //eof
    483