Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2011, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 *
      7 * File PLURFMT.CPP
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *******************************************************************************
     13 */
     14 
     15 #include "unicode/messagepattern.h"
     16 #include "unicode/plurfmt.h"
     17 #include "unicode/plurrule.h"
     18 #include "unicode/utypes.h"
     19 #include "cmemory.h"
     20 #include "messageimpl.h"
     21 #include "plurrule_impl.h"
     22 #include "uassert.h"
     23 #include "uhash.h"
     24 
     25 #if !UCONFIG_NO_FORMATTING
     26 
     27 U_NAMESPACE_BEGIN
     28 
     29 static const UChar OTHER_STRING[] = {
     30     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
     31 };
     32 
     33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
     34 
     35 PluralFormat::PluralFormat(UErrorCode& status)
     36         : locale(Locale::getDefault()),
     37           msgPattern(status),
     38           numberFormat(NULL),
     39           offset(0) {
     40     init(NULL, status);
     41 }
     42 
     43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
     44         : locale(loc),
     45           msgPattern(status),
     46           numberFormat(NULL),
     47           offset(0) {
     48     init(NULL, status);
     49 }
     50 
     51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
     52         : locale(Locale::getDefault()),
     53           msgPattern(status),
     54           numberFormat(NULL),
     55           offset(0) {
     56     init(&rules, status);
     57 }
     58 
     59 PluralFormat::PluralFormat(const Locale& loc,
     60                            const PluralRules& rules,
     61                            UErrorCode& status)
     62         : locale(loc),
     63           msgPattern(status),
     64           numberFormat(NULL),
     65           offset(0) {
     66     init(&rules, status);
     67 }
     68 
     69 PluralFormat::PluralFormat(const UnicodeString& pat,
     70                            UErrorCode& status)
     71         : locale(Locale::getDefault()),
     72           msgPattern(status),
     73           numberFormat(NULL),
     74           offset(0) {
     75     init(NULL, status);
     76     applyPattern(pat, status);
     77 }
     78 
     79 PluralFormat::PluralFormat(const Locale& loc,
     80                            const UnicodeString& pat,
     81                            UErrorCode& status)
     82         : locale(loc),
     83           msgPattern(status),
     84           numberFormat(NULL),
     85           offset(0) {
     86     init(NULL, status);
     87     applyPattern(pat, status);
     88 }
     89 
     90 PluralFormat::PluralFormat(const PluralRules& rules,
     91                            const UnicodeString& pat,
     92                            UErrorCode& status)
     93         : locale(Locale::getDefault()),
     94           msgPattern(status),
     95           numberFormat(NULL),
     96           offset(0) {
     97     init(&rules, status);
     98     applyPattern(pat, status);
     99 }
    100 
    101 PluralFormat::PluralFormat(const Locale& loc,
    102                            const PluralRules& rules,
    103                            const UnicodeString& pat,
    104                            UErrorCode& status)
    105         : locale(loc),
    106           msgPattern(status),
    107           numberFormat(NULL),
    108           offset(0) {
    109     init(&rules, status);
    110     applyPattern(pat, status);
    111 }
    112 
    113 PluralFormat::PluralFormat(const PluralFormat& other)
    114         : Format(other),
    115           locale(other.locale),
    116           msgPattern(other.msgPattern),
    117           numberFormat(NULL),
    118           offset(other.offset) {
    119     copyObjects(other);
    120 }
    121 
    122 void
    123 PluralFormat::copyObjects(const PluralFormat& other) {
    124     UErrorCode status = U_ZERO_ERROR;
    125     if (numberFormat != NULL) {
    126         delete numberFormat;
    127     }
    128     if (pluralRulesWrapper.pluralRules != NULL) {
    129         delete pluralRulesWrapper.pluralRules;
    130     }
    131 
    132     if (other.numberFormat == NULL) {
    133         numberFormat = NumberFormat::createInstance(locale, status);
    134     } else {
    135         numberFormat = (NumberFormat*)other.numberFormat->clone();
    136     }
    137     if (other.pluralRulesWrapper.pluralRules == NULL) {
    138         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    139     } else {
    140         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
    141     }
    142 }
    143 
    144 
    145 PluralFormat::~PluralFormat() {
    146     delete numberFormat;
    147 }
    148 
    149 void
    150 PluralFormat::init(const PluralRules* rules, UErrorCode& status) {
    151     if (U_FAILURE(status)) {
    152         return;
    153     }
    154 
    155     if (rules==NULL) {
    156         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
    157     } else {
    158         pluralRulesWrapper.pluralRules = rules->clone();
    159         if (pluralRulesWrapper.pluralRules == NULL) {
    160             status = U_MEMORY_ALLOCATION_ERROR;
    161             return;
    162         }
    163     }
    164 
    165     numberFormat= NumberFormat::createInstance(locale, status);
    166 }
    167 
    168 void
    169 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
    170     msgPattern.parsePluralStyle(newPattern, NULL, status);
    171     if (U_FAILURE(status)) {
    172         msgPattern.clear();
    173         offset = 0;
    174         return;
    175     }
    176     offset = msgPattern.getPluralOffset(0);
    177 }
    178 
    179 UnicodeString&
    180 PluralFormat::format(const Formattable& obj,
    181                    UnicodeString& appendTo,
    182                    FieldPosition& pos,
    183                    UErrorCode& status) const
    184 {
    185     if (U_FAILURE(status)) return appendTo;
    186 
    187     if (obj.isNumeric()) {
    188         return format(obj.getDouble(), appendTo, pos, status);
    189     } else {
    190         status = U_ILLEGAL_ARGUMENT_ERROR;
    191         return appendTo;
    192     }
    193 }
    194 
    195 UnicodeString
    196 PluralFormat::format(int32_t number, UErrorCode& status) const {
    197     FieldPosition fpos(0);
    198     UnicodeString result;
    199     return format(number, result, fpos, status);
    200 }
    201 
    202 UnicodeString
    203 PluralFormat::format(double number, UErrorCode& status) const {
    204     FieldPosition fpos(0);
    205     UnicodeString result;
    206     return format(number, result, fpos, status);
    207 }
    208 
    209 
    210 UnicodeString&
    211 PluralFormat::format(int32_t number,
    212                      UnicodeString& appendTo,
    213                      FieldPosition& pos,
    214                      UErrorCode& status) const {
    215     return format((double)number, appendTo, pos, status);
    216 }
    217 
    218 UnicodeString&
    219 PluralFormat::format(double number,
    220                      UnicodeString& appendTo,
    221                      FieldPosition& pos,
    222                      UErrorCode& status) const {
    223     if (U_FAILURE(status)) {
    224         return appendTo;
    225     }
    226     if (msgPattern.countParts() == 0) {
    227         return numberFormat->format(number, appendTo, pos);
    228     }
    229     // Get the appropriate sub-message.
    230     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
    231     // Replace syntactic # signs in the top level of this sub-message
    232     // (not in nested arguments) with the formatted number-offset.
    233     const UnicodeString& pattern = msgPattern.getPatternString();
    234     number -= offset;
    235     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    236     for (;;) {
    237         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
    238         const UMessagePatternPartType type = part.getType();
    239         int32_t index = part.getIndex();
    240         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
    241             return appendTo.append(pattern, prevIndex, index - prevIndex);
    242         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
    243             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
    244             appendTo.append(pattern, prevIndex, index - prevIndex);
    245             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
    246                 numberFormat->format(number, appendTo);
    247             }
    248             prevIndex = part.getLimit();
    249         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
    250             appendTo.append(pattern, prevIndex, index - prevIndex);
    251             prevIndex = index;
    252             partIndex = msgPattern.getLimitPartIndex(partIndex);
    253             index = msgPattern.getPart(partIndex).getLimit();
    254             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
    255             prevIndex = index;
    256         }
    257     }
    258 }
    259 
    260 UnicodeString&
    261 PluralFormat::toPattern(UnicodeString& appendTo) {
    262     if (0 == msgPattern.countParts()) {
    263         appendTo.setToBogus();
    264     } else {
    265         appendTo.append(msgPattern.getPatternString());
    266     }
    267     return appendTo;
    268 }
    269 
    270 void
    271 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
    272     if (U_FAILURE(status)) {
    273         return;
    274     }
    275     locale = loc;
    276     msgPattern.clear();
    277     delete numberFormat;
    278     offset = 0;
    279     numberFormat = NULL;
    280     pluralRulesWrapper.reset();
    281     init(NULL, status);
    282 }
    283 
    284 void
    285 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
    286     if (U_FAILURE(status)) {
    287         return;
    288     }
    289     NumberFormat* nf = (NumberFormat*)format->clone();
    290     if (nf != NULL) {
    291         delete numberFormat;
    292         numberFormat = nf;
    293     } else {
    294         status = U_MEMORY_ALLOCATION_ERROR;
    295     }
    296 }
    297 
    298 Format*
    299 PluralFormat::clone() const
    300 {
    301     return new PluralFormat(*this);
    302 }
    303 
    304 
    305 PluralFormat&
    306 PluralFormat::operator=(const PluralFormat& other) {
    307     if (this != &other) {
    308         locale = other.locale;
    309         msgPattern = other.msgPattern;
    310         offset = other.offset;
    311         copyObjects(other);
    312     }
    313 
    314     return *this;
    315 }
    316 
    317 UBool
    318 PluralFormat::operator==(const Format& other) const {
    319     if (this == &other) {
    320         return TRUE;
    321     }
    322     if (!Format::operator==(other)) {
    323         return FALSE;
    324     }
    325     const PluralFormat& o = (const PluralFormat&)other;
    326     return
    327         locale == o.locale &&
    328         msgPattern == o.msgPattern &&  // implies same offset
    329         (numberFormat == NULL) == (o.numberFormat == NULL) &&
    330         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
    331         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
    332         (pluralRulesWrapper.pluralRules == NULL ||
    333             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
    334 }
    335 
    336 UBool
    337 PluralFormat::operator!=(const Format& other) const {
    338     return  !operator==(other);
    339 }
    340 
    341 void
    342 PluralFormat::parseObject(const UnicodeString& /*source*/,
    343                         Formattable& /*result*/,
    344                         ParsePosition& pos) const
    345 {
    346     // Parsing not supported.
    347     pos.setErrorIndex(pos.getIndex());
    348 }
    349 
    350 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    351                                      const PluralSelector& selector, double number, UErrorCode& ec) {
    352     if (U_FAILURE(ec)) {
    353         return 0;
    354     }
    355     int32_t count=pattern.countParts();
    356     double offset;
    357     const MessagePattern::Part* part=&pattern.getPart(partIndex);
    358     if (MessagePattern::Part::hasNumericValue(part->getType())) {
    359         offset=pattern.getNumericValue(*part);
    360         ++partIndex;
    361     } else {
    362         offset=0;
    363     }
    364     // The keyword is empty until we need to match against non-explicit, not-"other" value.
    365     // Then we get the keyword from the selector.
    366     // (In other words, we never call the selector if we match against an explicit value,
    367     // or if the only non-explicit keyword is "other".)
    368     UnicodeString keyword;
    369     UnicodeString other(FALSE, OTHER_STRING, 5);
    370     // When we find a match, we set msgStart>0 and also set this boolean to true
    371     // to avoid matching the keyword again (duplicates are allowed)
    372     // while we continue to look for an explicit-value match.
    373     UBool haveKeywordMatch=FALSE;
    374     // msgStart is 0 until we find any appropriate sub-message.
    375     // We remember the first "other" sub-message if we have not seen any
    376     // appropriate sub-message before.
    377     // We remember the first matching-keyword sub-message if we have not seen
    378     // one of those before.
    379     // (The parser allows [does not check for] duplicate keywords.
    380     // We just have to make sure to take the first one.)
    381     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    382     // at the first keyword match.
    383     // We keep going until we find an explicit-value match or reach the end of the plural style.
    384     int32_t msgStart=0;
    385     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    386     // until ARG_LIMIT or end of plural-only pattern.
    387     do {
    388         part=&pattern.getPart(partIndex++);
    389         const UMessagePatternPartType type = part->getType();
    390         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
    391             break;
    392         }
    393         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
    394         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
    395         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
    396             // explicit value like "=2"
    397             part=&pattern.getPart(partIndex++);
    398             if(number==pattern.getNumericValue(*part)) {
    399                 // matches explicit value
    400                 return partIndex;
    401             }
    402         } else if(!haveKeywordMatch) {
    403             // plural keyword like "few" or "other"
    404             // Compare "other" first and call the selector if this is not "other".
    405             if(pattern.partSubstringMatches(*part, other)) {
    406                 if(msgStart==0) {
    407                     msgStart=partIndex;
    408                     if(0 == keyword.compare(other)) {
    409                         // This is the first "other" sub-message,
    410                         // and the selected keyword is also "other".
    411                         // Do not match "other" again.
    412                         haveKeywordMatch=TRUE;
    413                     }
    414                 }
    415             } else {
    416                 if(keyword.isEmpty()) {
    417                     keyword=selector.select(number-offset, ec);
    418                     if(msgStart!=0 && (0 == keyword.compare(other))) {
    419                         // We have already seen an "other" sub-message.
    420                         // Do not match "other" again.
    421                         haveKeywordMatch=TRUE;
    422                         // Skip keyword matching but do getLimitPartIndex().
    423                     }
    424                 }
    425                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
    426                     // keyword matches
    427                     msgStart=partIndex;
    428                     // Do not match this keyword again.
    429                     haveKeywordMatch=TRUE;
    430                 }
    431             }
    432         }
    433         partIndex=pattern.getLimitPartIndex(partIndex);
    434     } while(++partIndex<count);
    435     return msgStart;
    436 }
    437 
    438 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    439     delete pluralRules;
    440 }
    441 
    442 UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
    443                                                           UErrorCode& /*ec*/) const {
    444     return pluralRules->select(number);
    445 }
    446 
    447 void PluralFormat::PluralSelectorAdapter::reset() {
    448     delete pluralRules;
    449     pluralRules = NULL;
    450 }
    451 
    452 
    453 U_NAMESPACE_END
    454 
    455 
    456 #endif /* #if !UCONFIG_NO_FORMATTING */
    457 
    458 //eof
    459