Home | History | Annotate | Download | only in i18n
      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
      7 
      8 #include "resource.h"
      9 #include "number_compact.h"
     10 #include "unicode/ustring.h"
     11 #include "unicode/ures.h"
     12 #include "cstring.h"
     13 #include "charstr.h"
     14 #include "uresimp.h"
     15 
     16 using namespace icu;
     17 using namespace icu::number;
     18 using namespace icu::number::impl;
     19 
     20 namespace {
     21 
// Sentinel pattern recorded in CompactData::patterns when a locale's compact entry is the
// literal string "0". Storing a non-null marker (instead of leaving the slot null) keeps a
// later, less specific resource pass from filling the slot, which prevents falling back to
// root. The sentinel is recognized by pointer identity ("==" on the address), never by its
// characters, and CompactData::getPattern() reports it to callers as nullptr.
const UChar *USE_FALLBACK = u"<USE FALLBACK>";
     25 
     26 /** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */
     27 void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,
     28                                  CharString &sb, UErrorCode &status) {
     29     sb.clear();
     30     sb.append("NumberElements/", status);
     31     sb.append(nsName, status);
     32     sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status);
     33     sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status);
     34 }
     35 
     36 int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) {
     37     return magnitude * StandardPlural::COUNT + plural;
     38 }
     39 
     40 int32_t countZeros(const UChar *patternString, int32_t patternLength) {
     41     // NOTE: This strategy for computing the number of zeros is a hack for efficiency.
     42     // It could break if there are any 0s that aren't part of the main pattern.
     43     int32_t numZeros = 0;
     44     for (int32_t i = 0; i < patternLength; i++) {
     45         if (patternString[i] == u'0') {
     46             numZeros++;
     47         } else if (numZeros > 0) {
     48             break; // zeros should always be contiguous
     49         }
     50     }
     51     return numZeros;
     52 }
     53 
     54 } // namespace
     55 
// Creates an empty data set. The empty parentheses on patterns() and multipliers()
// value-initialize both arrays, so every pattern slot starts out null and every multiplier
// starts out 0; the rest of this file relies on those values to mean "not loaded yet".
// isEmpty stays TRUE until CompactDataSink::put() stores the first multiplier.
CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) {
}
     59 
     60 void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
     61                            CompactType compactType, UErrorCode &status) {
     62     CompactDataSink sink(*this);
     63     LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));
     64     if (U_FAILURE(status)) { return; }
     65 
     66     bool nsIsLatn = strcmp(nsName, "latn") == 0;
     67     bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;
     68 
     69     // Fall back to latn numbering system and/or short compact style.
     70     CharString resourceKey;
     71     getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);
     72     UErrorCode localStatus = U_ZERO_ERROR;
     73     ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     74     if (isEmpty && !nsIsLatn) {
     75         getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status);
     76         localStatus = U_ZERO_ERROR;
     77         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     78     }
     79     if (isEmpty && !compactIsShort) {
     80         getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
     81         localStatus = U_ZERO_ERROR;
     82         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     83     }
     84     if (isEmpty && !nsIsLatn && !compactIsShort) {
     85         getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
     86         localStatus = U_ZERO_ERROR;
     87         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
     88     }
     89 
     90     // The last fallback should be guaranteed to return data.
     91     if (isEmpty) {
     92         status = U_INTERNAL_PROGRAM_ERROR;
     93     }
     94 }
     95 
     96 int32_t CompactData::getMultiplier(int32_t magnitude) const {
     97     if (magnitude < 0) {
     98         return 0;
     99     }
    100     if (magnitude > largestMagnitude) {
    101         magnitude = largestMagnitude;
    102     }
    103     return multipliers[magnitude];
    104 }
    105 
    106 const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const {
    107     if (magnitude < 0) {
    108         return nullptr;
    109     }
    110     if (magnitude > largestMagnitude) {
    111         magnitude = largestMagnitude;
    112     }
    113     const UChar *patternString = patterns[getIndex(magnitude, plural)];
    114     if (patternString == nullptr && plural != StandardPlural::OTHER) {
    115         // Fall back to "other" plural variant
    116         patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)];
    117     }
    118     if (patternString == USE_FALLBACK) { // == is intended
    119         // Return null if USE_FALLBACK is present
    120         patternString = nullptr;
    121     }
    122     return patternString;
    123 }
    124 
    125 void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const {
    126     U_ASSERT(output.isEmpty());
    127     // NOTE: In C++, this is done more manually with a UVector.
    128     // In Java, we can take advantage of JDK HashSet.
    129     for (auto pattern : patterns) {
    130         if (pattern == nullptr || pattern == USE_FALLBACK) {
    131             continue;
    132         }
    133 
    134         // Insert pattern into the UVector if the UVector does not already contain the pattern.
    135         // Search the UVector from the end since identical patterns are likely to be adjacent.
    136         for (int32_t i = output.size() - 1; i >= 0; i--) {
    137             if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) {
    138                 goto continue_outer;
    139             }
    140         }
    141 
    142         // The string was not found; add it to the UVector.
    143         // ANDY: This requires a const_cast.  Why?
    144         output.addElement(const_cast<UChar *>(pattern), status);
    145 
    146         continue_outer:
    147         continue;
    148     }
    149 }
    150 
    151 void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/,
    152                                        UErrorCode &status) {
    153     // traverse into the table of powers of ten
    154     ResourceTable powersOfTenTable = value.getTable(status);
    155     if (U_FAILURE(status)) { return; }
    156     for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
    157 
    158         // Assumes that the keys are always of the form "10000" where the magnitude is the
    159         // length of the key minus one.  We expect magnitudes to be less than MAX_DIGITS.
    160         auto magnitude = static_cast<int8_t> (strlen(key) - 1);
    161         int8_t multiplier = data.multipliers[magnitude];
    162         U_ASSERT(magnitude < COMPACT_MAX_DIGITS);
    163 
    164         // Iterate over the plural variants ("one", "other", etc)
    165         ResourceTable pluralVariantsTable = value.getTable(status);
    166         if (U_FAILURE(status)) { return; }
    167         for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
    168 
    169             // Skip this magnitude/plural if we already have it from a child locale.
    170             // Note: This also skips USE_FALLBACK entries.
    171             StandardPlural::Form plural = StandardPlural::fromString(key, status);
    172             if (U_FAILURE(status)) { return; }
    173             if (data.patterns[getIndex(magnitude, plural)] != nullptr) {
    174                 continue;
    175             }
    176 
    177             // The value "0" means that we need to use the default pattern and not fall back
    178             // to parent locales. Example locale where this is relevant: 'it'.
    179             int32_t patternLength;
    180             const UChar *patternString = value.getString(patternLength, status);
    181             if (U_FAILURE(status)) { return; }
    182             if (u_strcmp(patternString, u"0") == 0) {
    183                 patternString = USE_FALLBACK;
    184                 patternLength = 0;
    185             }
    186 
    187             // Save the pattern string. We will parse it lazily.
    188             data.patterns[getIndex(magnitude, plural)] = patternString;
    189 
    190             // If necessary, compute the multiplier: the difference between the magnitude
    191             // and the number of zeros in the pattern.
    192             if (multiplier == 0) {
    193                 int32_t numZeros = countZeros(patternString, patternLength);
    194                 if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun"
    195                     multiplier = static_cast<int8_t> (numZeros - magnitude - 1);
    196                 }
    197             }
    198         }
    199 
    200         // Save the multiplier.
    201         if (data.multipliers[magnitude] == 0) {
    202             data.multipliers[magnitude] = multiplier;
    203             if (magnitude > data.largestMagnitude) {
    204                 data.largestMagnitude = magnitude;
    205             }
    206             data.isEmpty = false;
    207         } else {
    208             U_ASSERT(data.multipliers[magnitude] == multiplier);
    209         }
    210     }
    211 }
    212 
    213 ///////////////////////////////////////////////////////////
    214 /// END OF CompactData.java; BEGIN CompactNotation.java ///
    215 ///////////////////////////////////////////////////////////
    216 
    217 CompactHandler::CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName,
    218                                CompactType compactType, const PluralRules *rules,
    219                                MutablePatternModifier *buildReference, const MicroPropsGenerator *parent,
    220                                UErrorCode &status)
    221         : rules(rules), parent(parent) {
    222     data.populate(locale, nsName, compactStyle, compactType, status);
    223     if (buildReference != nullptr) {
    224         // Safe code path
    225         precomputeAllModifiers(*buildReference, status);
    226         safe = TRUE;
    227     } else {
    228         // Unsafe code path
    229         safe = FALSE;
    230     }
    231 }
    232 
    233 CompactHandler::~CompactHandler() {
    234     for (int32_t i = 0; i < precomputedModsLength; i++) {
    235         delete precomputedMods[i].mod;
    236     }
    237 }
    238 
    239 void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) {
    240     if (U_FAILURE(status)) { return; }
    241 
    242     // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
    243     UVector allPatterns(12, status);
    244     if (U_FAILURE(status)) { return; }
    245     data.getUniquePatterns(allPatterns, status);
    246     if (U_FAILURE(status)) { return; }
    247 
    248     // C++ only: ensure that precomputedMods has room.
    249     precomputedModsLength = allPatterns.size();
    250     if (precomputedMods.getCapacity() < precomputedModsLength) {
    251         precomputedMods.resize(allPatterns.size(), status);
    252         if (U_FAILURE(status)) { return; }
    253     }
    254 
    255     for (int32_t i = 0; i < precomputedModsLength; i++) {
    256         auto patternString = static_cast<const UChar *>(allPatterns[i]);
    257         UnicodeString hello(patternString);
    258         CompactModInfo &info = precomputedMods[i];
    259         ParsedPatternInfo patternInfo;
    260         PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
    261         if (U_FAILURE(status)) { return; }
    262         buildReference.setPatternInfo(&patternInfo);
    263         info.mod = buildReference.createImmutable(status);
    264         if (U_FAILURE(status)) { return; }
    265         info.numDigits = patternInfo.positive.integerTotal;
    266         info.patternString = patternString;
    267     }
    268 }
    269 
    270 void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
    271                                      UErrorCode &status) const {
    272     parent->processQuantity(quantity, micros, status);
    273     if (U_FAILURE(status)) { return; }
    274 
    275     // Treat zero as if it had magnitude 0
    276     int magnitude;
    277     if (quantity.isZero()) {
    278         magnitude = 0;
    279         micros.rounding.apply(quantity, status);
    280     } else {
    281         // TODO: Revisit chooseMultiplierAndApply
    282         int multiplier = micros.rounding.chooseMultiplierAndApply(quantity, data, status);
    283         magnitude = quantity.isZero() ? 0 : quantity.getMagnitude();
    284         magnitude -= multiplier;
    285     }
    286 
    287     StandardPlural::Form plural = quantity.getStandardPlural(rules);
    288     const UChar *patternString = data.getPattern(magnitude, plural);
    289     int numDigits = -1;
    290     if (patternString == nullptr) {
    291         // Use the default (non-compact) modifier.
    292         // No need to take any action.
    293     } else if (safe) {
    294         // Safe code path.
    295         // Java uses a hash set here for O(1) lookup.  C++ uses a linear search.
    296         // TODO: Benchmark this and maybe change to a binary search or hash table.
    297         int32_t i = 0;
    298         for (; i < precomputedModsLength; i++) {
    299             const CompactModInfo &info = precomputedMods[i];
    300             if (u_strcmp(patternString, info.patternString) == 0) {
    301                 info.mod->applyToMicros(micros, quantity);
    302                 numDigits = info.numDigits;
    303                 break;
    304             }
    305         }
    306         // It should be guaranteed that we found the entry.
    307         U_ASSERT(i < precomputedModsLength);
    308     } else {
    309         // Unsafe code path.
    310         // Overwrite the PatternInfo in the existing modMiddle.
    311         // C++ Note: Use unsafePatternInfo for proper lifecycle.
    312         ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo;
    313         PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
    314         static_cast<MutablePatternModifier*>(const_cast<Modifier*>(micros.modMiddle))
    315             ->setPatternInfo(&patternInfo);
    316         numDigits = patternInfo.positive.integerTotal;
    317     }
    318 
    319     // FIXME: Deal with numDigits == 0 (Awaiting a test case)
    320     (void)numDigits;
    321 
    322     // We already performed rounding. Do not perform it again.
    323     micros.rounding = Rounder::constructPassThrough();
    324 }
    325 
    326 #endif /* #if !UCONFIG_NO_FORMATTING */
    327