Home | History | Annotate | Download | only in i18n
      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
      7 
      8 #include "cstring.h"
      9 #include "unicode/ures.h"
     10 #include "uresimp.h"
     11 #include "charstr.h"
     12 #include "number_formatimpl.h"
     13 #include "unicode/numfmt.h"
     14 #include "number_patternstring.h"
     15 #include "number_utils.h"
     16 #include "unicode/numberformatter.h"
     17 #include "unicode/dcfmtsym.h"
     18 #include "number_scientific.h"
     19 #include "number_compact.h"
     20 
     21 using namespace icu;
     22 using namespace icu::number;
     23 using namespace icu::number::impl;
     24 
     25 namespace {
     26 
     27 // NOTE: In Java, the method to get a pattern from the resource bundle exists in NumberFormat.
     28 // In C++, we have to implement that logic here.
     29 // TODO: Make Java and C++ consistent?
     30 
     // Selects which CLDR number pattern getPatternForStyle() loads. The trailing comment on each
     // value is the resource key that getPatternForStyle() maps it to.
     enum CldrPatternStyle {
         CLDR_PATTERN_STYLE_DECIMAL = 0,  // "decimalFormat"
         CLDR_PATTERN_STYLE_CURRENCY,     // "currencyFormat"
         CLDR_PATTERN_STYLE_ACCOUNTING,   // "accountingFormat"
         CLDR_PATTERN_STYLE_PERCENT       // "percentFormat"
         // TODO: Consider scientific format.
     };
     38 
     39 const char16_t *
     40 doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus,
     41              UErrorCode &localStatus) {
     42     // Construct the path into the resource bundle
     43     CharString key;
     44     key.append("NumberElements/", publicStatus);
     45     key.append(nsName, publicStatus);
     46     key.append("/patterns/", publicStatus);
     47     key.append(patternKey, publicStatus);
     48     if (U_FAILURE(publicStatus)) {
     49         return u"";
     50     }
     51     return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus);
     52 }
     53 
     54 const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style,
     55                                    UErrorCode &status) {
     56     const char *patternKey;
     57     switch (style) {
     58         case CLDR_PATTERN_STYLE_DECIMAL:
     59             patternKey = "decimalFormat";
     60             break;
     61         case CLDR_PATTERN_STYLE_CURRENCY:
     62             patternKey = "currencyFormat";
     63             break;
     64         case CLDR_PATTERN_STYLE_ACCOUNTING:
     65             patternKey = "accountingFormat";
     66             break;
     67         case CLDR_PATTERN_STYLE_PERCENT:
     68         default:
     69             patternKey = "percentFormat";
     70             break;
     71     }
     72     LocalUResourceBundlePointer res(ures_open(nullptr, locale.getName(), &status));
     73     if (U_FAILURE(status)) { return u""; }
     74 
     75     // Attempt to get the pattern with the native numbering system.
     76     UErrorCode localStatus = U_ZERO_ERROR;
     77     const char16_t *pattern;
     78     pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus);
     79     if (U_FAILURE(status)) { return u""; }
     80 
     81     // Fall back to latn if native numbering system does not have the right pattern
     82     if (U_FAILURE(localStatus) && uprv_strcmp("latn", nsName) != 0) {
     83         localStatus = U_ZERO_ERROR;
     84         pattern = doGetPattern(res.getAlias(), "latn", patternKey, status, localStatus);
     85         if (U_FAILURE(status)) { return u""; }
     86     }
     87 
     88     return pattern;
     89 }
     90 
     91 inline bool unitIsCurrency(const MeasureUnit &unit) {
     92     return uprv_strcmp("currency", unit.getType()) == 0;
     93 }
     94 
     95 inline bool unitIsNoUnit(const MeasureUnit &unit) {
     96     return uprv_strcmp("none", unit.getType()) == 0;
     97 }
     98 
     99 inline bool unitIsPercent(const MeasureUnit &unit) {
    100     return uprv_strcmp("percent", unit.getSubtype()) == 0;
    101 }
    102 
    103 inline bool unitIsPermille(const MeasureUnit &unit) {
    104     return uprv_strcmp("permille", unit.getSubtype()) == 0;
    105 }
    106 
    107 }  // namespace
    108 
    109 NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps &macros, UErrorCode &status) {
    110     return new NumberFormatterImpl(macros, true, status);
    111 }
    112 
    113 void NumberFormatterImpl::applyStatic(const MacroProps &macros, DecimalQuantity &inValue,
    114                                       NumberStringBuilder &outString, UErrorCode &status) {
    115     NumberFormatterImpl impl(macros, false, status);
    116     impl.applyUnsafe(inValue, outString, status);
    117 }
    118 
    119 // NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA:
    120 // The "safe" apply method uses a new MicroProps. In the MicroPropsGenerator, fMicros is copied into the new instance.
    121 // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
    122 // See MicroProps::processQuantity() for details.
    123 
    124 void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString,
    125                                 UErrorCode &status) const {
    126     if (U_FAILURE(status)) { return; }
    127     MicroProps micros;
    128     fMicroPropsGenerator->processQuantity(inValue, micros, status);
    129     if (U_FAILURE(status)) { return; }
    130     microsToString(micros, inValue, outString, status);
    131 }
    132 
    133 void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString,
    134                                       UErrorCode &status) {
    135     if (U_FAILURE(status)) { return; }
    136     fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
    137     if (U_FAILURE(status)) { return; }
    138     microsToString(fMicros, inValue, outString, status);
    139 }
    140 
    141 NumberFormatterImpl::NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status) {
    142     fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
    143 }
    144 
    145 //////////
    146 
    147 const MicroPropsGenerator *
    148 NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status) {
    149     const MicroPropsGenerator *chain = &fMicros;
    150 
    151     // Check that macros is error-free before continuing.
    152     if (macros.copyErrorTo(status)) {
    153         return nullptr;
    154     }
    155 
    156     // TODO: Accept currency symbols from DecimalFormatSymbols?
    157 
    158     // Pre-compute a few values for efficiency.
    159     bool isCurrency = unitIsCurrency(macros.unit);
    160     bool isNoUnit = unitIsNoUnit(macros.unit);
    161     bool isPercent = isNoUnit && unitIsPercent(macros.unit);
    162     bool isPermille = isNoUnit && unitIsPermille(macros.unit);
    163     bool isCldrUnit = !isCurrency && !isNoUnit;
    164     bool isAccounting =
    165             macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS;
    166     CurrencyUnit currency(kDefaultCurrency, status);
    167     if (isCurrency) {
    168         currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
    169     }
    170     UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
    171     if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
    172         unitWidth = macros.unitWidth;
    173     }
    174 
    175     // Select the numbering system.
    176     LocalPointer<const NumberingSystem> nsLocal;
    177     const NumberingSystem *ns;
    178     if (macros.symbols.isNumberingSystem()) {
    179         ns = macros.symbols.getNumberingSystem();
    180     } else {
    181         // TODO: Is there a way to avoid creating the NumberingSystem object?
    182         ns = NumberingSystem::createInstance(macros.locale, status);
    183         // Give ownership to the function scope.
    184         nsLocal.adoptInstead(ns);
    185     }
    186     const char *nsName = U_SUCCESS(status) ? ns->getName() : "latn";
    187 
    188     // Load and parse the pattern string.  It is used for grouping sizes and affixes only.
    189     CldrPatternStyle patternStyle;
    190     if (isPercent || isPermille) {
    191         patternStyle = CLDR_PATTERN_STYLE_PERCENT;
    192     } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
    193         patternStyle = CLDR_PATTERN_STYLE_DECIMAL;
    194     } else if (isAccounting) {
    195         // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now,
    196         // the API contract allows us to add support to other units in the future.
    197         patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING;
    198     } else {
    199         patternStyle = CLDR_PATTERN_STYLE_CURRENCY;
    200     }
    201     const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status);
    202     auto patternInfo = new ParsedPatternInfo();
    203     fPatternInfo.adoptInstead(patternInfo);
    204     PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status);
    205 
    206     /////////////////////////////////////////////////////////////////////////////////////
    207     /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR ///
    208     /////////////////////////////////////////////////////////////////////////////////////
    209 
    210     // Symbols
    211     if (macros.symbols.isDecimalFormatSymbols()) {
    212         fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
    213     } else {
    214         fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status);
    215         // Give ownership to the NumberFormatterImpl.
    216         fSymbols.adoptInstead(fMicros.symbols);
    217     }
    218 
    219     // Rounding strategy
    220     if (!macros.rounder.isBogus()) {
    221         fMicros.rounding = macros.rounder;
    222     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
    223         fMicros.rounding = Rounder::integer().withMinDigits(2);
    224     } else if (isCurrency) {
    225         fMicros.rounding = Rounder::currency(UCURR_USAGE_STANDARD);
    226     } else {
    227         fMicros.rounding = Rounder::maxFraction(6);
    228     }
    229     fMicros.rounding.setLocaleData(currency, status);
    230 
    231     // Grouping strategy
    232     if (!macros.grouper.isBogus()) {
    233         fMicros.grouping = macros.grouper;
    234     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
    235         // Compact notation uses minGrouping by default since ICU 59
    236         fMicros.grouping = Grouper::minTwoDigits();
    237     } else {
    238         fMicros.grouping = Grouper::defaults();
    239     }
    240     fMicros.grouping.setLocaleData(*fPatternInfo);
    241 
    242     // Padding strategy
    243     if (!macros.padder.isBogus()) {
    244         fMicros.padding = macros.padder;
    245     } else {
    246         fMicros.padding = Padder::none();
    247     }
    248 
    249     // Integer width
    250     if (!macros.integerWidth.isBogus()) {
    251         fMicros.integerWidth = macros.integerWidth;
    252     } else {
    253         fMicros.integerWidth = IntegerWidth::zeroFillTo(1);
    254     }
    255 
    256     // Sign display
    257     if (macros.sign != UNUM_SIGN_COUNT) {
    258         fMicros.sign = macros.sign;
    259     } else {
    260         fMicros.sign = UNUM_SIGN_AUTO;
    261     }
    262 
    263     // Decimal mark display
    264     if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) {
    265         fMicros.decimal = macros.decimal;
    266     } else {
    267         fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO;
    268     }
    269 
    270     // Use monetary separator symbols
    271     fMicros.useCurrency = isCurrency;
    272 
    273     // Inner modifier (scientific notation)
    274     if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
    275         fScientificHandler.adoptInstead(new ScientificHandler(&macros.notation, fMicros.symbols, chain));
    276         chain = fScientificHandler.getAlias();
    277     } else {
    278         // No inner modifier required
    279         fMicros.modInner = &fMicros.helpers.emptyStrongModifier;
    280     }
    281 
    282     // Middle modifier (patterns, positive/negative, currency symbols, percent)
    283     auto patternModifier = new MutablePatternModifier(false);
    284     fPatternModifier.adoptInstead(patternModifier);
    285     patternModifier->setPatternInfo(fPatternInfo.getAlias());
    286     patternModifier->setPatternAttributes(fMicros.sign, isPermille);
    287     if (patternModifier->needsPlurals()) {
    288         patternModifier->setSymbols(
    289                 fMicros.symbols,
    290                 currency,
    291                 unitWidth,
    292                 resolvePluralRules(macros.rules, macros.locale, status));
    293     } else {
    294         patternModifier->setSymbols(fMicros.symbols, currency, unitWidth, nullptr);
    295     }
    296     if (safe) {
    297         fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status));
    298         chain = fImmutablePatternModifier.getAlias();
    299     } else {
    300         patternModifier->addToChain(chain);
    301         chain = patternModifier;
    302     }
    303 
    304     // Outer modifier (CLDR units and currency long names)
    305     if (isCldrUnit) {
    306         fLongNameHandler.adoptInstead(
    307                 new LongNameHandler(
    308                         LongNameHandler::forMeasureUnit(
    309                                 macros.locale,
    310                                 macros.unit,
    311                                 unitWidth,
    312                                 resolvePluralRules(macros.rules, macros.locale, status),
    313                                 chain,
    314                                 status)));
    315         chain = fLongNameHandler.getAlias();
    316     } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
    317         fLongNameHandler.adoptInstead(
    318                 new LongNameHandler(
    319                         LongNameHandler::forCurrencyLongNames(
    320                                 macros.locale,
    321                                 currency,
    322                                 resolvePluralRules(macros.rules, macros.locale, status),
    323                                 chain,
    324                                 status)));
    325         chain = fLongNameHandler.getAlias();
    326     } else {
    327         // No outer modifier required
    328         fMicros.modOuter = &fMicros.helpers.emptyWeakModifier;
    329     }
    330 
    331     // Compact notation
    332     // NOTE: Compact notation can (but might not) override the middle modifier and rounding.
    333     // It therefore needs to go at the end of the chain.
    334     if (macros.notation.fType == Notation::NTN_COMPACT) {
    335         CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME)
    336                                   ? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL;
    337         fCompactHandler.adoptInstead(
    338                 new CompactHandler(
    339                         macros.notation.fUnion.compactStyle,
    340                         macros.locale,
    341                         nsName,
    342                         compactType,
    343                         resolvePluralRules(macros.rules, macros.locale, status),
    344                         safe ? patternModifier : nullptr,
    345                         chain,
    346                         status));
    347         chain = fCompactHandler.getAlias();
    348     }
    349 
    350     return chain;
    351 }
    352 
    353 const PluralRules *
    354 NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale,
    355                                         UErrorCode &status) {
    356     if (rulesPtr != nullptr) {
    357         return rulesPtr;
    358     }
    359     // Lazily create PluralRules
    360     if (fRules.isNull()) {
    361         fRules.adoptInstead(PluralRules::forLocale(locale, status));
    362     }
    363     return fRules.getAlias();
    364 }
    365 
    366 int32_t NumberFormatterImpl::microsToString(const MicroProps &micros, DecimalQuantity &quantity,
    367                                             NumberStringBuilder &string, UErrorCode &status) {
    368     micros.rounding.apply(quantity, status);
    369     micros.integerWidth.apply(quantity, status);
    370     int32_t length = writeNumber(micros, quantity, string, status);
    371     // NOTE: When range formatting is added, these modifiers can bubble up.
    372     // For now, apply them all here at once.
    373     // Always apply the inner modifier (which is "strong").
    374     length += micros.modInner->apply(string, 0, length, status);
    375     if (micros.padding.isValid()) {
    376         length += micros.padding
    377                 .padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, length, status);
    378     } else {
    379         length += micros.modMiddle->apply(string, 0, length, status);
    380         length += micros.modOuter->apply(string, 0, length, status);
    381     }
    382     return length;
    383 }
    384 
    385 int32_t NumberFormatterImpl::writeNumber(const MicroProps &micros, DecimalQuantity &quantity,
    386                                          NumberStringBuilder &string, UErrorCode &status) {
    387     int32_t length = 0;
    388     if (quantity.isInfinite()) {
    389         length += string.insert(
    390                 length,
    391                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol),
    392                 UNUM_INTEGER_FIELD,
    393                 status);
    394 
    395     } else if (quantity.isNaN()) {
    396         length += string.insert(
    397                 length,
    398                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol),
    399                 UNUM_INTEGER_FIELD,
    400                 status);
    401 
    402     } else {
    403         // Add the integer digits
    404         length += writeIntegerDigits(micros, quantity, string, status);
    405 
    406         // Add the decimal point
    407         if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) {
    408             length += string.insert(
    409                     length,
    410                     micros.useCurrency ? micros.symbols->getSymbol(
    411                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros
    412                             .symbols
    413                             ->getSymbol(
    414                                     DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol),
    415                     UNUM_DECIMAL_SEPARATOR_FIELD,
    416                     status);
    417         }
    418 
    419         // Add the fraction digits
    420         length += writeFractionDigits(micros, quantity, string, status);
    421     }
    422 
    423     return length;
    424 }
    425 
    426 int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity,
    427                                                 NumberStringBuilder &string, UErrorCode &status) {
    428     int length = 0;
    429     int integerCount = quantity.getUpperDisplayMagnitude() + 1;
    430     for (int i = 0; i < integerCount; i++) {
    431         // Add grouping separator
    432         if (micros.grouping.groupAtPosition(i, quantity)) {
    433             length += string.insert(
    434                     0,
    435                     micros.useCurrency ? micros.symbols->getSymbol(
    436                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol)
    437                                        : micros.symbols->getSymbol(
    438                             DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol),
    439                     UNUM_GROUPING_SEPARATOR_FIELD,
    440                     status);
    441         }
    442 
    443         // Get and append the next digit value
    444         int8_t nextDigit = quantity.getDigit(i);
    445         length += string.insert(
    446                 0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status);
    447     }
    448     return length;
    449 }
    450 
    451 int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity,
    452                                                  NumberStringBuilder &string, UErrorCode &status) {
    453     int length = 0;
    454     int fractionCount = -quantity.getLowerDisplayMagnitude();
    455     for (int i = 0; i < fractionCount; i++) {
    456         // Get and append the next digit value
    457         int8_t nextDigit = quantity.getDigit(-i - 1);
    458         length += string.append(
    459                 getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_FRACTION_FIELD, status);
    460     }
    461     return length;
    462 }
    463 
    464 #endif /* #if !UCONFIG_NO_FORMATTING */
    465