Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1997-2015, International Business Machines Corporation and    *
      6 * others. All Rights Reserved.                                                *
      7 *******************************************************************************
      8 *
      9 * File COMPACTDECIMALFORMAT.CPP
     10 *
     11 ********************************************************************************
     12 */
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_FORMATTING
     16 
     17 #include "charstr.h"
     18 #include "cstring.h"
     19 #include "digitlst.h"
     20 #include "mutex.h"
     21 #include "unicode/compactdecimalformat.h"
     22 #include "unicode/numsys.h"
     23 #include "unicode/plurrule.h"
     24 #include "unicode/ures.h"
     25 #include "ucln_in.h"
     26 #include "uhash.h"
     27 #include "umutex.h"
     28 #include "unicode/ures.h"
     29 #include "uresimp.h"
     30 
// Maps locale name to CDFLocaleData struct.
// Created lazily by getCDFLocaleStyleData and released by cdf_cleanup.
static UHashtable* gCompactDecimalData = NULL;
// Guards creation of, lookups in, and insertions into gCompactDecimalData.
static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
     34 
     35 U_NAMESPACE_BEGIN
     36 
// Number of log10 buckets kept per style: the length of the divisors array
// and the bound above which powers of ten from the resource data are ignored.
static const int32_t MAX_DIGITS = 15;
// Resource keys and plural keyword used while loading compact patterns.
static const char gOther[] = "other";
static const char gLatnTag[] = "latn";
static const char gNumberElementsTag[] = "NumberElements";
static const char gDecimalFormatTag[] = "decimalFormat";
static const char gPatternsShort[] = "patternsShort";
static const char gPatternsLong[] = "patternsLong";
// Resource path of the Latin numbering-system data.
static const char gLatnPath[] = "NumberElements/latn";

// ASCII digit zero and apostrophe as UTF-16 code units.
static const UChar u_0 = 0x30;
static const UChar u_apos = 0x27;

// One-element pattern used to search format strings for '0'.
static const UChar kZero[] = {u_0};
     50 
// Used to unescape single quotes.
// States of the scanner in fixQuotes.
// NOTE(review): presumably OUTSIDE = not in a quoted run, INSIDE_EMPTY = just
// after an opening quote, INSIDE_FULL = inside a quoted run that already has
// content — confirm against the state-update code in fixQuotes.
enum QuoteState {
  OUTSIDE,
  INSIDE_EMPTY,
  INSIDE_FULL
};
     57 
// Flags describing resource fallback behaviour.
// NOTE(review): no use of these values is visible in this part of the file.
enum FallbackFlags {
  ANY = 0,
  MUST = 1,
  NOT_ROOT = 2
  // Next one will be 4 then 6 etc.
  // NOTE(review): if these are meant to be combinable bit flags, the value
  // after 4 should be 8, not 6 — confirm the intent.
};
     64 
     65 
// CDFUnit represents a prefix-suffix pair for a particular variant
// and log10 value.
// A unit starts out "unset"; the bogus state of the prefix string is used as
// the set/unset marker, so no separate flag is stored.
struct CDFUnit : public UMemory {
  UnicodeString prefix;
  UnicodeString suffix;
  // Creates an unset unit (prefix is bogus).
  inline CDFUnit() : prefix(), suffix() {
    prefix.setToBogus();
  }
  inline ~CDFUnit() {}
  // Returns TRUE once markAsSet() has been called (prefix is no longer bogus).
  inline UBool isSet() const {
    return !prefix.isBogus();
  }
  // Marks the unit as set by turning the bogus prefix into an empty string.
  inline void markAsSet() {
    prefix.remove();
  }
};
     82 
     83 // CDFLocaleStyleData contains formatting data for a particular locale
     84 // and style.
     85 class CDFLocaleStyleData : public UMemory {
     86  public:
     87   // What to divide by for each log10 value when formatting. These values
     88   // will be powers of 10. For English, would be:
     89   // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
     90   double divisors[MAX_DIGITS];
     91   // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
     92   // To format a number x,
     93   // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
     94   // Compute the plural variant for displayNum
     95   // (e.g zero, one, two, few, many, other).
     96   // Compute cdfUnits = unitsByVariant[pluralVariant].
     97   // Prefix and suffix to use at cdfUnits[log10(x)]
     98   UHashtable* unitsByVariant;
     99   // A flag for whether or not this CDFLocaleStyleData was loaded from the
    100   // Latin numbering system as a fallback from the locale numbering system.
    101   // This value is meaningless if the object is bogus or empty.
    102   UBool fromFallback;
    103   inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
    104     uprv_memset(divisors, 0, sizeof(divisors));
    105   }
    106   ~CDFLocaleStyleData();
    107   // Init initializes this object.
    108   void Init(UErrorCode& status);
    109   inline UBool isBogus() const {
    110     return unitsByVariant == NULL;
    111   }
    112   void setToBogus();
    113   UBool isEmpty() {
    114     return unitsByVariant == NULL || unitsByVariant->count == 0;
    115   }
    116  private:
    117   CDFLocaleStyleData(const CDFLocaleStyleData&);
    118   CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
    119 };
    120 
// CDFLocaleData contains formatting data for a particular locale.
// It bundles the short-style and long-style data; instances are the values
// stored in the global gCompactDecimalData cache.
struct CDFLocaleData : public UMemory {
  CDFLocaleStyleData shortData;
  CDFLocaleStyleData longData;
  inline CDFLocaleData() : shortData(), longData() { }
  inline ~CDFLocaleData() { }
  // Init initializes this object.
  void Init(UErrorCode& status);
};
    130 
    131 U_NAMESPACE_END
    132 
    133 U_CDECL_BEGIN
    134 
    135 static UBool U_CALLCONV cdf_cleanup(void) {
    136   if (gCompactDecimalData != NULL) {
    137     uhash_close(gCompactDecimalData);
    138     gCompactDecimalData = NULL;
    139   }
    140   return TRUE;
    141 }
    142 
    143 static void U_CALLCONV deleteCDFUnits(void* ptr) {
    144   delete [] (icu::CDFUnit*) ptr;
    145 }
    146 
    147 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
    148   delete (icu::CDFLocaleData*) ptr;
    149 }
    150 
    151 U_CDECL_END
    152 
    153 U_NAMESPACE_BEGIN
    154 
// Forward declarations of the file-local helpers defined further down.

// Comparison helper and cache accessors.
static UBool divisors_equal(const double* lhs, const double* rhs);
static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);

// Loading of locale data from the resource bundles and pattern parsing.
static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
static double calculateDivisor(double power10, int32_t numZeros);
static UBool onlySpaces(UnicodeString u);
static void fixQuotes(UnicodeString& s);
static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
static void fillInMissing(CDFLocaleStyleData* result);
static int32_t computeLog10(double x, UBool inRange);
static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
    170 
// ICU RTTI boilerplate for CompactDecimalFormat, generated by macro.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
    172 
    173 CompactDecimalFormat::CompactDecimalFormat(
    174     const DecimalFormat& decimalFormat,
    175     const UHashtable* unitsByVariant,
    176     const double* divisors,
    177     PluralRules* pluralRules)
    178   : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
    179 }
    180 
    181 CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    182     : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
    183 }
    184 
    185 CompactDecimalFormat* U_EXPORT2
    186 CompactDecimalFormat::createInstance(
    187     const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
    188   LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
    189   if (U_FAILURE(status)) {
    190     return NULL;
    191   }
    192   LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
    193   if (U_FAILURE(status)) {
    194     return NULL;
    195   }
    196   const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
    197   if (U_FAILURE(status)) {
    198     return NULL;
    199   }
    200   CompactDecimalFormat* result =
    201       new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
    202   if (result == NULL) {
    203     status = U_MEMORY_ALLOCATION_ERROR;
    204     return NULL;
    205   }
    206   pluralRules.orphan();
    207   result->setMaximumSignificantDigits(3);
    208   result->setSignificantDigitsUsed(TRUE);
    209   result->setGroupingUsed(FALSE);
    210   return result;
    211 }
    212 
    213 CompactDecimalFormat&
    214 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
    215   if (this != &rhs) {
    216     DecimalFormat::operator=(rhs);
    217     _unitsByVariant = rhs._unitsByVariant;
    218     _divisors = rhs._divisors;
    219     delete _pluralRules;
    220     _pluralRules = rhs._pluralRules->clone();
    221   }
    222   return *this;
    223 }
    224 
// Destructor: frees the owned plural rules. The unit table and divisors are
// not freed here.
CompactDecimalFormat::~CompactDecimalFormat() {
  delete _pluralRules;
}
    228 
    229 
    230 Format*
    231 CompactDecimalFormat::clone(void) const {
    232   return new CompactDecimalFormat(*this);
    233 }
    234 
    235 UBool
    236 CompactDecimalFormat::operator==(const Format& that) const {
    237   if (this == &that) {
    238     return TRUE;
    239   }
    240   return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
    241 }
    242 
    243 UBool
    244 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
    245   return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
    246 }
    247 
    248 UnicodeString&
    249 CompactDecimalFormat::format(
    250     double number,
    251     UnicodeString& appendTo,
    252     FieldPosition& pos) const {
    253   UErrorCode status = U_ZERO_ERROR;
    254   return format(number, appendTo, pos, status);
    255 }
    256 
    257 UnicodeString&
    258 CompactDecimalFormat::format(
    259     double number,
    260     UnicodeString& appendTo,
    261     FieldPosition& pos,
    262     UErrorCode &status) const {
    263   if (U_FAILURE(status)) {
    264     return appendTo;
    265   }
    266   DigitList orig, rounded;
    267   orig.set(number);
    268   UBool isNegative;
    269   _round(orig, rounded, isNegative, status);
    270   if (U_FAILURE(status)) {
    271     return appendTo;
    272   }
    273   double roundedDouble = rounded.getDouble();
    274   if (isNegative) {
    275     roundedDouble = -roundedDouble;
    276   }
    277   int32_t baseIdx = computeLog10(roundedDouble, TRUE);
    278   double numberToFormat = roundedDouble / _divisors[baseIdx];
    279   UnicodeString variant = _pluralRules->select(numberToFormat);
    280   if (isNegative) {
    281     numberToFormat = -numberToFormat;
    282   }
    283   const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
    284   appendTo += unit->prefix;
    285   DecimalFormat::format(numberToFormat, appendTo, pos);
    286   appendTo += unit->suffix;
    287   return appendTo;
    288 }
    289 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(
    double /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    299 
    300 UnicodeString&
    301 CompactDecimalFormat::format(
    302     int32_t number,
    303     UnicodeString& appendTo,
    304     FieldPosition& pos) const {
    305   return format((double) number, appendTo, pos);
    306 }
    307 
    308 UnicodeString&
    309 CompactDecimalFormat::format(
    310     int32_t number,
    311     UnicodeString& appendTo,
    312     FieldPosition& pos,
    313     UErrorCode &status) const {
    314   return format((double) number, appendTo, pos, status);
    315 }
    316 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(
    int32_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    326 
    327 UnicodeString&
    328 CompactDecimalFormat::format(
    329     int64_t number,
    330     UnicodeString& appendTo,
    331     FieldPosition& pos) const {
    332   return format((double) number, appendTo, pos);
    333 }
    334 
    335 UnicodeString&
    336 CompactDecimalFormat::format(
    337     int64_t number,
    338     UnicodeString& appendTo,
    339     FieldPosition& pos,
    340     UErrorCode &status) const {
    341   return format((double) number, appendTo, pos, status);
    342 }
    343 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(
    int64_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    353 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(
    StringPiece /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    363 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(
    const DigitList& /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    373 
// Not supported for compact formats: sets status to U_UNSUPPORTED_ERROR and
// returns appendTo untouched.
UnicodeString&
CompactDecimalFormat::format(const DigitList& /* number */,
                             UnicodeString& appendTo,
                             FieldPosition& /* pos */,
                             UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
    382 
// Parsing is not implemented: this overload is a no-op that leaves result
// and parsePosition untouched.
// NOTE(review): callers presumably treat an unchanged parse position as a
// parse failure — confirm against the NumberFormat::parse contract.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    ParsePosition& /* parsePosition */) const {
}
    389 
// Parsing is not implemented: sets status to U_UNSUPPORTED_ERROR and leaves
// result untouched.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
}
    397 
// Currency parsing is not implemented: always returns NULL and does not
// touch pos.
CurrencyAmount*
CompactDecimalFormat::parseCurrency(
    const UnicodeString& /* text */,
    ParsePosition& /* pos */) const {
  return NULL;
}
    404 
    405 void CDFLocaleStyleData::Init(UErrorCode& status) {
    406   if (unitsByVariant != NULL) {
    407     return;
    408   }
    409   unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
    410   if (U_FAILURE(status)) {
    411     return;
    412   }
    413   uhash_setKeyDeleter(unitsByVariant, uprv_free);
    414   uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
    415 }
    416 
// Destructor: releases the hash table (if any) via setToBogus().
CDFLocaleStyleData::~CDFLocaleStyleData() {
  setToBogus();
}
    420 
    421 void CDFLocaleStyleData::setToBogus() {
    422   if (unitsByVariant != NULL) {
    423     uhash_close(unitsByVariant);
    424     unitsByVariant = NULL;
    425   }
    426 }
    427 
    428 void CDFLocaleData::Init(UErrorCode& status) {
    429   shortData.Init(status);
    430   if (U_FAILURE(status)) {
    431     return;
    432   }
    433   longData.Init(status);
    434 }
    435 
    436 // Helper method for operator=
    437 static UBool divisors_equal(const double* lhs, const double* rhs) {
    438   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    439     if (lhs[i] != rhs[i]) {
    440       return FALSE;
    441     }
    442   }
    443   return TRUE;
    444 }
    445 
    446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
    447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
    448 // the data from CLDR into the global cache before returning the pointer. If a
    449 // UNUM_LONG data is requested for a locale, and that locale does not have
    450 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
    451 // that locale.
    452 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
    453   if (U_FAILURE(status)) {
    454     return NULL;
    455   }
    456   CDFLocaleData* result = NULL;
    457   const char* key = inLocale.getName();
    458   {
    459     Mutex lock(&gCompactDecimalMetaLock);
    460     if (gCompactDecimalData == NULL) {
    461       gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
    462       if (U_FAILURE(status)) {
    463         return NULL;
    464       }
    465       uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
    466       uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
    467       ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
    468     } else {
    469       result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
    470     }
    471   }
    472   if (result != NULL) {
    473     return extractDataByStyleEnum(*result, style, status);
    474   }
    475 
    476   result = loadCDFLocaleData(inLocale, status);
    477   if (U_FAILURE(status)) {
    478     return NULL;
    479   }
    480 
    481   {
    482     Mutex lock(&gCompactDecimalMetaLock);
    483     CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
    484     if (temp != NULL) {
    485       delete result;
    486       result = temp;
    487     } else {
    488       uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
    489       if (U_FAILURE(status)) {
    490         return NULL;
    491       }
    492     }
    493   }
    494   return extractDataByStyleEnum(*result, style, status);
    495 }
    496 
    497 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
    498   switch (style) {
    499     case UNUM_SHORT:
    500       return &data.shortData;
    501     case UNUM_LONG:
    502       if (!data.longData.isBogus()) {
    503         return &data.longData;
    504       }
    505       return &data.shortData;
    506     default:
    507       status = U_ILLEGAL_ARGUMENT_ERROR;
    508       return NULL;
    509   }
    510 }
    511 
    512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
    513 // caller owns the returned pointer.
    514 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
    515   if (U_FAILURE(status)) {
    516     return NULL;
    517   }
    518   CDFLocaleData* result = new CDFLocaleData;
    519   if (result == NULL) {
    520     status = U_MEMORY_ALLOCATION_ERROR;
    521     return NULL;
    522   }
    523   result->Init(status);
    524   if (U_FAILURE(status)) {
    525     delete result;
    526     return NULL;
    527   }
    528 
    529   load(inLocale, result, status);
    530 
    531   if (U_FAILURE(status)) {
    532     delete result;
    533     return NULL;
    534   }
    535   return result;
    536 }
    537 
    538 namespace {
    539 
    540 struct CmptDecDataSink : public ResourceSink {
    541 
    542   CDFLocaleData& dataBundle; // Where to save values when they are read
    543   UBool isLatin; // Whether or not we are traversing the Latin tree
    544   UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
    545 
    546   enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
    547   enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
    548 
    549   /*
    550    * NumberElements{              <-- top (numbering system table)
    551    *  latn{                       <-- patternsTable (one per numbering system)
    552    *    patternsLong{             <-- formatsTable (one per pattern)
    553    *      decimalFormat{          <-- powersOfTenTable (one per format)
    554    *        1000{                 <-- pluralVariantsTable (one per power of ten)
    555    *          one{"0 thousand"}   <-- plural variant and template
    556    */
    557 
    558   CmptDecDataSink(CDFLocaleData& _dataBundle)
    559     : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
    560   virtual ~CmptDecDataSink();
    561 
    562   virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
    563     // SPECIAL CASE: Don't consume root in the non-Latin numbering system
    564     if (isRoot && !isLatin) { return; }
    565 
    566     ResourceTable patternsTable = value.getTable(errorCode);
    567     if (U_FAILURE(errorCode)) { return; }
    568     for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
    569 
    570       // Check for patternsShort or patternsLong
    571       EPatternsTableKey patternsTableKey;
    572       if (uprv_strcmp(key, gPatternsShort) == 0) {
    573         patternsTableKey = PATTERNS_SHORT;
    574       } else if (uprv_strcmp(key, gPatternsLong) == 0) {
    575         patternsTableKey = PATTERNS_LONG;
    576       } else {
    577         continue;
    578       }
    579 
    580       // Traverse into the formats table
    581       ResourceTable formatsTable = value.getTable(errorCode);
    582       if (U_FAILURE(errorCode)) { return; }
    583       for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
    584 
    585         // Check for decimalFormat or currencyFormat
    586         EFormatsTableKey formatsTableKey;
    587         if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
    588           formatsTableKey = DECIMAL_FORMAT;
    589         // TODO: Enable this statement when currency support is added
    590         // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
    591         //   formatsTableKey = CURRENCY_FORMAT;
    592         } else {
    593           continue;
    594         }
    595 
    596         // Set the current style and destination based on the two keys
    597         UNumberCompactStyle style;
    598         CDFLocaleStyleData* destination = NULL;
    599         if (patternsTableKey == PATTERNS_LONG
    600             && formatsTableKey == DECIMAL_FORMAT) {
    601           style = UNUM_LONG;
    602           destination = &dataBundle.longData;
    603         } else if (patternsTableKey == PATTERNS_SHORT
    604             && formatsTableKey == DECIMAL_FORMAT) {
    605           style = UNUM_SHORT;
    606           destination = &dataBundle.shortData;
    607         // TODO: Enable the following statements when currency support is added
    608         // } else if (patternsTableKey == PATTERNS_SHORT
    609         //     && formatsTableKey == CURRENCY_FORMAT) {
    610         //   style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
    611         //   destination = &dataBundle.shortCurrencyData;
    612         // } else {
    613         //   // Silently ignore this case
    614         //   continue;
    615         }
    616 
    617         // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
    618         //   1) Don't consume longData if shortData was consumed from the non-Latin
    619         //      locale numbering system
    620         //   2) Don't consume longData for the first time if this is the root bundle and
    621         //      shortData is already populated from a more specific locale. Note that if
    622         //      both longData and shortData are both only in root, longData will be
    623         //      consumed since it is alphabetically before shortData in the bundle.
    624         if (isFallback
    625                 && style == UNUM_LONG
    626                 && !dataBundle.shortData.isEmpty()
    627                 && !dataBundle.shortData.fromFallback) {
    628             continue;
    629         }
    630         if (isRoot
    631                 && style == UNUM_LONG
    632                 && dataBundle.longData.isEmpty()
    633                 && !dataBundle.shortData.isEmpty()) {
    634             continue;
    635         }
    636 
    637         // Set the "fromFallback" flag on the data object
    638         destination->fromFallback = isFallback;
    639 
    640         // Traverse into the powers of ten table
    641         ResourceTable powersOfTenTable = value.getTable(errorCode);
    642         if (U_FAILURE(errorCode)) { return; }
    643         for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
    644 
    645           // The key will always be some even power of 10. e.g 10000.
    646           char* endPtr = NULL;
    647           double power10 = uprv_strtod(key, &endPtr);
    648           if (*endPtr != 0) {
    649             errorCode = U_INTERNAL_PROGRAM_ERROR;
    650             return;
    651           }
    652           int32_t log10Value = computeLog10(power10, FALSE);
    653 
    654           // Silently ignore divisors that are too big.
    655           if (log10Value >= MAX_DIGITS) continue;
    656 
    657           // Iterate over the plural variants ("one", "other", etc)
    658           ResourceTable pluralVariantsTable = value.getTable(errorCode);
    659           if (U_FAILURE(errorCode)) { return; }
    660           for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
    661             const char* pluralVariant = key;
    662             const UnicodeString formatStr = value.getUnicodeString(errorCode);
    663 
    664             // Copy the data into the in-memory data bundle (do not overwrite
    665             // existing values)
    666             int32_t numZeros = populatePrefixSuffix(
    667                 pluralVariant, log10Value, formatStr,
    668                 destination->unitsByVariant, FALSE, errorCode);
    669 
    670             // If populatePrefixSuffix returns -1, it means that this key has been
    671             // encountered already.
    672             if (numZeros < 0) {
    673               continue;
    674             }
    675 
    676             // Set the divisor, which is based on the number of zeros in the template
    677             // string.  If the divisor from here is different from the one previously
    678             // stored, it means that the number of zeros in different plural variants
    679             // differs; throw an exception.
    680             // TODO: How should I check for floating-point errors here?
    681             //       Is there a good reason why "divisor" is double and not long like Java?
    682             double divisor = calculateDivisor(power10, numZeros);
    683             if (destination->divisors[log10Value] != 0.0
    684                 && destination->divisors[log10Value] != divisor) {
    685               errorCode = U_INTERNAL_PROGRAM_ERROR;
    686               return;
    687             }
    688             destination->divisors[log10Value] = divisor;
    689           }
    690         }
    691       }
    692     }
    693   }
    694 };
    695 
// Virtual destructors must be defined out of line.
// Nothing to release: the sink only holds a reference to the data bundle.
CmptDecDataSink::~CmptDecDataSink() {}
    698 
    699 } // namespace
    700 
    701 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
    702   LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
    703   if (U_FAILURE(status)) {
    704     return;
    705   }
    706   const char* nsName = ns->getName();
    707 
    708   LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
    709   if (U_FAILURE(status)) {
    710     return;
    711   }
    712   CmptDecDataSink sink(*result);
    713   sink.isFallback = FALSE;
    714 
    715   // First load the number elements data if nsName is not Latin.
    716   if (uprv_strcmp(nsName, gLatnTag) != 0) {
    717     sink.isLatin = FALSE;
    718     CharString path;
    719     path.append(gNumberElementsTag, status)
    720         .append('/', status)
    721         .append(nsName, status);
    722     ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
    723     if (status == U_MISSING_RESOURCE_ERROR) {
    724       // Silently ignore and use Latin
    725       status = U_ZERO_ERROR;
    726     } else if  (U_FAILURE(status)) {
    727       return;
    728     }
    729     sink.isFallback = TRUE;
    730   }
    731 
    732   // Now load Latin.
    733   sink.isLatin = TRUE;
    734   ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
    735   if (U_FAILURE(status)) return;
    736 
    737   // If longData is empty, default it to be equal to shortData
    738   if (result->longData.isEmpty()) {
    739     result->longData.setToBogus();
    740   }
    741 
    742   // Check for "other" variants in each of the three data classes, and resolve missing elements.
    743 
    744   if (!result->longData.isBogus()) {
    745     checkForOtherVariants(&result->longData, status);
    746     if (U_FAILURE(status)) return;
    747     fillInMissing(&result->longData);
    748   }
    749 
    750   checkForOtherVariants(&result->shortData, status);
    751   if (U_FAILURE(status)) return;
    752   fillInMissing(&result->shortData);
    753 
    754   // TODO: Enable this statement when currency support is added
    755   // checkForOtherVariants(&result->shortCurrencyData, status);
    756   // if (U_FAILURE(status)) return;
    757   // fillInMissing(&result->shortCurrencyData);
    758 }
    759 
    760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
    761 // given variant and log10 value.
    762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
    763 // formatStr is the format string from which the prefix and suffix are
    764 // extracted. It is usually of form 'Pefix 000 suffix'.
    765 // populatePrefixSuffix returns the number of 0's found in formatStr
    766 // before the decimal point.
    767 // In the special case that formatStr contains only spaces for prefix
    768 // and suffix, populatePrefixSuffix returns log10Value + 1.
    769 static int32_t populatePrefixSuffix(
    770     const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
    771   if (U_FAILURE(status)) {
    772     return 0;
    773   }
    774   int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
    775   // We must have 0's in format string.
    776   if (firstIdx == -1) {
    777     status = U_INTERNAL_PROGRAM_ERROR;
    778     return 0;
    779   }
    780   int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
    781   CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
    782   if (U_FAILURE(status)) {
    783     return 0;
    784   }
    785 
    786   // Return -1 if we are not overwriting an existing value
    787   if (unit->isSet() && !overwrite) {
    788     return -1;
    789   }
    790   unit->markAsSet();
    791 
    792   // Everything up to first 0 is the prefix
    793   unit->prefix = formatStr.tempSubString(0, firstIdx);
    794   fixQuotes(unit->prefix);
    795   // Everything beyond the last 0 is the suffix
    796   unit->suffix = formatStr.tempSubString(lastIdx + 1);
    797   fixQuotes(unit->suffix);
    798 
    799   // If there is effectively no prefix or suffix, ignore the actual number of
    800   // 0's and act as if the number of 0's matches the size of the number.
    801   if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
    802     return log10Value + 1;
    803   }
    804 
    805   // Calculate number of zeros before decimal point
    806   int32_t idx = firstIdx + 1;
    807   while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
    808     ++idx;
    809   }
    810   return (idx - firstIdx);
    811 }
    812 
    813 // Calculate a divisor based on the magnitude and number of zeros in the
    814 // template string.
    815 static double calculateDivisor(double power10, int32_t numZeros) {
    816   double divisor = power10;
    817   for (int32_t i = 1; i < numZeros; ++i) {
    818     divisor /= 10.0;
    819   }
    820   return divisor;
    821 }
    822 
    823 static UBool onlySpaces(UnicodeString u) {
    824   return u.trim().length() == 0;
    825 }
    826 
    827 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
    828 // Modifies s in place.
    829 static void fixQuotes(UnicodeString& s) {
    830   QuoteState state = OUTSIDE;
    831   int32_t len = s.length();
    832   int32_t dest = 0;
    833   for (int32_t i = 0; i < len; ++i) {
    834     UChar ch = s.charAt(i);
    835     if (ch == u_apos) {
    836       if (state == INSIDE_EMPTY) {
    837         s.setCharAt(dest, ch);
    838         ++dest;
    839       }
    840     } else {
    841       s.setCharAt(dest, ch);
    842       ++dest;
    843     }
    844 
    845     // Update state
    846     switch (state) {
    847       case OUTSIDE:
    848         state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
    849         break;
    850       case INSIDE_EMPTY:
    851       case INSIDE_FULL:
    852         state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
    853         break;
    854       default:
    855         break;
    856     }
    857   }
    858   s.truncate(dest);
    859 }
    860 
    861 // Checks to make sure that an "other" variant is present in all
    862 // powers of 10.
    863 static void checkForOtherVariants(CDFLocaleStyleData* result,
    864     UErrorCode& status) {
    865   if (result == NULL || result->unitsByVariant == NULL) {
    866     return;
    867   }
    868 
    869   const CDFUnit* otherByBase =
    870       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
    871   if (otherByBase == NULL) {
    872     status = U_INTERNAL_PROGRAM_ERROR;
    873     return;
    874   }
    875 
    876   // Check all other plural variants, and make sure that if
    877   // any of them are populated, then other is also populated
    878   int32_t pos = UHASH_FIRST;
    879   const UHashElement* element;
    880   while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
    881     CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
    882     if (variantsByBase == otherByBase) continue;
    883     for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
    884       if (variantsByBase[log10Value].isSet()
    885           && !otherByBase[log10Value].isSet()) {
    886         status = U_INTERNAL_PROGRAM_ERROR;
    887         return;
    888       }
    889     }
    890   }
    891 }
    892 
    893 // fillInMissing ensures that the data in result is complete.
    894 // result data is complete if for each variant in result, there exists
    895 // a prefix-suffix pair for each log10 value and there also exists
    896 // a divisor for each log10 value.
    897 //
    898 // First this function figures out for which log10 values, the other
    899 // variant already had data. These are the same log10 values defined
    900 // in CLDR.
    901 //
    902 // For each log10 value not defined in CLDR, it uses the divisor for
    903 // the last defined log10 value or 1.
    904 //
    905 // Then for each variant, it does the following. For each log10
    906 // value not defined in CLDR, copy the prefix-suffix pair from the
    907 // previous log10 value. If log10 value is defined in CLDR but is
    908 // missing from given variant, copy the prefix-suffix pair for that
    909 // log10 value from the 'other' variant.
    910 static void fillInMissing(CDFLocaleStyleData* result) {
    911   const CDFUnit* otherUnits =
    912       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
    913   UBool definedInCLDR[MAX_DIGITS];
    914   double lastDivisor = 1.0;
    915   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    916     if (!otherUnits[i].isSet()) {
    917       result->divisors[i] = lastDivisor;
    918       definedInCLDR[i] = FALSE;
    919     } else {
    920       lastDivisor = result->divisors[i];
    921       definedInCLDR[i] = TRUE;
    922     }
    923   }
    924   // Iterate over each variant.
    925   int32_t pos = UHASH_FIRST;
    926   const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
    927   for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
    928     CDFUnit* units = (CDFUnit*) element->value.pointer;
    929     for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    930       if (definedInCLDR[i]) {
    931         if (!units[i].isSet()) {
    932           units[i] = otherUnits[i];
    933         }
    934       } else {
    935         if (i == 0) {
    936           units[0].markAsSet();
    937         } else {
    938           units[i] = units[i - 1];
    939         }
    940       }
    941     }
    942   }
    943 }
    944 
    945 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
    946 // value computeLog10 will return MAX_DIGITS -1 even for
    947 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
    948 // up to MAX_DIGITS.
    949 static int32_t computeLog10(double x, UBool inRange) {
    950   int32_t result = 0;
    951   int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
    952   while (x >= 10.0) {
    953     x /= 10.0;
    954     ++result;
    955     if (result == max) {
    956       break;
    957     }
    958   }
    959   return result;
    960 }
    961 
    962 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
    963 // variant and log10 value within table. If no such prefix-suffix pair is
    964 // stored in table, one is created within table before returning pointer.
    965 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
    966   if (U_FAILURE(status)) {
    967     return NULL;
    968   }
    969   CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
    970   if (cdfUnit == NULL) {
    971     cdfUnit = new CDFUnit[MAX_DIGITS];
    972     if (cdfUnit == NULL) {
    973       status = U_MEMORY_ALLOCATION_ERROR;
    974       return NULL;
    975     }
    976     uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
    977     if (U_FAILURE(status)) {
    978       return NULL;
    979     }
    980   }
    981   CDFUnit* result = &cdfUnit[log10Value];
    982   return result;
    983 }
    984 
    985 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
    986 // variant and log10 value within table. If the given variant doesn't exist, it
    987 // falls back to the OTHER variant. Therefore, this method will always return
    988 // some non-NULL value.
    989 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
    990   CharString cvariant;
    991   UErrorCode status = U_ZERO_ERROR;
    992   const CDFUnit *cdfUnit = NULL;
    993   cvariant.appendInvariantChars(variant, status);
    994   if (!U_FAILURE(status)) {
    995     cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
    996   }
    997   if (cdfUnit == NULL) {
    998     cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
    999   }
   1000   return &cdfUnit[log10Value];
   1001 }
   1002 
   1003 U_NAMESPACE_END
   1004 #endif
   1005