Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1997-2012, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 *
      7 * File COMPACTDECIMALFORMAT.CPP
      8 *
      9 ********************************************************************************
     10 */
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_FORMATTING
     14 
     15 #include "charstr.h"
     16 #include "cstring.h"
     17 #include "digitlst.h"
     18 #include "mutex.h"
     19 #include "unicode/compactdecimalformat.h"
     20 #include "unicode/numsys.h"
     21 #include "unicode/plurrule.h"
     22 #include "unicode/ures.h"
     23 #include "ucln_in.h"
     24 #include "uhash.h"
     25 #include "umutex.h"
     26 #include "unicode/ures.h"
     27 #include "uresimp.h"
     28 
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for true arrays; a pointer argument gives a wrong count.
#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof((array)[0]))

// Maps locale name to CDFLocaleData struct.
static UHashtable* gCompactDecimalData = NULL;
// Held while gCompactDecimalData is created, read or updated.
static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
     34 
     35 U_NAMESPACE_BEGIN
     36 
// Number of log10 slots kept per style (length of the divisors array).
static const int32_t MAX_DIGITS = 15;
// Plural variant that must be present for every power of 10.
static const char gOther[] = "other";
// Resource bundle keys used when loading the compact decimal data.
static const char gLatnTag[] = "latn";
static const char gNumberElementsTag[] = "NumberElements";
static const char gDecimalFormatTag[] = "decimalFormat";
static const char gPatternsShort[] = "patternsShort";
static const char gPatternsLong[] = "patternsLong";
// Locale name compared against a bundle's actual locale to detect root.
static const char gRoot[] = "root";

// Digit zero ('0') and apostrophe ('\'') as UTF-16 code units.
static const UChar u_0 = 0x30;
static const UChar u_apos = 0x27;

// One-element pattern used to search format strings for the digit zero.
static const UChar kZero[] = {u_0};
     50 
     51 // Used to unescape single quotes.
     52 enum QuoteState {
     53   OUTSIDE,
     54   INSIDE_EMPTY,
     55   INSIDE_FULL
     56 };
     57 
// Flags controlling resource lookup in tryGetByKeyWithFallback.
// Bit 0 (MUST) is a modifier that may be ORed onto a mode; the remaining bits
// hold the mode itself (ANY or NOT_ROOT), so new modes advance in steps of 2.
enum FallbackFlags {
  ANY = 0,
  MUST = 1,
  NOT_ROOT = 2
  // Next one will be 4 then 6 etc.
};
     64 
     65 
     66 // CDFUnit represents a prefix-suffix pair for a particular variant
     67 // and log10 value.
     68 struct CDFUnit : public UMemory {
     69   UnicodeString prefix;
     70   UnicodeString suffix;
     71   inline CDFUnit() : prefix(), suffix() {
     72     prefix.setToBogus();
     73   }
     74   inline ~CDFUnit() {}
     75   inline UBool isSet() const {
     76     return !prefix.isBogus();
     77   }
     78   inline void markAsSet() {
     79     prefix.remove();
     80   }
     81 };
     82 
     83 // CDFLocaleStyleData contains formatting data for a particular locale
     84 // and style.
     85 class CDFLocaleStyleData : public UMemory {
     86  public:
     87   // What to divide by for each log10 value when formatting. These values
     88   // will be powers of 10. For English, would be:
     89   // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
     90   double divisors[MAX_DIGITS];
     91   // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
     92   // To format a number x,
     93   // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
     94   // Compute the plural variant for displayNum
     95   // (e.g zero, one, two, few, many, other).
     96   // Compute cdfUnits = unitsByVariant[pluralVariant].
     97   // Prefix and suffix to use at cdfUnits[log10(x)]
     98   UHashtable* unitsByVariant;
     99   inline CDFLocaleStyleData() : unitsByVariant(NULL) {}
    100   ~CDFLocaleStyleData();
    101   // Init initializes this object.
    102   void Init(UErrorCode& status);
    103   inline UBool isBogus() const {
    104     return unitsByVariant == NULL;
    105   }
    106   void setToBogus();
    107  private:
    108   CDFLocaleStyleData(const CDFLocaleStyleData&);
    109   CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
    110 };
    111 
    112 // CDFLocaleData contains formatting data for a particular locale.
    113 struct CDFLocaleData : public UMemory {
    114   CDFLocaleStyleData shortData;
    115   CDFLocaleStyleData longData;
    116   inline CDFLocaleData() : shortData(), longData() { }
    117   inline ~CDFLocaleData() { }
    118   // Init initializes this object.
    119   void Init(UErrorCode& status);
    120 };
    121 
    122 U_NAMESPACE_END
    123 
    124 U_CDECL_BEGIN
    125 
    126 static UBool U_CALLCONV cdf_cleanup(void) {
    127   if (gCompactDecimalData != NULL) {
    128     uhash_close(gCompactDecimalData);
    129     gCompactDecimalData = NULL;
    130   }
    131   return TRUE;
    132 }
    133 
    134 static void U_CALLCONV deleteCDFUnits(void* ptr) {
    135   delete [] (icu::CDFUnit*) ptr;
    136 }
    137 
    138 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
    139   delete (icu::CDFLocaleData*) ptr;
    140 }
    141 
    142 U_CDECL_END
    143 
    144 U_NAMESPACE_BEGIN
    145 
    146 static UBool divisors_equal(const double* lhs, const double* rhs);
    147 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
    148 
    149 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
    150 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
    151 static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
    152 static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
    153 static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
    154 static UBool isRoot(const UResourceBundle* rb, UErrorCode& status);
    155 static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status);
    156 static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status);
    157 static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status);
    158 static UBool onlySpaces(UnicodeString u);
    159 static void fixQuotes(UnicodeString& s);
    160 static void fillInMissing(CDFLocaleStyleData* result);
    161 static int32_t computeLog10(double x, UBool inRange);
    162 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
    163 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
    164 
    165 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
    166 
    167 CompactDecimalFormat::CompactDecimalFormat(
    168     const DecimalFormat& decimalFormat,
    169     const UHashtable* unitsByVariant,
    170     const double* divisors,
    171     PluralRules* pluralRules)
    172   : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
    173 }
    174 
    175 CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    176     : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
    177 }
    178 
    179 CompactDecimalFormat* U_EXPORT2
    180 CompactDecimalFormat::createInstance(
    181     const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
    182   LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
    183   if (U_FAILURE(status)) {
    184     return NULL;
    185   }
    186   LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
    187   if (U_FAILURE(status)) {
    188     return NULL;
    189   }
    190   const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
    191   if (U_FAILURE(status)) {
    192     return NULL;
    193   }
    194   CompactDecimalFormat* result =
    195       new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
    196   if (result == NULL) {
    197     status = U_MEMORY_ALLOCATION_ERROR;
    198     return NULL;
    199   }
    200   pluralRules.orphan();
    201   result->setMaximumSignificantDigits(3);
    202   result->setSignificantDigitsUsed(TRUE);
    203   result->setGroupingUsed(FALSE);
    204   return result;
    205 }
    206 
    207 CompactDecimalFormat&
    208 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
    209   if (this != &rhs) {
    210     DecimalFormat::operator=(rhs);
    211     _unitsByVariant = rhs._unitsByVariant;
    212     _divisors = rhs._divisors;
    213     delete _pluralRules;
    214     _pluralRules = rhs._pluralRules->clone();
    215   }
    216   return *this;
    217 }
    218 
    219 CompactDecimalFormat::~CompactDecimalFormat() {
    220   delete _pluralRules;
    221 }
    222 
    223 
    224 Format*
    225 CompactDecimalFormat::clone(void) const {
    226   return new CompactDecimalFormat(*this);
    227 }
    228 
    229 UBool
    230 CompactDecimalFormat::operator==(const Format& that) const {
    231   if (this == &that) {
    232     return TRUE;
    233   }
    234   return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
    235 }
    236 
    237 UBool
    238 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
    239   return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
    240 }
    241 
    242 UnicodeString&
    243 CompactDecimalFormat::format(
    244     double number,
    245     UnicodeString& appendTo,
    246     FieldPosition& pos) const {
    247   DigitList orig, rounded;
    248   orig.set(number);
    249   UBool isNegative;
    250   UErrorCode status = U_ZERO_ERROR;
    251   _round(orig, rounded, isNegative, status);
    252   if (U_FAILURE(status)) {
    253     return appendTo;
    254   }
    255   double roundedDouble = rounded.getDouble();
    256   if (isNegative) {
    257     roundedDouble = -roundedDouble;
    258   }
    259   int32_t baseIdx = computeLog10(roundedDouble, TRUE);
    260   double numberToFormat = roundedDouble / _divisors[baseIdx];
    261   UnicodeString variant = _pluralRules->select(numberToFormat);
    262   if (isNegative) {
    263     numberToFormat = -numberToFormat;
    264   }
    265   const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
    266   appendTo += unit->prefix;
    267   DecimalFormat::format(numberToFormat, appendTo, pos);
    268   appendTo += unit->suffix;
    269   return appendTo;
    270 }
    271 
    272 UnicodeString&
    273 CompactDecimalFormat::format(
    274     double /* number */,
    275     UnicodeString& appendTo,
    276     FieldPositionIterator* /* posIter */,
    277     UErrorCode& status) const {
    278   status = U_UNSUPPORTED_ERROR;
    279   return appendTo;
    280 }
    281 
    282 UnicodeString&
    283 CompactDecimalFormat::format(
    284     int64_t number,
    285     UnicodeString& appendTo,
    286     FieldPosition& pos) const {
    287   return format((double) number, appendTo, pos);
    288 }
    289 
    290 UnicodeString&
    291 CompactDecimalFormat::format(
    292     int64_t /* number */,
    293     UnicodeString& appendTo,
    294     FieldPositionIterator* /* posIter */,
    295     UErrorCode& status) const {
    296   status = U_UNSUPPORTED_ERROR;
    297   return appendTo;
    298 }
    299 
    300 UnicodeString&
    301 CompactDecimalFormat::format(
    302     const StringPiece& /* number */,
    303     UnicodeString& appendTo,
    304     FieldPositionIterator* /* posIter */,
    305     UErrorCode& status) const {
    306   status = U_UNSUPPORTED_ERROR;
    307   return appendTo;
    308 }
    309 
    310 UnicodeString&
    311 CompactDecimalFormat::format(
    312     const DigitList& /* number */,
    313     UnicodeString& appendTo,
    314     FieldPositionIterator* /* posIter */,
    315     UErrorCode& status) const {
    316   status = U_UNSUPPORTED_ERROR;
    317   return appendTo;
    318 }
    319 
    320 UnicodeString&
    321 CompactDecimalFormat::format(const DigitList& /* number */,
    322                              UnicodeString& appendTo,
    323                              FieldPosition& /* pos */,
    324                              UErrorCode& status) const {
    325   status = U_UNSUPPORTED_ERROR;
    326   return appendTo;
    327 }
    328 
    329 void
    330 CompactDecimalFormat::parse(
    331     const UnicodeString& /* text */,
    332     Formattable& /* result */,
    333     ParsePosition& /* parsePosition */) const {
    334 }
    335 
    336 void
    337 CompactDecimalFormat::parse(
    338     const UnicodeString& /* text */,
    339     Formattable& /* result */,
    340     UErrorCode& status) const {
    341   status = U_UNSUPPORTED_ERROR;
    342 }
    343 
    344 CurrencyAmount*
    345 CompactDecimalFormat::parseCurrency(
    346     const UnicodeString& /* text */,
    347     ParsePosition& /* pos */) const {
    348   return NULL;
    349 }
    350 
    351 void CDFLocaleStyleData::Init(UErrorCode& status) {
    352   if (unitsByVariant != NULL) {
    353     return;
    354   }
    355   unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
    356   if (U_FAILURE(status)) {
    357     return;
    358   }
    359   uhash_setKeyDeleter(unitsByVariant, uprv_free);
    360   uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
    361 }
    362 
    363 CDFLocaleStyleData::~CDFLocaleStyleData() {
    364   setToBogus();
    365 }
    366 
    367 void CDFLocaleStyleData::setToBogus() {
    368   if (unitsByVariant != NULL) {
    369     uhash_close(unitsByVariant);
    370     unitsByVariant = NULL;
    371   }
    372 }
    373 
    374 void CDFLocaleData::Init(UErrorCode& status) {
    375   shortData.Init(status);
    376   if (U_FAILURE(status)) {
    377     return;
    378   }
    379   longData.Init(status);
    380 }
    381 
    382 // Helper method for operator=
    383 static UBool divisors_equal(const double* lhs, const double* rhs) {
    384   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    385     if (lhs[i] != rhs[i]) {
    386       return FALSE;
    387     }
    388   }
    389   return TRUE;
    390 }
    391 
    392 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
    393 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
    394 // the data from CLDR into the global cache before returning the pointer. If a
    395 // UNUM_LONG data is requested for a locale, and that locale does not have
    396 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
    397 // that locale.
    398 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
    399   if (U_FAILURE(status)) {
    400     return NULL;
    401   }
    402   CDFLocaleData* result = NULL;
    403   const char* key = inLocale.getName();
    404   {
    405     Mutex lock(&gCompactDecimalMetaLock);
    406     if (gCompactDecimalData == NULL) {
    407       gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
    408       if (U_FAILURE(status)) {
    409         return NULL;
    410       }
    411       uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
    412       uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
    413       ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
    414     } else {
    415       result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
    416     }
    417   }
    418   if (result != NULL) {
    419     return extractDataByStyleEnum(*result, style, status);
    420   }
    421 
    422   result = loadCDFLocaleData(inLocale, status);
    423   if (U_FAILURE(status)) {
    424     return NULL;
    425   }
    426 
    427   {
    428     Mutex lock(&gCompactDecimalMetaLock);
    429     CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
    430     if (temp != NULL) {
    431       delete result;
    432       result = temp;
    433     } else {
    434       uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
    435       if (U_FAILURE(status)) {
    436         return NULL;
    437       }
    438     }
    439   }
    440   return extractDataByStyleEnum(*result, style, status);
    441 }
    442 
    443 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
    444   switch (style) {
    445     case UNUM_SHORT:
    446       return &data.shortData;
    447     case UNUM_LONG:
    448       if (!data.longData.isBogus()) {
    449         return &data.longData;
    450       }
    451       return &data.shortData;
    452     default:
    453       status = U_ILLEGAL_ARGUMENT_ERROR;
    454       return NULL;
    455   }
    456 }
    457 
    458 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
    459 // caller owns the returned pointer.
    460 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
    461   if (U_FAILURE(status)) {
    462     return NULL;
    463   }
    464   CDFLocaleData* result = new CDFLocaleData;
    465   if (result == NULL) {
    466     status = U_MEMORY_ALLOCATION_ERROR;
    467     return NULL;
    468   }
    469   result->Init(status);
    470   if (U_FAILURE(status)) {
    471     delete result;
    472     return NULL;
    473   }
    474 
    475   initCDFLocaleData(inLocale, result, status);
    476   if (U_FAILURE(status)) {
    477     delete result;
    478     return NULL;
    479   }
    480   return result;
    481 }
    482 
    483 // initCDFLocaleData initializes result with data from CLDR.
    484 // inLocale is the locale, the CLDR data is stored in result.
    485 // We load the UNUM_SHORT  and UNUM_LONG data looking first in local numbering
    486 // system and not including root locale in fallback. Next we try in the latn
    487 // numbering system where we fallback all the way to root. If we don't find
    488 // UNUM_SHORT data in these three places, we report an error. If we find
    489 // UNUM_SHORT data before finding UNUM_LONG data we make UNUM_LONG data fall
    490 // back to UNUM_SHORT data.
    491 static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
    492   LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
    493   if (U_FAILURE(status)) {
    494     return;
    495   }
    496   const char* numberingSystemName = ns->getName();
    497   UResourceBundle* rb = ures_open(NULL, inLocale.getName(), &status);
    498   rb = ures_getByKeyWithFallback(rb, gNumberElementsTag, rb, &status);
    499   if (U_FAILURE(status)) {
    500     ures_close(rb);
    501     return;
    502   }
    503   UResourceBundle* shortDataFillIn = NULL;
    504   UResourceBundle* longDataFillIn = NULL;
    505   UResourceBundle* shortData = NULL;
    506   UResourceBundle* longData = NULL;
    507 
    508   if (uprv_strcmp(numberingSystemName, gLatnTag) != 0) {
    509     LocalUResourceBundlePointer localResource(
    510         tryGetByKeyWithFallback(rb, numberingSystemName, NULL, NOT_ROOT, status));
    511     shortData = tryGetDecimalFallback(
    512         localResource.getAlias(), gPatternsShort, &shortDataFillIn, NOT_ROOT, status);
    513     longData = tryGetDecimalFallback(
    514         localResource.getAlias(), gPatternsLong, &longDataFillIn, NOT_ROOT, status);
    515   }
    516   if (U_FAILURE(status)) {
    517     ures_close(shortDataFillIn);
    518     ures_close(longDataFillIn);
    519     ures_close(rb);
    520     return;
    521   }
    522 
    523   // If we haven't found UNUM_SHORT look in latn numbering system. We must
    524   // succeed at finding UNUM_SHORT here.
    525   if (shortData == NULL) {
    526     LocalUResourceBundlePointer latnResource(tryGetByKeyWithFallback(rb, gLatnTag, NULL, MUST, status));
    527     shortData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsShort, &shortDataFillIn, MUST, status);
    528     if (longData == NULL) {
    529       longData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsLong, &longDataFillIn, ANY, status);
    530       if (longData != NULL && isRoot(longData, status) && !isRoot(shortData, status)) {
    531         longData = NULL;
    532       }
    533     }
    534   }
    535   initCDFLocaleStyleData(shortData, &result->shortData, status);
    536   ures_close(shortDataFillIn);
    537   if (U_FAILURE(status)) {
    538     ures_close(longDataFillIn);
    539     ures_close(rb);
    540   }
    541 
    542   if (longData == NULL) {
    543     result->longData.setToBogus();
    544   } else {
    545     initCDFLocaleStyleData(longData, &result->longData, status);
    546   }
    547   ures_close(longDataFillIn);
    548   ures_close(rb);
    549 }
    550 
    551 /**
    552  * tryGetDecimalFallback attempts to fetch the "decimalFormat" resource bundle
    553  * with a particular style. style is either "patternsShort" or "patternsLong."
    554  * FillIn, flags, and status work in the same way as in tryGetByKeyWithFallback.
    555  */
    556 static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
    557   UResourceBundle* first = tryGetByKeyWithFallback(numberSystemResource, style, fillIn, flags, status);
    558   UResourceBundle* second = tryGetByKeyWithFallback(first, gDecimalFormatTag, fillIn, flags, status);
    559   if (fillIn == NULL) {
    560     ures_close(first);
    561   }
    562   return second;
    563 }
    564 
    565 // tryGetByKeyWithFallback returns a sub-resource bundle that matches given
    566 // criteria or NULL if none found. rb is the resource bundle that we are
    567 // searching. If rb == NULL then this function behaves as if no sub-resource
    568 // is found; path is the key of the sub-resource,
    569 // (i.e "foo" but not "foo/bar"); If fillIn is NULL, caller must always call
    570 // ures_close() on returned resource. See below for example when fillIn is
    571 // not NULL. flags is ANY or NOT_ROOT. Optionally, these values
    572 // can be ored with MUST. MUST by itself is the same as ANY | MUST.
    573 // The locale of the returned sub-resource will either match the
    574 // flags or the returned sub-resouce will be NULL. If MUST is included in
    575 // flags, and not suitable sub-resource is found then in addition to returning
    576 // NULL, this function also sets status to U_MISSING_RESOURCE_ERROR. If MUST
    577 // is not included in flags, then this function just returns NULL if no
    578 // such sub-resource is found and will never set status to
    579 // U_MISSING_RESOURCE_ERROR.
    580 //
    581 // Example: This code first searches for "foo/bar" sub-resource without falling
    582 // back to ROOT. Then searches for "baz" sub-resource as last resort.
    583 //
    584 // UResourcebundle* fillIn = NULL;
    585 // UResourceBundle* data = tryGetByKeyWithFallback(rb, "foo", &fillIn, NON_ROOT, status);
    586 // data = tryGetByKeyWithFallback(data, "bar", &fillIn, NON_ROOT, status);
    587 // if (!data) {
    588 //   data = tryGetbyKeyWithFallback(rb, "baz", &fillIn, MUST,  status);
    589 // }
    590 // if (U_FAILURE(status)) {
    591 //   ures_close(fillIn);
    592 //   return;
    593 // }
    594 // doStuffWithNonNullSubresource(data);
    595 //
    596 // /* Wrong! don't do the following as it can leak memory if fillIn gets set
    597 // to NULL. */
    598 // fillIn = tryGetByKeyWithFallback(rb, "wrong", &fillIn, ANY, status);
    599 //
    600 // ures_close(fillIn);
    601 //
    602 static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
    603   if (U_FAILURE(status)) {
    604     return NULL;
    605   }
    606   UBool must = (flags & MUST);
    607   if (rb == NULL) {
    608     if (must) {
    609       status = U_MISSING_RESOURCE_ERROR;
    610     }
    611     return NULL;
    612   }
    613   UResourceBundle* result = NULL;
    614   UResourceBundle* ownedByUs = NULL;
    615   if (fillIn == NULL) {
    616     ownedByUs = ures_getByKeyWithFallback(rb, path, NULL, &status);
    617     result = ownedByUs;
    618   } else {
    619     *fillIn = ures_getByKeyWithFallback(rb, path, *fillIn, &status);
    620     result = *fillIn;
    621   }
    622   if (U_FAILURE(status)) {
    623     ures_close(ownedByUs);
    624     if (status == U_MISSING_RESOURCE_ERROR && !must) {
    625       status = U_ZERO_ERROR;
    626     }
    627     return NULL;
    628   }
    629   flags = (FallbackFlags) (flags & ~MUST);
    630   switch (flags) {
    631     case NOT_ROOT:
    632       {
    633         UBool bRoot = isRoot(result, status);
    634         if (bRoot || U_FAILURE(status)) {
    635           ures_close(ownedByUs);
    636           if (must && (status == U_ZERO_ERROR)) {
    637             status = U_MISSING_RESOURCE_ERROR;
    638           }
    639           return NULL;
    640         }
    641         return result;
    642       }
    643     case ANY:
    644       return result;
    645     default:
    646       ures_close(ownedByUs);
    647       status = U_ILLEGAL_ARGUMENT_ERROR;
    648       return NULL;
    649   }
    650 }
    651 
    652 static UBool isRoot(const UResourceBundle* rb, UErrorCode& status) {
    653   const char* actualLocale = ures_getLocaleByType(
    654       rb, ULOC_ACTUAL_LOCALE, &status);
    655   if (U_FAILURE(status)) {
    656     return FALSE;
    657   }
    658   return uprv_strcmp(actualLocale, gRoot) == 0;
    659 }
    660 
    661 
    662 // initCDFLocaleStyleData loads formatting data for a particular style.
    663 // decimalFormatBundle is the "decimalFormat" resource bundle in CLDR.
    664 // Loaded data stored in result.
    665 static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status) {
    666   if (U_FAILURE(status)) {
    667     return;
    668   }
    669   // Iterate through all the powers of 10.
    670   int32_t size = ures_getSize(decimalFormatBundle);
    671   UResourceBundle* power10 = NULL;
    672   for (int32_t i = 0; i < size; ++i) {
    673     power10 = ures_getByIndex(decimalFormatBundle, i, power10, &status);
    674     if (U_FAILURE(status)) {
    675       ures_close(power10);
    676       return;
    677     }
    678     populatePower10(power10, result, status);
    679     if (U_FAILURE(status)) {
    680       ures_close(power10);
    681       return;
    682     }
    683   }
    684   ures_close(power10);
    685   fillInMissing(result);
    686 }
    687 
    688 // populatePower10 grabs data for a particular power of 10 from CLDR.
    689 // The loaded data is stored in result.
    690 static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status) {
    691   if (U_FAILURE(status)) {
    692     return;
    693   }
    694   char* endPtr = NULL;
    695   double power10 = uprv_strtod(ures_getKey(power10Bundle), &endPtr);
    696   if (*endPtr != 0) {
    697     status = U_INTERNAL_PROGRAM_ERROR;
    698     return;
    699   }
    700   int32_t log10Value = computeLog10(power10, FALSE);
    701   // Silently ignore divisors that are too big.
    702   if (log10Value == MAX_DIGITS) {
    703     return;
    704   }
    705   int32_t size = ures_getSize(power10Bundle);
    706   int32_t numZeros = 0;
    707   UBool otherVariantDefined = FALSE;
    708   UResourceBundle* variantBundle = NULL;
    709   // Iterate over all the plural variants for the power of 10
    710   for (int32_t i = 0; i < size; ++i) {
    711     variantBundle = ures_getByIndex(power10Bundle, i, variantBundle, &status);
    712     if (U_FAILURE(status)) {
    713       ures_close(variantBundle);
    714       return;
    715     }
    716     const char* variant = ures_getKey(variantBundle);
    717     int32_t resLen;
    718     const UChar* formatStrP = ures_getString(variantBundle, &resLen, &status);
    719     if (U_FAILURE(status)) {
    720       ures_close(variantBundle);
    721       return;
    722     }
    723     UnicodeString formatStr(false, formatStrP, resLen);
    724     if (uprv_strcmp(variant, gOther) == 0) {
    725       otherVariantDefined = TRUE;
    726     }
    727     int32_t nz = populatePrefixSuffix(
    728         variant, log10Value, formatStr, result->unitsByVariant, status);
    729     if (U_FAILURE(status)) {
    730       ures_close(variantBundle);
    731       return;
    732     }
    733     if (nz != numZeros) {
    734       // We expect all format strings to have the same number of 0's
    735       // left of the decimal point.
    736       if (numZeros != 0) {
    737         status = U_INTERNAL_PROGRAM_ERROR;
    738         ures_close(variantBundle);
    739         return;
    740       }
    741       numZeros = nz;
    742     }
    743   }
    744   ures_close(variantBundle);
    745   // We expect to find an OTHER variant for each power of 10.
    746   if (!otherVariantDefined) {
    747     status = U_INTERNAL_PROGRAM_ERROR;
    748     return;
    749   }
    750   double divisor = power10;
    751   for (int32_t i = 1; i < numZeros; ++i) {
    752     divisor /= 10.0;
    753   }
    754   result->divisors[log10Value] = divisor;
    755 }
    756 
    757 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
    758 // given variant and log10 value.
    759 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
    760 // formatStr is the format string from which the prefix and suffix are
    761 // extracted. It is usually of form 'Pefix 000 suffix'.
    762 // populatePrefixSuffix returns the number of 0's found in formatStr
    763 // before the decimal point.
    764 // In the special case that formatStr contains only spaces for prefix
    765 // and suffix, populatePrefixSuffix returns log10Value + 1.
    766 static int32_t populatePrefixSuffix(
    767     const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status) {
    768   if (U_FAILURE(status)) {
    769     return 0;
    770   }
    771   int32_t firstIdx = formatStr.indexOf(kZero, LENGTHOF(kZero), 0);
    772   // We must have 0's in format string.
    773   if (firstIdx == -1) {
    774     status = U_INTERNAL_PROGRAM_ERROR;
    775     return 0;
    776   }
    777   int32_t lastIdx = formatStr.lastIndexOf(kZero, LENGTHOF(kZero), firstIdx);
    778   CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
    779   if (U_FAILURE(status)) {
    780     return 0;
    781   }
    782   // Everything up to first 0 is the prefix
    783   unit->prefix = formatStr.tempSubString(0, firstIdx);
    784   fixQuotes(unit->prefix);
    785   // Everything beyond the last 0 is the suffix
    786   unit->suffix = formatStr.tempSubString(lastIdx + 1);
    787   fixQuotes(unit->suffix);
    788 
    789   // If there is effectively no prefix or suffix, ignore the actual number of
    790   // 0's and act as if the number of 0's matches the size of the number.
    791   if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
    792     return log10Value + 1;
    793   }
    794 
    795   // Calculate number of zeros before decimal point
    796   int32_t idx = firstIdx + 1;
    797   while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
    798     ++idx;
    799   }
    800   return (idx - firstIdx);
    801 }
    802 
    803 static UBool onlySpaces(UnicodeString u) {
    804   return u.trim().length() == 0;
    805 }
    806 
    807 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
    808 // Modifies s in place.
    809 static void fixQuotes(UnicodeString& s) {
    810   QuoteState state = OUTSIDE;
    811   int32_t len = s.length();
    812   int32_t dest = 0;
    813   for (int32_t i = 0; i < len; ++i) {
    814     UChar ch = s.charAt(i);
    815     if (ch == u_apos) {
    816       if (state == INSIDE_EMPTY) {
    817         s.setCharAt(dest, ch);
    818         ++dest;
    819       }
    820     } else {
    821       s.setCharAt(dest, ch);
    822       ++dest;
    823     }
    824 
    825     // Update state
    826     switch (state) {
    827       case OUTSIDE:
    828         state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
    829         break;
    830       case INSIDE_EMPTY:
    831       case INSIDE_FULL:
    832         state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
    833         break;
    834       default:
    835         break;
    836     }
    837   }
    838   s.truncate(dest);
    839 }
    840 
    841 // fillInMissing ensures that the data in result is complete.
    842 // result data is complete if for each variant in result, there exists
    843 // a prefix-suffix pair for each log10 value and there also exists
    844 // a divisor for each log10 value.
    845 //
    846 // First this function figures out for which log10 values, the other
    847 // variant already had data. These are the same log10 values defined
    848 // in CLDR.
    849 //
    850 // For each log10 value not defined in CLDR, it uses the divisor for
    851 // the last defined log10 value or 1.
    852 //
    853 // Then for each variant, it does the following. For each log10
    854 // value not defined in CLDR, copy the prefix-suffix pair from the
    855 // previous log10 value. If log10 value is defined in CLDR but is
    856 // missing from given variant, copy the prefix-suffix pair for that
    857 // log10 value from the 'other' variant.
    858 static void fillInMissing(CDFLocaleStyleData* result) {
    859   const CDFUnit* otherUnits =
    860       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
    861   UBool definedInCLDR[MAX_DIGITS];
    862   double lastDivisor = 1.0;
    863   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    864     if (!otherUnits[i].isSet()) {
    865       result->divisors[i] = lastDivisor;
    866       definedInCLDR[i] = FALSE;
    867     } else {
    868       lastDivisor = result->divisors[i];
    869       definedInCLDR[i] = TRUE;
    870     }
    871   }
    872   // Iterate over each variant.
    873   int32_t pos = -1;
    874   const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
    875   for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
    876     CDFUnit* units = (CDFUnit*) element->value.pointer;
    877     for (int32_t i = 0; i < MAX_DIGITS; ++i) {
    878       if (definedInCLDR[i]) {
    879         if (!units[i].isSet()) {
    880           units[i] = otherUnits[i];
    881         }
    882       } else {
    883         if (i == 0) {
    884           units[0].markAsSet();
    885         } else {
    886           units[i] = units[i - 1];
    887         }
    888       }
    889     }
    890   }
    891 }
    892 
    893 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
    894 // value computeLog10 will return MAX_DIGITS -1 even for
    895 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
    896 // up to MAX_DIGITS.
    897 static int32_t computeLog10(double x, UBool inRange) {
    898   int32_t result = 0;
    899   int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
    900   while (x >= 10.0) {
    901     x /= 10.0;
    902     ++result;
    903     if (result == max) {
    904       break;
    905     }
    906   }
    907   return result;
    908 }
    909 
    910 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
    911 // variant and log10 value within table. If no such prefix-suffix pair is
    912 // stored in table, one is created within table before returning pointer.
    913 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
    914   if (U_FAILURE(status)) {
    915     return NULL;
    916   }
    917   CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
    918   if (cdfUnit == NULL) {
    919     cdfUnit = new CDFUnit[MAX_DIGITS];
    920     if (cdfUnit == NULL) {
    921       status = U_MEMORY_ALLOCATION_ERROR;
    922       return NULL;
    923     }
    924     uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
    925     if (U_FAILURE(status)) {
    926       return NULL;
    927     }
    928   }
    929   CDFUnit* result = &cdfUnit[log10Value];
    930   result->markAsSet();
    931   return result;
    932 }
    933 
    934 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
    935 // variant and log10 value within table. If the given variant doesn't exist, it
    936 // falls back to the OTHER variant. Therefore, this method will always return
    937 // some non-NULL value.
    938 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
    939   CharString cvariant;
    940   UErrorCode status = U_ZERO_ERROR;
    941   const CDFUnit *cdfUnit = NULL;
    942   cvariant.appendInvariantChars(variant, status);
    943   if (!U_FAILURE(status)) {
    944     cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
    945   }
    946   if (cdfUnit == NULL) {
    947     cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
    948   }
    949   return &cdfUnit[log10Value];
    950 }
    951 
    952 U_NAMESPACE_END
    953 #endif
    954