// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* File COMPACTDECIMALFORMAT.CPP
*
********************************************************************************
*/
#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "charstr.h"
#include "cstring.h"
#include "digitlst.h"
#include "mutex.h"
#include "unicode/compactdecimalformat.h"
#include "unicode/numsys.h"
#include "unicode/plurrule.h"
#include "unicode/ures.h"
#include "ucln_in.h"
#include "uhash.h"
#include "umutex.h"
#include "unicode/ures.h"
#include "uresimp.h"

// Global cache: maps locale name (char*) to an owned CDFLocaleData struct.
// Created lazily and guarded by gCompactDecimalMetaLock in
// getCDFLocaleStyleData; closed by cdf_cleanup.
static UHashtable* gCompactDecimalData = NULL;
static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;

U_NAMESPACE_BEGIN

// Number of log10 buckets (0 .. MAX_DIGITS - 1) kept per plural variant.
static const int32_t MAX_DIGITS = 15;
// Key of the mandatory "other" plural variant.
static const char gOther[] = "other";
// Resource keys and paths used while loading the compact patterns.
static const char gLatnTag[] = "latn";
static const char gNumberElementsTag[] = "NumberElements";
static const char gDecimalFormatTag[] = "decimalFormat";
static const char gPatternsShort[] = "patternsShort";
static const char gPatternsLong[] = "patternsLong";
static const char gLatnPath[] = "NumberElements/latn";

static const UChar u_0 = 0x30;     // '0'
static const UChar u_apos = 0x27;  // '\''

// One-character search pattern used to locate zeros in a format string.
static const UChar kZero[] = {u_0};

// Used to unescape single quotes (see fixQuotes).
enum QuoteState {
    OUTSIDE,       // not inside a quoted run
    INSIDE_EMPTY,  // just consumed an opening quote; nothing inside it yet
    INSIDE_FULL    // inside a quoted run that already has content
};

enum FallbackFlags {
    ANY = 0,
    MUST = 1,
    NOT_ROOT = 2
    // Next one will be 4 then 8 etc.
    // NOTE(review): assumes these are bit flags; the previous comment said
    // "4 then 6", which looks like a typo -- confirm.
};


// CDFUnit represents a prefix-suffix pair for a particular variant
// and log10 value. "Unset" is encoded by a bogus prefix.
struct CDFUnit : public UMemory {
    UnicodeString prefix;
    UnicodeString suffix;
    // A freshly constructed unit is unset.
    inline CDFUnit() : prefix(), suffix() {
        prefix.setToBogus();
    }
    inline ~CDFUnit() {}
    // Returns TRUE once the unit has been populated or marked as set.
    inline UBool isSet() const {
        return !prefix.isBogus();
    }
    // Marks the unit as set by clearing the prefix to a non-bogus empty string.
    inline void markAsSet() {
        prefix.remove();
    }
};

// CDFLocaleStyleData contains formatting data for a particular locale
// and style.
class CDFLocaleStyleData : public UMemory {
 public:
    // What to divide by for each log10 value when formatting. These values
    // will be powers of 10. For English, would be:
    // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
    double divisors[MAX_DIGITS];
    // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
    // To format a number x,
    // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
    // Compute the plural variant for displayNum
    // (e.g zero, one, two, few, many, other).
    // Compute cdfUnits = unitsByVariant[pluralVariant].
    // Prefix and suffix to use at cdfUnits[log10(x)]
    UHashtable* unitsByVariant;
    // A flag for whether or not this CDFLocaleStyleData was loaded from the
    // Latin numbering system as a fallback from the locale numbering system.
    // This value is meaningless if the object is bogus or empty.
    UBool fromFallback;
    // Constructs a bogus object (no table) with all divisors zeroed.
    inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
        uprv_memset(divisors, 0, sizeof(divisors));
    }
    ~CDFLocaleStyleData();
    // Init initializes this object by creating the (empty) variant table.
    // Does nothing if the table already exists.
    void Init(UErrorCode& status);
    // Bogus means there is no variant table at all.
    inline UBool isBogus() const {
        return unitsByVariant == NULL;
    }
    // Closes the variant table, if any, and makes this object bogus.
    void setToBogus();
    // Empty means bogus, or a variant table with no entries.
    UBool isEmpty() {
        return unitsByVariant == NULL || unitsByVariant->count == 0;
    }
 private:
    // Not copyable: declared but not defined in the visible source.
    CDFLocaleStyleData(const CDFLocaleStyleData&);
    CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
};

// CDFLocaleData contains formatting data for a particular locale.
struct CDFLocaleData : public UMemory {
    CDFLocaleStyleData shortData;
    CDFLocaleStyleData longData;
    inline CDFLocaleData() : shortData(), longData() { }
    inline ~CDFLocaleData() { }
    // Init initializes this object (both the short and the long style data).
    void Init(UErrorCode& status);
};

U_NAMESPACE_END

U_CDECL_BEGIN

// Cleanup callback registered via ucln_i18n_registerCleanup: closes the
// global cache and resets the pointer. Always returns TRUE.
static UBool U_CALLCONV cdf_cleanup(void) {
    if (gCompactDecimalData != NULL) {
        uhash_close(gCompactDecimalData);
        gCompactDecimalData = NULL;
    }
    return TRUE;
}

// Hash-table value deleter for CDFUnit arrays (allocated with new[]).
static void U_CALLCONV deleteCDFUnits(void* ptr) {
    delete [] (icu::CDFUnit*) ptr;
}

// Hash-table value deleter for CDFLocaleData objects held by the global cache.
static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
    delete (icu::CDFLocaleData*) ptr;
}

U_CDECL_END

U_NAMESPACE_BEGIN

// Forward declarations of the file-local helpers defined further down.
static UBool divisors_equal(const double* lhs, const double* rhs);
static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);

static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
static double calculateDivisor(double power10, int32_t numZeros);
static UBool onlySpaces(UnicodeString u);
static void fixQuotes(UnicodeString& s);
static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
static void fillInMissing(CDFLocaleStyleData* result);
static int32_t computeLog10(double x, UBool inRange);
static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)

// Builds a compact formatter on top of a copy of decimalFormat.
// unitsByVariant and divisors are stored as plain pointers and are never
// freed by this class; pluralRules is deleted by the destructor.
CompactDecimalFormat::CompactDecimalFormat(
    const DecimalFormat& decimalFormat,
    const UHashtable* unitsByVariant,
    const double* divisors,
    PluralRules* pluralRules)
  : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
}

// Copy constructor: copies the unit-table and divisor pointers as-is and
// clones the plural rules.
CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
}

// Creates a compact decimal formatter for inLocale and style.
// Returns NULL with status set on any failure. On success the caller
// receives the new object, configured with at most 3 significant digits and
// with grouping disabled.
CompactDecimalFormat* U_EXPORT2
CompactDecimalFormat::createInstance(
    const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
    LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
    if (U_FAILURE(status)) {
        return NULL;
    }
    // Points into the global cache; not owned by the formatter.
    const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    CompactDecimalFormat* result =
        new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
    if (result == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // result now owns the plural rules; release them from the LocalPointer
    // only after the allocation succeeded.
    pluralRules.orphan();
    result->setMaximumSignificantDigits(3);
    result->setSignificantDigitsUsed(TRUE);
    result->setGroupingUsed(FALSE);
    return result;
}

// Assignment: copies the base-class state and the table/divisor pointers,
// and replaces the owned plural rules with a clone of rhs's.
CompactDecimalFormat&
CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
    if (this != &rhs) {
        DecimalFormat::operator=(rhs);
        _unitsByVariant = rhs._unitsByVariant;
        _divisors = rhs._divisors;
        delete _pluralRules;
        _pluralRules = rhs._pluralRules->clone();
    }
    return *this;
}

// Only the plural rules are owned by this object.
CompactDecimalFormat::~CompactDecimalFormat() {
    delete _pluralRules;
}


// Returns a heap-allocated copy of this formatter.
Format*
CompactDecimalFormat::clone(void) const {
    return new CompactDecimalFormat(*this);
}

// Equality: same object, or equal as DecimalFormat and equal compact data.
// NOTE(review): the cast below presumably relies on
// DecimalFormat::operator== rejecting objects of a different class -- confirm.
UBool
CompactDecimalFormat::operator==(const Format& that) const {
    if (this == &that) {
        return TRUE;
    }
    return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
}

// Compares the compact-specific state: unit tables, divisors, plural rules.
UBool
CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
    return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
}

// Convenience overload: formats with a local status that is discarded.
UnicodeString&
CompactDecimalFormat::format(
    double number,
    UnicodeString& appendTo,
    FieldPosition& pos) const {
    UErrorCode status = U_ZERO_ERROR;
    return format(number, appendTo, pos, status);
}

// Formats number in compact form and appends it to appendTo:
// prefix + (rounded number / divisor) + suffix, where the prefix/suffix are
// chosen by plural variant and magnitude. Returns appendTo unchanged if
// status is already a failure or rounding fails.
UnicodeString&
CompactDecimalFormat::format(
    double number,
    UnicodeString& appendTo,
    FieldPosition& pos,
    UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    DigitList orig, rounded;
    orig.set(number);
    UBool isNegative;
    _round(orig, rounded, isNegative, status);
    if (U_FAILURE(status)) {
        return appendTo;
    }
    double roundedDouble = rounded.getDouble();
    // NOTE(review): presumably `rounded` keeps the sign, so this negation
    // yields the magnitude used for the log10 lookup -- confirm against _round.
    if (isNegative) {
        roundedDouble = -roundedDouble;
    }
    // inRange == TRUE clamps the index to MAX_DIGITS - 1.
    int32_t baseIdx = computeLog10(roundedDouble, TRUE);
    double numberToFormat = roundedDouble / _divisors[baseIdx];
    // The plural variant is selected from the scaled, unsigned value.
    UnicodeString variant = _pluralRules->select(numberToFormat);
    if (isNegative) {
        numberToFormat = -numberToFormat;
    }
    const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
    appendTo += unit->prefix;
    DecimalFormat::format(numberToFormat, appendTo, pos);
    appendTo += unit->suffix;
    return appendTo;
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(
    double /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// int32_t overload: delegates to the double overload.
UnicodeString&
CompactDecimalFormat::format(
    int32_t number,
    UnicodeString& appendTo,
    FieldPosition& pos) const {
    return format((double) number, appendTo, pos);
}

// int32_t overload with status: delegates to the double overload.
UnicodeString&
CompactDecimalFormat::format(
    int32_t number,
    UnicodeString& appendTo,
    FieldPosition& pos,
    UErrorCode &status) const {
    return format((double) number, appendTo, pos, status);
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(
    int32_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// int64_t overload: converts to double and delegates.
UnicodeString&
CompactDecimalFormat::format(
    int64_t number,
    UnicodeString& appendTo,
    FieldPosition& pos) const {
    return format((double) number, appendTo, pos);
}

// int64_t overload with status: converts to double and delegates.
UnicodeString&
CompactDecimalFormat::format(
    int64_t number,
    UnicodeString& appendTo,
    FieldPosition& pos,
    UErrorCode &status) const {
    return format((double) number, appendTo, pos, status);
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(
    int64_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(
    StringPiece /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(
    const DigitList& /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// Not supported: sets status to U_UNSUPPORTED_ERROR and appends nothing.
UnicodeString&
CompactDecimalFormat::format(const DigitList& /* number */,
                             UnicodeString& appendTo,
                             FieldPosition& /* pos */,
                             UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
    return appendTo;
}

// Parsing is not implemented. This overload has no status parameter, so it
// does nothing at all: neither result nor parsePosition is touched.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    ParsePosition& /* parsePosition */) const {
}

// Parsing is not implemented: sets status to U_UNSUPPORTED_ERROR.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    UErrorCode& status) const {
    status = U_UNSUPPORTED_ERROR;
}

// Currency parsing is not implemented: always returns NULL.
CurrencyAmount*
CompactDecimalFormat::parseCurrency(
    const UnicodeString& /* text */,
    ParsePosition& /* pos */) const {
    return NULL;
}

// Creates the variant table (char* keys, CDFUnit[] values) unless it already
// exists. Keys are freed with uprv_free and values with deleteCDFUnits.
void CDFLocaleStyleData::Init(UErrorCode& status) {
    if (unitsByVariant != NULL) {
        return;
    }
    unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
    if (U_FAILURE(status)) {
        return;
    }
    uhash_setKeyDeleter(unitsByVariant, uprv_free);
    uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
}

// Releases the variant table, if any.
CDFLocaleStyleData::~CDFLocaleStyleData() {
    setToBogus();
}

// Closes the variant table and resets the pointer so isBogus() is TRUE.
void CDFLocaleStyleData::setToBogus() {
    if (unitsByVariant != NULL) {
        uhash_close(unitsByVariant);
        unitsByVariant = NULL;
    }
}

// Initializes shortData, then longData; stops at the first failure.
void CDFLocaleData::Init(UErrorCode& status) {
    shortData.Init(status);
    if (U_FAILURE(status)) {
        return;
    }
    longData.Init(status);
}

// Helper method for operator= 437 static UBool divisors_equal(const double* lhs, const double* rhs) { 438 for (int32_t i = 0; i < MAX_DIGITS; ++i) { 439 if (lhs[i] != rhs[i]) { 440 return FALSE; 441 } 442 } 443 return TRUE; 444 } 445 446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and 447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads 448 // the data from CLDR into the global cache before returning the pointer. If a 449 // UNUM_LONG data is requested for a locale, and that locale does not have 450 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for 451 // that locale. 452 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) { 453 if (U_FAILURE(status)) { 454 return NULL; 455 } 456 CDFLocaleData* result = NULL; 457 const char* key = inLocale.getName(); 458 { 459 Mutex lock(&gCompactDecimalMetaLock); 460 if (gCompactDecimalData == NULL) { 461 gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); 462 if (U_FAILURE(status)) { 463 return NULL; 464 } 465 uhash_setKeyDeleter(gCompactDecimalData, uprv_free); 466 uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData); 467 ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup); 468 } else { 469 result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); 470 } 471 } 472 if (result != NULL) { 473 return extractDataByStyleEnum(*result, style, status); 474 } 475 476 result = loadCDFLocaleData(inLocale, status); 477 if (U_FAILURE(status)) { 478 return NULL; 479 } 480 481 { 482 Mutex lock(&gCompactDecimalMetaLock); 483 CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); 484 if (temp != NULL) { 485 delete result; 486 result = temp; 487 } else { 488 uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status); 489 if (U_FAILURE(status)) { 490 return NULL; 491 } 492 } 493 } 494 return 
extractDataByStyleEnum(*result, style, status); 495 } 496 497 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) { 498 switch (style) { 499 case UNUM_SHORT: 500 return &data.shortData; 501 case UNUM_LONG: 502 if (!data.longData.isBogus()) { 503 return &data.longData; 504 } 505 return &data.shortData; 506 default: 507 status = U_ILLEGAL_ARGUMENT_ERROR; 508 return NULL; 509 } 510 } 511 512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The 513 // caller owns the returned pointer. 514 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) { 515 if (U_FAILURE(status)) { 516 return NULL; 517 } 518 CDFLocaleData* result = new CDFLocaleData; 519 if (result == NULL) { 520 status = U_MEMORY_ALLOCATION_ERROR; 521 return NULL; 522 } 523 result->Init(status); 524 if (U_FAILURE(status)) { 525 delete result; 526 return NULL; 527 } 528 529 load(inLocale, result, status); 530 531 if (U_FAILURE(status)) { 532 delete result; 533 return NULL; 534 } 535 return result; 536 } 537 538 namespace { 539 540 struct CmptDecDataSink : public ResourceSink { 541 542 CDFLocaleData& dataBundle; // Where to save values when they are read 543 UBool isLatin; // Whether or not we are traversing the Latin tree 544 UBool isFallback; // Whether or not we are traversing the Latin tree as fallback 545 546 enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG }; 547 enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT }; 548 549 /* 550 * NumberElements{ <-- top (numbering system table) 551 * latn{ <-- patternsTable (one per numbering system) 552 * patternsLong{ <-- formatsTable (one per pattern) 553 * decimalFormat{ <-- powersOfTenTable (one per format) 554 * 1000{ <-- pluralVariantsTable (one per power of ten) 555 * one{"0 thousand"} <-- plural variant and template 556 */ 557 558 CmptDecDataSink(CDFLocaleData& _dataBundle) 559 : dataBundle(_dataBundle), isLatin(FALSE), 
isFallback(FALSE) {} 560 virtual ~CmptDecDataSink(); 561 562 virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) { 563 // SPECIAL CASE: Don't consume root in the non-Latin numbering system 564 if (isRoot && !isLatin) { return; } 565 566 ResourceTable patternsTable = value.getTable(errorCode); 567 if (U_FAILURE(errorCode)) { return; } 568 for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) { 569 570 // Check for patternsShort or patternsLong 571 EPatternsTableKey patternsTableKey; 572 if (uprv_strcmp(key, gPatternsShort) == 0) { 573 patternsTableKey = PATTERNS_SHORT; 574 } else if (uprv_strcmp(key, gPatternsLong) == 0) { 575 patternsTableKey = PATTERNS_LONG; 576 } else { 577 continue; 578 } 579 580 // Traverse into the formats table 581 ResourceTable formatsTable = value.getTable(errorCode); 582 if (U_FAILURE(errorCode)) { return; } 583 for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) { 584 585 // Check for decimalFormat or currencyFormat 586 EFormatsTableKey formatsTableKey; 587 if (uprv_strcmp(key, gDecimalFormatTag) == 0) { 588 formatsTableKey = DECIMAL_FORMAT; 589 // TODO: Enable this statement when currency support is added 590 // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) { 591 // formatsTableKey = CURRENCY_FORMAT; 592 } else { 593 continue; 594 } 595 596 // Set the current style and destination based on the two keys 597 UNumberCompactStyle style; 598 CDFLocaleStyleData* destination = NULL; 599 if (patternsTableKey == PATTERNS_LONG 600 && formatsTableKey == DECIMAL_FORMAT) { 601 style = UNUM_LONG; 602 destination = &dataBundle.longData; 603 } else if (patternsTableKey == PATTERNS_SHORT 604 && formatsTableKey == DECIMAL_FORMAT) { 605 style = UNUM_SHORT; 606 destination = &dataBundle.shortData; 607 // TODO: Enable the following statements when currency support is added 608 // } else if (patternsTableKey == PATTERNS_SHORT 609 // && formatsTableKey == CURRENCY_FORMAT) { 610 // 
style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named 611 // destination = &dataBundle.shortCurrencyData; 612 // } else { 613 // // Silently ignore this case 614 // continue; 615 } 616 617 // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE: 618 // 1) Don't consume longData if shortData was consumed from the non-Latin 619 // locale numbering system 620 // 2) Don't consume longData for the first time if this is the root bundle and 621 // shortData is already populated from a more specific locale. Note that if 622 // both longData and shortData are both only in root, longData will be 623 // consumed since it is alphabetically before shortData in the bundle. 624 if (isFallback 625 && style == UNUM_LONG 626 && !dataBundle.shortData.isEmpty() 627 && !dataBundle.shortData.fromFallback) { 628 continue; 629 } 630 if (isRoot 631 && style == UNUM_LONG 632 && dataBundle.longData.isEmpty() 633 && !dataBundle.shortData.isEmpty()) { 634 continue; 635 } 636 637 // Set the "fromFallback" flag on the data object 638 destination->fromFallback = isFallback; 639 640 // Traverse into the powers of ten table 641 ResourceTable powersOfTenTable = value.getTable(errorCode); 642 if (U_FAILURE(errorCode)) { return; } 643 for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { 644 645 // The key will always be some even power of 10. e.g 10000. 646 char* endPtr = NULL; 647 double power10 = uprv_strtod(key, &endPtr); 648 if (*endPtr != 0) { 649 errorCode = U_INTERNAL_PROGRAM_ERROR; 650 return; 651 } 652 int32_t log10Value = computeLog10(power10, FALSE); 653 654 // Silently ignore divisors that are too big. 
655 if (log10Value >= MAX_DIGITS) continue; 656 657 // Iterate over the plural variants ("one", "other", etc) 658 ResourceTable pluralVariantsTable = value.getTable(errorCode); 659 if (U_FAILURE(errorCode)) { return; } 660 for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { 661 const char* pluralVariant = key; 662 const UnicodeString formatStr = value.getUnicodeString(errorCode); 663 664 // Copy the data into the in-memory data bundle (do not overwrite 665 // existing values) 666 int32_t numZeros = populatePrefixSuffix( 667 pluralVariant, log10Value, formatStr, 668 destination->unitsByVariant, FALSE, errorCode); 669 670 // If populatePrefixSuffix returns -1, it means that this key has been 671 // encountered already. 672 if (numZeros < 0) { 673 continue; 674 } 675 676 // Set the divisor, which is based on the number of zeros in the template 677 // string. If the divisor from here is different from the one previously 678 // stored, it means that the number of zeros in different plural variants 679 // differs; throw an exception. 680 // TODO: How should I check for floating-point errors here? 681 // Is there a good reason why "divisor" is double and not long like Java? 682 double divisor = calculateDivisor(power10, numZeros); 683 if (destination->divisors[log10Value] != 0.0 684 && destination->divisors[log10Value] != divisor) { 685 errorCode = U_INTERNAL_PROGRAM_ERROR; 686 return; 687 } 688 destination->divisors[log10Value] = divisor; 689 } 690 } 691 } 692 } 693 } 694 }; 695 696 // Virtual destructors must be defined out of line. 
697 CmptDecDataSink::~CmptDecDataSink() {} 698 699 } // namespace 700 701 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) { 702 LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status)); 703 if (U_FAILURE(status)) { 704 return; 705 } 706 const char* nsName = ns->getName(); 707 708 LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status)); 709 if (U_FAILURE(status)) { 710 return; 711 } 712 CmptDecDataSink sink(*result); 713 sink.isFallback = FALSE; 714 715 // First load the number elements data if nsName is not Latin. 716 if (uprv_strcmp(nsName, gLatnTag) != 0) { 717 sink.isLatin = FALSE; 718 CharString path; 719 path.append(gNumberElementsTag, status) 720 .append('/', status) 721 .append(nsName, status); 722 ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status); 723 if (status == U_MISSING_RESOURCE_ERROR) { 724 // Silently ignore and use Latin 725 status = U_ZERO_ERROR; 726 } else if (U_FAILURE(status)) { 727 return; 728 } 729 sink.isFallback = TRUE; 730 } 731 732 // Now load Latin. 733 sink.isLatin = TRUE; 734 ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status); 735 if (U_FAILURE(status)) return; 736 737 // If longData is empty, default it to be equal to shortData 738 if (result->longData.isEmpty()) { 739 result->longData.setToBogus(); 740 } 741 742 // Check for "other" variants in each of the three data classes, and resolve missing elements. 
743 744 if (!result->longData.isBogus()) { 745 checkForOtherVariants(&result->longData, status); 746 if (U_FAILURE(status)) return; 747 fillInMissing(&result->longData); 748 } 749 750 checkForOtherVariants(&result->shortData, status); 751 if (U_FAILURE(status)) return; 752 fillInMissing(&result->shortData); 753 754 // TODO: Enable this statement when currency support is added 755 // checkForOtherVariants(&result->shortCurrencyData, status); 756 // if (U_FAILURE(status)) return; 757 // fillInMissing(&result->shortCurrencyData); 758 } 759 760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a 761 // given variant and log10 value. 762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'. 763 // formatStr is the format string from which the prefix and suffix are 764 // extracted. It is usually of form 'Pefix 000 suffix'. 765 // populatePrefixSuffix returns the number of 0's found in formatStr 766 // before the decimal point. 767 // In the special case that formatStr contains only spaces for prefix 768 // and suffix, populatePrefixSuffix returns log10Value + 1. 769 static int32_t populatePrefixSuffix( 770 const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) { 771 if (U_FAILURE(status)) { 772 return 0; 773 } 774 int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0); 775 // We must have 0's in format string. 
776 if (firstIdx == -1) { 777 status = U_INTERNAL_PROGRAM_ERROR; 778 return 0; 779 } 780 int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx); 781 CDFUnit* unit = createCDFUnit(variant, log10Value, result, status); 782 if (U_FAILURE(status)) { 783 return 0; 784 } 785 786 // Return -1 if we are not overwriting an existing value 787 if (unit->isSet() && !overwrite) { 788 return -1; 789 } 790 unit->markAsSet(); 791 792 // Everything up to first 0 is the prefix 793 unit->prefix = formatStr.tempSubString(0, firstIdx); 794 fixQuotes(unit->prefix); 795 // Everything beyond the last 0 is the suffix 796 unit->suffix = formatStr.tempSubString(lastIdx + 1); 797 fixQuotes(unit->suffix); 798 799 // If there is effectively no prefix or suffix, ignore the actual number of 800 // 0's and act as if the number of 0's matches the size of the number. 801 if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) { 802 return log10Value + 1; 803 } 804 805 // Calculate number of zeros before decimal point 806 int32_t idx = firstIdx + 1; 807 while (idx <= lastIdx && formatStr.charAt(idx) == u_0) { 808 ++idx; 809 } 810 return (idx - firstIdx); 811 } 812 813 // Calculate a divisor based on the magnitude and number of zeros in the 814 // template string. 815 static double calculateDivisor(double power10, int32_t numZeros) { 816 double divisor = power10; 817 for (int32_t i = 1; i < numZeros; ++i) { 818 divisor /= 10.0; 819 } 820 return divisor; 821 } 822 823 static UBool onlySpaces(UnicodeString u) { 824 return u.trim().length() == 0; 825 } 826 827 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j. 828 // Modifies s in place. 
829 static void fixQuotes(UnicodeString& s) { 830 QuoteState state = OUTSIDE; 831 int32_t len = s.length(); 832 int32_t dest = 0; 833 for (int32_t i = 0; i < len; ++i) { 834 UChar ch = s.charAt(i); 835 if (ch == u_apos) { 836 if (state == INSIDE_EMPTY) { 837 s.setCharAt(dest, ch); 838 ++dest; 839 } 840 } else { 841 s.setCharAt(dest, ch); 842 ++dest; 843 } 844 845 // Update state 846 switch (state) { 847 case OUTSIDE: 848 state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE; 849 break; 850 case INSIDE_EMPTY: 851 case INSIDE_FULL: 852 state = ch == u_apos ? OUTSIDE : INSIDE_FULL; 853 break; 854 default: 855 break; 856 } 857 } 858 s.truncate(dest); 859 } 860 861 // Checks to make sure that an "other" variant is present in all 862 // powers of 10. 863 static void checkForOtherVariants(CDFLocaleStyleData* result, 864 UErrorCode& status) { 865 if (result == NULL || result->unitsByVariant == NULL) { 866 return; 867 } 868 869 const CDFUnit* otherByBase = 870 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); 871 if (otherByBase == NULL) { 872 status = U_INTERNAL_PROGRAM_ERROR; 873 return; 874 } 875 876 // Check all other plural variants, and make sure that if 877 // any of them are populated, then other is also populated 878 int32_t pos = UHASH_FIRST; 879 const UHashElement* element; 880 while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) { 881 CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer; 882 if (variantsByBase == otherByBase) continue; 883 for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) { 884 if (variantsByBase[log10Value].isSet() 885 && !otherByBase[log10Value].isSet()) { 886 status = U_INTERNAL_PROGRAM_ERROR; 887 return; 888 } 889 } 890 } 891 } 892 893 // fillInMissing ensures that the data in result is complete. 894 // result data is complete if for each variant in result, there exists 895 // a prefix-suffix pair for each log10 value and there also exists 896 // a divisor for each log10 value. 
897 // 898 // First this function figures out for which log10 values, the other 899 // variant already had data. These are the same log10 values defined 900 // in CLDR. 901 // 902 // For each log10 value not defined in CLDR, it uses the divisor for 903 // the last defined log10 value or 1. 904 // 905 // Then for each variant, it does the following. For each log10 906 // value not defined in CLDR, copy the prefix-suffix pair from the 907 // previous log10 value. If log10 value is defined in CLDR but is 908 // missing from given variant, copy the prefix-suffix pair for that 909 // log10 value from the 'other' variant. 910 static void fillInMissing(CDFLocaleStyleData* result) { 911 const CDFUnit* otherUnits = 912 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); 913 UBool definedInCLDR[MAX_DIGITS]; 914 double lastDivisor = 1.0; 915 for (int32_t i = 0; i < MAX_DIGITS; ++i) { 916 if (!otherUnits[i].isSet()) { 917 result->divisors[i] = lastDivisor; 918 definedInCLDR[i] = FALSE; 919 } else { 920 lastDivisor = result->divisors[i]; 921 definedInCLDR[i] = TRUE; 922 } 923 } 924 // Iterate over each variant. 925 int32_t pos = UHASH_FIRST; 926 const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos); 927 for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) { 928 CDFUnit* units = (CDFUnit*) element->value.pointer; 929 for (int32_t i = 0; i < MAX_DIGITS; ++i) { 930 if (definedInCLDR[i]) { 931 if (!units[i].isSet()) { 932 units[i] = otherUnits[i]; 933 } 934 } else { 935 if (i == 0) { 936 units[0].markAsSet(); 937 } else { 938 units[i] = units[i - 1]; 939 } 940 } 941 } 942 } 943 } 944 945 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest 946 // value computeLog10 will return MAX_DIGITS -1 even for 947 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return 948 // up to MAX_DIGITS. 
949 static int32_t computeLog10(double x, UBool inRange) { 950 int32_t result = 0; 951 int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS; 952 while (x >= 10.0) { 953 x /= 10.0; 954 ++result; 955 if (result == max) { 956 break; 957 } 958 } 959 return result; 960 } 961 962 // createCDFUnit returns a pointer to the prefix-suffix pair for a given 963 // variant and log10 value within table. If no such prefix-suffix pair is 964 // stored in table, one is created within table before returning pointer. 965 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) { 966 if (U_FAILURE(status)) { 967 return NULL; 968 } 969 CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant); 970 if (cdfUnit == NULL) { 971 cdfUnit = new CDFUnit[MAX_DIGITS]; 972 if (cdfUnit == NULL) { 973 status = U_MEMORY_ALLOCATION_ERROR; 974 return NULL; 975 } 976 uhash_put(table, uprv_strdup(variant), cdfUnit, &status); 977 if (U_FAILURE(status)) { 978 return NULL; 979 } 980 } 981 CDFUnit* result = &cdfUnit[log10Value]; 982 return result; 983 } 984 985 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given 986 // variant and log10 value within table. If the given variant doesn't exist, it 987 // falls back to the OTHER variant. Therefore, this method will always return 988 // some non-NULL value. 989 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) { 990 CharString cvariant; 991 UErrorCode status = U_ZERO_ERROR; 992 const CDFUnit *cdfUnit = NULL; 993 cvariant.appendInvariantChars(variant, status); 994 if (!U_FAILURE(status)) { 995 cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data()); 996 } 997 if (cdfUnit == NULL) { 998 cdfUnit = (const CDFUnit*) uhash_get(table, gOther); 999 } 1000 return &cdfUnit[log10Value]; 1001 } 1002 1003 U_NAMESPACE_END 1004 #endif 1005