1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2009-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURFMT.CPP 10 ******************************************************************************* 11 */ 12 13 #include "unicode/decimfmt.h" 14 #include "unicode/messagepattern.h" 15 #include "unicode/plurfmt.h" 16 #include "unicode/plurrule.h" 17 #include "unicode/utypes.h" 18 #include "cmemory.h" 19 #include "messageimpl.h" 20 #include "nfrule.h" 21 #include "plurrule_impl.h" 22 #include "uassert.h" 23 #include "uhash.h" 24 #include "number_decimalquantity.h" 25 #include "number_utils.h" 26 #include "number_utypes.h" 27 28 #if !UCONFIG_NO_FORMATTING 29 30 U_NAMESPACE_BEGIN 31 32 using number::impl::DecimalQuantity; 33 34 static const UChar OTHER_STRING[] = { 35 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 36 }; 37 38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) 39 40 PluralFormat::PluralFormat(UErrorCode& status) 41 : locale(Locale::getDefault()), 42 msgPattern(status), 43 numberFormat(NULL), 44 offset(0) { 45 init(NULL, UPLURAL_TYPE_CARDINAL, status); 46 } 47 48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) 49 : locale(loc), 50 msgPattern(status), 51 numberFormat(NULL), 52 offset(0) { 53 init(NULL, UPLURAL_TYPE_CARDINAL, status); 54 } 55 56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) 57 : locale(Locale::getDefault()), 58 msgPattern(status), 59 numberFormat(NULL), 60 offset(0) { 61 init(&rules, UPLURAL_TYPE_COUNT, status); 62 } 63 64 PluralFormat::PluralFormat(const Locale& loc, 65 const PluralRules& rules, 66 UErrorCode& status) 67 : locale(loc), 68 msgPattern(status), 69 numberFormat(NULL), 70 offset(0) { 71 init(&rules, UPLURAL_TYPE_COUNT, status); 72 } 73 74 PluralFormat::PluralFormat(const Locale& loc, 75 UPluralType type, 76 UErrorCode& status) 77 : locale(loc), 78 msgPattern(status), 79 numberFormat(NULL), 80 offset(0) { 81 init(NULL, type, status); 82 } 83 84 PluralFormat::PluralFormat(const UnicodeString& pat, 85 UErrorCode& status) 86 : locale(Locale::getDefault()), 87 msgPattern(status), 88 numberFormat(NULL), 89 offset(0) { 90 init(NULL, UPLURAL_TYPE_CARDINAL, status); 91 applyPattern(pat, status); 92 } 93 94 PluralFormat::PluralFormat(const Locale& loc, 95 const UnicodeString& pat, 96 UErrorCode& status) 97 : locale(loc), 98 msgPattern(status), 99 numberFormat(NULL), 100 offset(0) { 101 init(NULL, UPLURAL_TYPE_CARDINAL, status); 102 applyPattern(pat, status); 103 } 104 105 PluralFormat::PluralFormat(const PluralRules& rules, 106 const UnicodeString& pat, 107 UErrorCode& status) 108 : locale(Locale::getDefault()), 109 msgPattern(status), 110 numberFormat(NULL), 111 offset(0) { 112 init(&rules, UPLURAL_TYPE_COUNT, status); 113 applyPattern(pat, status); 114 } 115 116 PluralFormat::PluralFormat(const Locale& loc, 117 const PluralRules& rules, 118 const UnicodeString& pat, 119 UErrorCode& status) 120 : locale(loc), 121 msgPattern(status), 122 numberFormat(NULL), 123 offset(0) { 124 init(&rules, UPLURAL_TYPE_COUNT, status); 125 applyPattern(pat, status); 126 } 127 128 PluralFormat::PluralFormat(const Locale& loc, 129 UPluralType type, 130 const UnicodeString& pat, 131 UErrorCode& status) 132 : locale(loc), 133 msgPattern(status), 134 numberFormat(NULL), 135 offset(0) { 136 init(NULL, type, status); 137 applyPattern(pat, status); 138 } 139 140 PluralFormat::PluralFormat(const PluralFormat& other) 141 : Format(other), 142 locale(other.locale), 143 msgPattern(other.msgPattern), 144 numberFormat(NULL), 145 offset(other.offset) { 146 copyObjects(other); 147 } 148 149 void 150 PluralFormat::copyObjects(const PluralFormat& other) { 151 UErrorCode status = U_ZERO_ERROR; 152 if (numberFormat != NULL) { 153 delete numberFormat; 154 } 155 if (pluralRulesWrapper.pluralRules != NULL) { 156 delete pluralRulesWrapper.pluralRules; 157 } 158 159 if (other.numberFormat == NULL) { 160 numberFormat = NumberFormat::createInstance(locale, status); 161 } else { 162 numberFormat = (NumberFormat*)other.numberFormat->clone(); 163 } 164 if (other.pluralRulesWrapper.pluralRules == NULL) { 165 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); 166 } else { 167 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); 168 } 169 } 170 171 172 PluralFormat::~PluralFormat() { 173 delete numberFormat; 174 } 175 176 void 177 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { 178 if (U_FAILURE(status)) { 179 return; 180 } 181 182 if (rules==NULL) { 183 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); 184 } else { 185 pluralRulesWrapper.pluralRules = rules->clone(); 186 if (pluralRulesWrapper.pluralRules == NULL) { 187 status = U_MEMORY_ALLOCATION_ERROR; 188 return; 189 } 190 } 191 192 numberFormat= NumberFormat::createInstance(locale, status); 193 } 194 195 void 196 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 197 msgPattern.parsePluralStyle(newPattern, NULL, status); 198 if (U_FAILURE(status)) { 199 msgPattern.clear(); 200 offset = 0; 201 return; 202 } 203 offset = msgPattern.getPluralOffset(0); 204 } 205 206 UnicodeString& 207 PluralFormat::format(const Formattable& obj, 208 UnicodeString& appendTo, 209 FieldPosition& pos, 210 UErrorCode& status) const 211 { 212 if (U_FAILURE(status)) return appendTo; 213 214 if (obj.isNumeric()) { 215 return format(obj, obj.getDouble(), appendTo, pos, status); 216 } else { 217 status = U_ILLEGAL_ARGUMENT_ERROR; 218 return appendTo; 219 } 220 } 221 222 UnicodeString 223 PluralFormat::format(int32_t number, UErrorCode& status) const { 224 FieldPosition fpos(FieldPosition::DONT_CARE); 225 UnicodeString result; 226 return format(Formattable(number), number, result, fpos, status); 227 } 228 229 UnicodeString 230 PluralFormat::format(double number, UErrorCode& status) const { 231 FieldPosition fpos(FieldPosition::DONT_CARE); 232 UnicodeString result; 233 return format(Formattable(number), number, result, fpos, status); 234 } 235 236 237 UnicodeString& 238 PluralFormat::format(int32_t number, 239 UnicodeString& appendTo, 240 FieldPosition& pos, 241 UErrorCode& status) const { 242 return format(Formattable(number), (double)number, appendTo, pos, status); 243 } 244 245 UnicodeString& 246 PluralFormat::format(double number, 247 UnicodeString& appendTo, 248 FieldPosition& pos, 249 UErrorCode& status) const { 250 return format(Formattable(number), (double)number, appendTo, pos, status); 251 } 252 253 UnicodeString& 254 PluralFormat::format(const Formattable& numberObject, double number, 255 UnicodeString& appendTo, 256 FieldPosition& pos, 257 UErrorCode& status) const { 258 if (U_FAILURE(status)) { 259 return appendTo; 260 } 261 if (msgPattern.countParts() == 0) { 262 return numberFormat->format(numberObject, appendTo, pos, status); 263 } 264 265 // Get the appropriate sub-message. 266 // Select it based on the formatted number-offset. 267 double numberMinusOffset = number - offset; 268 // Call NumberFormatter to get both the DecimalQuantity and the string. 269 // This call site needs to use more internal APIs than the Java equivalent. 270 number::impl::UFormattedNumberData data; 271 if (offset == 0) { 272 // could be BigDecimal etc. 273 numberObject.populateDecimalQuantity(data.quantity, status); 274 } else { 275 data.quantity.setToDouble(numberMinusOffset); 276 } 277 UnicodeString numberString; 278 auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 279 if(decFmt != nullptr) { 280 decFmt->toNumberFormatter().formatImpl(&data, status); // mutates &data 281 numberString = data.string.toUnicodeString(); 282 } else { 283 if (offset == 0) { 284 numberFormat->format(numberObject, numberString, status); 285 } else { 286 numberFormat->format(numberMinusOffset, numberString, status); 287 } 288 } 289 290 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status); 291 if (U_FAILURE(status)) { return appendTo; } 292 // Replace syntactic # signs in the top level of this sub-message 293 // (not in nested arguments) with the formatted number-offset. 294 const UnicodeString& pattern = msgPattern.getPatternString(); 295 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); 296 for (;;) { 297 const MessagePattern::Part& part = msgPattern.getPart(++partIndex); 298 const UMessagePatternPartType type = part.getType(); 299 int32_t index = part.getIndex(); 300 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 301 return appendTo.append(pattern, prevIndex, index - prevIndex); 302 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || 303 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { 304 appendTo.append(pattern, prevIndex, index - prevIndex); 305 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 306 appendTo.append(numberString); 307 } 308 prevIndex = part.getLimit(); 309 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 310 appendTo.append(pattern, prevIndex, index - prevIndex); 311 prevIndex = index; 312 partIndex = msgPattern.getLimitPartIndex(partIndex); 313 index = msgPattern.getPart(partIndex).getLimit(); 314 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); 315 prevIndex = index; 316 } 317 } 318 } 319 320 UnicodeString& 321 PluralFormat::toPattern(UnicodeString& appendTo) { 322 if (0 == msgPattern.countParts()) { 323 appendTo.setToBogus(); 324 } else { 325 appendTo.append(msgPattern.getPatternString()); 326 } 327 return appendTo; 328 } 329 330 void 331 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { 332 if (U_FAILURE(status)) { 333 return; 334 } 335 locale = loc; 336 msgPattern.clear(); 337 delete numberFormat; 338 offset = 0; 339 numberFormat = NULL; 340 pluralRulesWrapper.reset(); 341 init(NULL, UPLURAL_TYPE_CARDINAL, status); 342 } 343 344 void 345 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { 346 if (U_FAILURE(status)) { 347 return; 348 } 349 NumberFormat* nf = (NumberFormat*)format->clone(); 350 if (nf != NULL) { 351 delete numberFormat; 352 numberFormat = nf; 353 } else { 354 status = U_MEMORY_ALLOCATION_ERROR; 355 } 356 } 357 358 Format* 359 PluralFormat::clone() const 360 { 361 return new PluralFormat(*this); 362 } 363 364 365 PluralFormat& 366 PluralFormat::operator=(const PluralFormat& other) { 367 if (this != &other) { 368 locale = other.locale; 369 msgPattern = other.msgPattern; 370 offset = other.offset; 371 copyObjects(other); 372 } 373 374 return *this; 375 } 376 377 UBool 378 PluralFormat::operator==(const Format& other) const { 379 if (this == &other) { 380 return TRUE; 381 } 382 if (!Format::operator==(other)) { 383 return FALSE; 384 } 385 const PluralFormat& o = (const PluralFormat&)other; 386 return 387 locale == o.locale && 388 msgPattern == o.msgPattern && // implies same offset 389 (numberFormat == NULL) == (o.numberFormat == NULL) && 390 (numberFormat == NULL || *numberFormat == *o.numberFormat) && 391 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && 392 (pluralRulesWrapper.pluralRules == NULL || 393 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); 394 } 395 396 UBool 397 PluralFormat::operator!=(const Format& other) const { 398 return !operator==(other); 399 } 400 401 void 402 PluralFormat::parseObject(const UnicodeString& /*source*/, 403 Formattable& /*result*/, 404 ParsePosition& pos) const 405 { 406 // Parsing not supported. 407 pos.setErrorIndex(pos.getIndex()); 408 } 409 410 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, 411 const PluralSelector& selector, void *context, 412 double number, UErrorCode& ec) { 413 if (U_FAILURE(ec)) { 414 return 0; 415 } 416 int32_t count=pattern.countParts(); 417 double offset; 418 const MessagePattern::Part* part=&pattern.getPart(partIndex); 419 if (MessagePattern::Part::hasNumericValue(part->getType())) { 420 offset=pattern.getNumericValue(*part); 421 ++partIndex; 422 } else { 423 offset=0; 424 } 425 // The keyword is empty until we need to match against a non-explicit, not-"other" value. 426 // Then we get the keyword from the selector. 427 // (In other words, we never call the selector if we match against an explicit value, 428 // or if the only non-explicit keyword is "other".) 429 UnicodeString keyword; 430 UnicodeString other(FALSE, OTHER_STRING, 5); 431 // When we find a match, we set msgStart>0 and also set this boolean to true 432 // to avoid matching the keyword again (duplicates are allowed) 433 // while we continue to look for an explicit-value match. 434 UBool haveKeywordMatch=FALSE; 435 // msgStart is 0 until we find any appropriate sub-message. 436 // We remember the first "other" sub-message if we have not seen any 437 // appropriate sub-message before. 438 // We remember the first matching-keyword sub-message if we have not seen 439 // one of those before. 440 // (The parser allows [does not check for] duplicate keywords. 441 // We just have to make sure to take the first one.) 442 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 443 // at the first keyword match. 444 // We keep going until we find an explicit-value match or reach the end of the plural style. 445 int32_t msgStart=0; 446 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 447 // until ARG_LIMIT or end of plural-only pattern. 448 do { 449 part=&pattern.getPart(partIndex++); 450 const UMessagePatternPartType type = part->getType(); 451 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 452 break; 453 } 454 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); 455 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 456 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { 457 // explicit value like "=2" 458 part=&pattern.getPart(partIndex++); 459 if(number==pattern.getNumericValue(*part)) { 460 // matches explicit value 461 return partIndex; 462 } 463 } else if(!haveKeywordMatch) { 464 // plural keyword like "few" or "other" 465 // Compare "other" first and call the selector if this is not "other". 466 if(pattern.partSubstringMatches(*part, other)) { 467 if(msgStart==0) { 468 msgStart=partIndex; 469 if(0 == keyword.compare(other)) { 470 // This is the first "other" sub-message, 471 // and the selected keyword is also "other". 472 // Do not match "other" again. 473 haveKeywordMatch=TRUE; 474 } 475 } 476 } else { 477 if(keyword.isEmpty()) { 478 keyword=selector.select(context, number-offset, ec); 479 if(msgStart!=0 && (0 == keyword.compare(other))) { 480 // We have already seen an "other" sub-message. 481 // Do not match "other" again. 482 haveKeywordMatch=TRUE; 483 // Skip keyword matching but do getLimitPartIndex(). 484 } 485 } 486 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { 487 // keyword matches 488 msgStart=partIndex; 489 // Do not match this keyword again. 490 haveKeywordMatch=TRUE; 491 } 492 } 493 } 494 partIndex=pattern.getLimitPartIndex(partIndex); 495 } while(++partIndex<count); 496 return msgStart; 497 } 498 499 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { 500 // If no pattern was applied, return null. 501 if (msgPattern.countParts() == 0) { 502 pos.setBeginIndex(-1); 503 pos.setEndIndex(-1); 504 return; 505 } 506 int partIndex = 0; 507 int currMatchIndex; 508 int count=msgPattern.countParts(); 509 int startingAt = pos.getBeginIndex(); 510 if (startingAt < 0) { 511 startingAt = 0; 512 } 513 514 // The keyword is null until we need to match against a non-explicit, not-"other" value. 515 // Then we get the keyword from the selector. 516 // (In other words, we never call the selector if we match against an explicit value, 517 // or if the only non-explicit keyword is "other".) 518 UnicodeString keyword; 519 UnicodeString matchedWord; 520 const UnicodeString& pattern = msgPattern.getPatternString(); 521 int matchedIndex = -1; 522 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples 523 // until the end of the plural-only pattern. 524 while (partIndex < count) { 525 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); 526 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { 527 // Bad format 528 continue; 529 } 530 531 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); 532 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { 533 // Bad format 534 continue; 535 } 536 537 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); 538 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { 539 // Bad format 540 continue; 541 } 542 543 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 544 if (rbnfLenientScanner != NULL) { 545 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 546 int32_t length = -1; 547 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); 548 } 549 else { 550 currMatchIndex = source.indexOf(currArg, startingAt); 551 } 552 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { 553 matchedIndex = currMatchIndex; 554 matchedWord = currArg; 555 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 556 } 557 } 558 if (matchedIndex >= 0) { 559 pos.setBeginIndex(matchedIndex); 560 pos.setEndIndex(matchedIndex + matchedWord.length()); 561 result.setString(keyword); 562 return; 563 } 564 565 // Not found! 566 pos.setBeginIndex(-1); 567 pos.setEndIndex(-1); 568 } 569 570 PluralFormat::PluralSelector::~PluralSelector() {} 571 572 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 573 delete pluralRules; 574 } 575 576 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, 577 UErrorCode& /*ec*/) const { 578 (void)number; // unused except in the assertion 579 IFixedDecimal *dec=static_cast<IFixedDecimal *>(context); 580 return pluralRules->select(*dec); 581 } 582 583 void PluralFormat::PluralSelectorAdapter::reset() { 584 delete pluralRules; 585 pluralRules = NULL; 586 } 587 588 589 U_NAMESPACE_END 590 591 592 #endif /* #if !UCONFIG_NO_FORMATTING */ 593 594 //eof 595