1 /* 2 ******************************************************************************* 3 * Copyright (C) 2009-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File PLURFMT.CPP 8 ******************************************************************************* 9 */ 10 11 #include "unicode/decimfmt.h" 12 #include "unicode/messagepattern.h" 13 #include "unicode/plurfmt.h" 14 #include "unicode/plurrule.h" 15 #include "unicode/utypes.h" 16 #include "cmemory.h" 17 #include "messageimpl.h" 18 #include "nfrule.h" 19 #include "plurrule_impl.h" 20 #include "uassert.h" 21 #include "uhash.h" 22 23 #if !UCONFIG_NO_FORMATTING 24 25 U_NAMESPACE_BEGIN 26 27 static const UChar OTHER_STRING[] = { 28 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 29 }; 30 31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) 32 33 PluralFormat::PluralFormat(UErrorCode& status) 34 : locale(Locale::getDefault()), 35 msgPattern(status), 36 numberFormat(NULL), 37 offset(0) { 38 init(NULL, UPLURAL_TYPE_CARDINAL, status); 39 } 40 41 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) 42 : locale(loc), 43 msgPattern(status), 44 numberFormat(NULL), 45 offset(0) { 46 init(NULL, UPLURAL_TYPE_CARDINAL, status); 47 } 48 49 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) 50 : locale(Locale::getDefault()), 51 msgPattern(status), 52 numberFormat(NULL), 53 offset(0) { 54 init(&rules, UPLURAL_TYPE_COUNT, status); 55 } 56 57 PluralFormat::PluralFormat(const Locale& loc, 58 const PluralRules& rules, 59 UErrorCode& status) 60 : locale(loc), 61 msgPattern(status), 62 numberFormat(NULL), 63 offset(0) { 64 init(&rules, UPLURAL_TYPE_COUNT, status); 65 } 66 67 PluralFormat::PluralFormat(const Locale& loc, 68 UPluralType type, 69 UErrorCode& status) 70 : locale(loc), 71 msgPattern(status), 72 numberFormat(NULL), 73 offset(0) { 74 init(NULL, type, status); 75 } 76 77 PluralFormat::PluralFormat(const UnicodeString& pat, 78 UErrorCode& status) 79 : locale(Locale::getDefault()), 80 msgPattern(status), 81 numberFormat(NULL), 82 offset(0) { 83 init(NULL, UPLURAL_TYPE_CARDINAL, status); 84 applyPattern(pat, status); 85 } 86 87 PluralFormat::PluralFormat(const Locale& loc, 88 const UnicodeString& pat, 89 UErrorCode& status) 90 : locale(loc), 91 msgPattern(status), 92 numberFormat(NULL), 93 offset(0) { 94 init(NULL, UPLURAL_TYPE_CARDINAL, status); 95 applyPattern(pat, status); 96 } 97 98 PluralFormat::PluralFormat(const PluralRules& rules, 99 const UnicodeString& pat, 100 UErrorCode& status) 101 : locale(Locale::getDefault()), 102 msgPattern(status), 103 numberFormat(NULL), 104 offset(0) { 105 init(&rules, UPLURAL_TYPE_COUNT, status); 106 applyPattern(pat, status); 107 } 108 109 PluralFormat::PluralFormat(const Locale& loc, 110 const PluralRules& rules, 111 const UnicodeString& pat, 112 UErrorCode& status) 113 : locale(loc), 114 msgPattern(status), 115 numberFormat(NULL), 116 offset(0) { 117 init(&rules, UPLURAL_TYPE_COUNT, status); 118 applyPattern(pat, status); 119 } 120 121 PluralFormat::PluralFormat(const Locale& loc, 122 UPluralType type, 123 const UnicodeString& pat, 124 UErrorCode& status) 125 : locale(loc), 126 msgPattern(status), 127 numberFormat(NULL), 128 offset(0) { 129 init(NULL, type, status); 130 applyPattern(pat, status); 131 } 132 133 PluralFormat::PluralFormat(const PluralFormat& other) 134 : Format(other), 135 locale(other.locale), 136 msgPattern(other.msgPattern), 137 numberFormat(NULL), 138 offset(other.offset) { 139 copyObjects(other); 140 } 141 142 void 143 PluralFormat::copyObjects(const PluralFormat& other) { 144 UErrorCode status = U_ZERO_ERROR; 145 if (numberFormat != NULL) { 146 delete numberFormat; 147 } 148 if (pluralRulesWrapper.pluralRules != NULL) { 149 delete pluralRulesWrapper.pluralRules; 150 } 151 152 if (other.numberFormat == NULL) { 153 numberFormat = NumberFormat::createInstance(locale, status); 154 } else { 155 numberFormat = (NumberFormat*)other.numberFormat->clone(); 156 } 157 if (other.pluralRulesWrapper.pluralRules == NULL) { 158 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); 159 } else { 160 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); 161 } 162 } 163 164 165 PluralFormat::~PluralFormat() { 166 delete numberFormat; 167 } 168 169 void 170 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { 171 if (U_FAILURE(status)) { 172 return; 173 } 174 175 if (rules==NULL) { 176 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); 177 } else { 178 pluralRulesWrapper.pluralRules = rules->clone(); 179 if (pluralRulesWrapper.pluralRules == NULL) { 180 status = U_MEMORY_ALLOCATION_ERROR; 181 return; 182 } 183 } 184 185 numberFormat= NumberFormat::createInstance(locale, status); 186 } 187 188 void 189 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 190 msgPattern.parsePluralStyle(newPattern, NULL, status); 191 if (U_FAILURE(status)) { 192 msgPattern.clear(); 193 offset = 0; 194 return; 195 } 196 offset = msgPattern.getPluralOffset(0); 197 } 198 199 UnicodeString& 200 PluralFormat::format(const Formattable& obj, 201 UnicodeString& appendTo, 202 FieldPosition& pos, 203 UErrorCode& status) const 204 { 205 if (U_FAILURE(status)) return appendTo; 206 207 if (obj.isNumeric()) { 208 return format(obj, obj.getDouble(), appendTo, pos, status); 209 } else { 210 status = U_ILLEGAL_ARGUMENT_ERROR; 211 return appendTo; 212 } 213 } 214 215 UnicodeString 216 PluralFormat::format(int32_t number, UErrorCode& status) const { 217 FieldPosition fpos(0); 218 UnicodeString result; 219 return format(Formattable(number), number, result, fpos, status); 220 } 221 222 UnicodeString 223 PluralFormat::format(double number, UErrorCode& status) const { 224 FieldPosition fpos(0); 225 UnicodeString result; 226 return format(Formattable(number), number, result, fpos, status); 227 } 228 229 230 UnicodeString& 231 PluralFormat::format(int32_t number, 232 UnicodeString& appendTo, 233 FieldPosition& pos, 234 UErrorCode& status) const { 235 return format(Formattable(number), (double)number, appendTo, pos, status); 236 } 237 238 UnicodeString& 239 PluralFormat::format(double number, 240 UnicodeString& appendTo, 241 FieldPosition& pos, 242 UErrorCode& status) const { 243 return format(Formattable(number), (double)number, appendTo, pos, status); 244 } 245 246 UnicodeString& 247 PluralFormat::format(const Formattable& numberObject, double number, 248 UnicodeString& appendTo, 249 FieldPosition& pos, 250 UErrorCode& status) const { 251 if (U_FAILURE(status)) { 252 return appendTo; 253 } 254 if (msgPattern.countParts() == 0) { 255 return numberFormat->format(numberObject, appendTo, pos, status); 256 } 257 // Get the appropriate sub-message. 258 // Select it based on the formatted number-offset. 259 double numberMinusOffset = number - offset; 260 UnicodeString numberString; 261 FieldPosition ignorePos; 262 FixedDecimal dec(numberMinusOffset); 263 if (offset == 0) { 264 numberFormat->format(numberObject, numberString, ignorePos, status); // could be BigDecimal etc. 265 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 266 if(decFmt != NULL) { 267 dec = decFmt->getFixedDecimal(numberObject, status); 268 } 269 } else { 270 numberFormat->format(numberMinusOffset, numberString, ignorePos, status); 271 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 272 if(decFmt != NULL) { 273 dec = decFmt->getFixedDecimal(numberMinusOffset, status); 274 } 275 } 276 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status); 277 if (U_FAILURE(status)) { return appendTo; } 278 // Replace syntactic # signs in the top level of this sub-message 279 // (not in nested arguments) with the formatted number-offset. 280 const UnicodeString& pattern = msgPattern.getPatternString(); 281 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); 282 for (;;) { 283 const MessagePattern::Part& part = msgPattern.getPart(++partIndex); 284 const UMessagePatternPartType type = part.getType(); 285 int32_t index = part.getIndex(); 286 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 287 return appendTo.append(pattern, prevIndex, index - prevIndex); 288 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || 289 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { 290 appendTo.append(pattern, prevIndex, index - prevIndex); 291 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 292 appendTo.append(numberString); 293 } 294 prevIndex = part.getLimit(); 295 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 296 appendTo.append(pattern, prevIndex, index - prevIndex); 297 prevIndex = index; 298 partIndex = msgPattern.getLimitPartIndex(partIndex); 299 index = msgPattern.getPart(partIndex).getLimit(); 300 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); 301 prevIndex = index; 302 } 303 } 304 } 305 306 UnicodeString& 307 PluralFormat::toPattern(UnicodeString& appendTo) { 308 if (0 == msgPattern.countParts()) { 309 appendTo.setToBogus(); 310 } else { 311 appendTo.append(msgPattern.getPatternString()); 312 } 313 return appendTo; 314 } 315 316 void 317 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { 318 if (U_FAILURE(status)) { 319 return; 320 } 321 locale = loc; 322 msgPattern.clear(); 323 delete numberFormat; 324 offset = 0; 325 numberFormat = NULL; 326 pluralRulesWrapper.reset(); 327 init(NULL, UPLURAL_TYPE_CARDINAL, status); 328 } 329 330 void 331 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { 332 if (U_FAILURE(status)) { 333 return; 334 } 335 NumberFormat* nf = (NumberFormat*)format->clone(); 336 if (nf != NULL) { 337 delete numberFormat; 338 numberFormat = nf; 339 } else { 340 status = U_MEMORY_ALLOCATION_ERROR; 341 } 342 } 343 344 Format* 345 PluralFormat::clone() const 346 { 347 return new PluralFormat(*this); 348 } 349 350 351 PluralFormat& 352 PluralFormat::operator=(const PluralFormat& other) { 353 if (this != &other) { 354 locale = other.locale; 355 msgPattern = other.msgPattern; 356 offset = other.offset; 357 copyObjects(other); 358 } 359 360 return *this; 361 } 362 363 UBool 364 PluralFormat::operator==(const Format& other) const { 365 if (this == &other) { 366 return TRUE; 367 } 368 if (!Format::operator==(other)) { 369 return FALSE; 370 } 371 const PluralFormat& o = (const PluralFormat&)other; 372 return 373 locale == o.locale && 374 msgPattern == o.msgPattern && // implies same offset 375 (numberFormat == NULL) == (o.numberFormat == NULL) && 376 (numberFormat == NULL || *numberFormat == *o.numberFormat) && 377 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && 378 (pluralRulesWrapper.pluralRules == NULL || 379 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); 380 } 381 382 UBool 383 PluralFormat::operator!=(const Format& other) const { 384 return !operator==(other); 385 } 386 387 void 388 PluralFormat::parseObject(const UnicodeString& /*source*/, 389 Formattable& /*result*/, 390 ParsePosition& pos) const 391 { 392 // Parsing not supported. 393 pos.setErrorIndex(pos.getIndex()); 394 } 395 396 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, 397 const PluralSelector& selector, void *context, 398 double number, UErrorCode& ec) { 399 if (U_FAILURE(ec)) { 400 return 0; 401 } 402 int32_t count=pattern.countParts(); 403 double offset; 404 const MessagePattern::Part* part=&pattern.getPart(partIndex); 405 if (MessagePattern::Part::hasNumericValue(part->getType())) { 406 offset=pattern.getNumericValue(*part); 407 ++partIndex; 408 } else { 409 offset=0; 410 } 411 // The keyword is empty until we need to match against a non-explicit, not-"other" value. 412 // Then we get the keyword from the selector. 413 // (In other words, we never call the selector if we match against an explicit value, 414 // or if the only non-explicit keyword is "other".) 415 UnicodeString keyword; 416 UnicodeString other(FALSE, OTHER_STRING, 5); 417 // When we find a match, we set msgStart>0 and also set this boolean to true 418 // to avoid matching the keyword again (duplicates are allowed) 419 // while we continue to look for an explicit-value match. 420 UBool haveKeywordMatch=FALSE; 421 // msgStart is 0 until we find any appropriate sub-message. 422 // We remember the first "other" sub-message if we have not seen any 423 // appropriate sub-message before. 424 // We remember the first matching-keyword sub-message if we have not seen 425 // one of those before. 426 // (The parser allows [does not check for] duplicate keywords. 427 // We just have to make sure to take the first one.) 428 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 429 // at the first keyword match. 430 // We keep going until we find an explicit-value match or reach the end of the plural style. 431 int32_t msgStart=0; 432 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 433 // until ARG_LIMIT or end of plural-only pattern. 434 do { 435 part=&pattern.getPart(partIndex++); 436 const UMessagePatternPartType type = part->getType(); 437 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 438 break; 439 } 440 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); 441 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 442 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { 443 // explicit value like "=2" 444 part=&pattern.getPart(partIndex++); 445 if(number==pattern.getNumericValue(*part)) { 446 // matches explicit value 447 return partIndex; 448 } 449 } else if(!haveKeywordMatch) { 450 // plural keyword like "few" or "other" 451 // Compare "other" first and call the selector if this is not "other". 452 if(pattern.partSubstringMatches(*part, other)) { 453 if(msgStart==0) { 454 msgStart=partIndex; 455 if(0 == keyword.compare(other)) { 456 // This is the first "other" sub-message, 457 // and the selected keyword is also "other". 458 // Do not match "other" again. 459 haveKeywordMatch=TRUE; 460 } 461 } 462 } else { 463 if(keyword.isEmpty()) { 464 keyword=selector.select(context, number-offset, ec); 465 if(msgStart!=0 && (0 == keyword.compare(other))) { 466 // We have already seen an "other" sub-message. 467 // Do not match "other" again. 468 haveKeywordMatch=TRUE; 469 // Skip keyword matching but do getLimitPartIndex(). 470 } 471 } 472 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { 473 // keyword matches 474 msgStart=partIndex; 475 // Do not match this keyword again. 476 haveKeywordMatch=TRUE; 477 } 478 } 479 } 480 partIndex=pattern.getLimitPartIndex(partIndex); 481 } while(++partIndex<count); 482 return msgStart; 483 } 484 485 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { 486 // If no pattern was applied, return null. 487 if (msgPattern.countParts() == 0) { 488 pos.setBeginIndex(-1); 489 pos.setEndIndex(-1); 490 return; 491 } 492 int partIndex = 0; 493 int currMatchIndex; 494 int count=msgPattern.countParts(); 495 int startingAt = pos.getBeginIndex(); 496 if (startingAt < 0) { 497 startingAt = 0; 498 } 499 500 // The keyword is null until we need to match against a non-explicit, not-"other" value. 501 // Then we get the keyword from the selector. 502 // (In other words, we never call the selector if we match against an explicit value, 503 // or if the only non-explicit keyword is "other".) 504 UnicodeString keyword; 505 UnicodeString matchedWord; 506 const UnicodeString& pattern = msgPattern.getPatternString(); 507 int matchedIndex = -1; 508 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples 509 // until the end of the plural-only pattern. 510 while (partIndex < count) { 511 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); 512 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { 513 // Bad format 514 continue; 515 } 516 517 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); 518 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { 519 // Bad format 520 continue; 521 } 522 523 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); 524 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { 525 // Bad format 526 continue; 527 } 528 529 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 530 if (rbnfLenientScanner != NULL) { 531 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 532 int32_t length = -1; 533 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); 534 } 535 else { 536 currMatchIndex = source.indexOf(currArg, startingAt); 537 } 538 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { 539 matchedIndex = currMatchIndex; 540 matchedWord = currArg; 541 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 542 } 543 } 544 if (matchedIndex >= 0) { 545 pos.setBeginIndex(matchedIndex); 546 pos.setEndIndex(matchedIndex + matchedWord.length()); 547 result.setString(keyword); 548 return; 549 } 550 551 // Not found! 552 pos.setBeginIndex(-1); 553 pos.setEndIndex(-1); 554 } 555 556 PluralFormat::PluralSelector::~PluralSelector() {} 557 558 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 559 delete pluralRules; 560 } 561 562 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, 563 UErrorCode& /*ec*/) const { 564 (void)number; // unused except in the assertion 565 FixedDecimal *dec=static_cast<FixedDecimal *>(context); 566 U_ASSERT(dec->source==number); 567 return pluralRules->select(*dec); 568 } 569 570 void PluralFormat::PluralSelectorAdapter::reset() { 571 delete pluralRules; 572 pluralRules = NULL; 573 } 574 575 576 U_NAMESPACE_END 577 578 579 #endif /* #if !UCONFIG_NO_FORMATTING */ 580 581 //eof 582