1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 * 11/01/09 kirtig Added SelectFormat 20 ********************************************************************/ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_FORMATTING 25 26 #include "unicode/appendable.h" 27 #include "unicode/choicfmt.h" 28 #include "unicode/datefmt.h" 29 #include "unicode/decimfmt.h" 30 #include "unicode/localpointer.h" 31 #include "unicode/msgfmt.h" 32 #include "unicode/plurfmt.h" 33 #include "unicode/rbnf.h" 34 #include "unicode/selfmt.h" 35 #include "unicode/smpdtfmt.h" 36 #include "unicode/umsg.h" 37 #include "unicode/ustring.h" 38 #include "cmemory.h" 39 #include "patternprops.h" 40 #include "messageimpl.h" 41 #include "msgfmt_impl.h" 42 #include "uassert.h" 43 #include "uelement.h" 44 #include "uhash.h" 45 #include "ustrfmt.h" 46 #include "util.h" 47 #include "uvector.h" 48 49 // ***************************************************************************** 50 // class MessageFormat 51 // ***************************************************************************** 52 53 #define SINGLE_QUOTE ((UChar)0x0027) 54 #define COMMA ((UChar)0x002C) 55 #define LEFT_CURLY_BRACE ((UChar)0x007B) 56 #define RIGHT_CURLY_BRACE ((UChar)0x007D) 57 58 //--------------------------------------- 59 // static data 60 61 static const UChar ID_NUMBER[] = { 62 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 63 }; 64 static const UChar ID_DATE[] = { 65 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 66 }; 67 static const UChar ID_TIME[] = { 68 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 69 }; 70 static const UChar ID_SPELLOUT[] = { 71 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 72 }; 73 static const UChar ID_ORDINAL[] = { 74 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 75 }; 76 static const UChar ID_DURATION[] = { 77 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 78 }; 79 80 // MessageFormat Type List Number, Date, Time or Choice 81 static const UChar * const TYPE_IDS[] = { 82 ID_NUMBER, 83 ID_DATE, 84 ID_TIME, 85 ID_SPELLOUT, 86 ID_ORDINAL, 87 ID_DURATION, 88 NULL, 89 }; 90 91 static const UChar ID_EMPTY[] = { 92 0 /* empty string, used for default so that null can mark end of list */ 93 }; 94 static const UChar ID_CURRENCY[] = { 95 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 96 }; 97 static const UChar ID_PERCENT[] = { 98 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 99 }; 100 static const UChar ID_INTEGER[] = { 101 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 102 }; 103 104 // NumberFormat modifier list, default, currency, percent or integer 105 static const UChar * const NUMBER_STYLE_IDS[] = { 106 ID_EMPTY, 107 ID_CURRENCY, 108 ID_PERCENT, 109 ID_INTEGER, 110 NULL, 111 }; 112 113 static const UChar ID_SHORT[] = { 114 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 115 }; 116 static const UChar ID_MEDIUM[] = { 117 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 118 }; 119 static const UChar ID_LONG[] = { 120 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 121 }; 122 static const UChar ID_FULL[] = { 123 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 124 }; 125 126 // DateFormat modifier list, default, short, medium, long or full 127 static const UChar * const DATE_STYLE_IDS[] = { 128 ID_EMPTY, 129 ID_SHORT, 130 ID_MEDIUM, 131 ID_LONG, 132 ID_FULL, 133 NULL, 134 }; 135 136 static const icu::DateFormat::EStyle DATE_STYLES[] = { 137 icu::DateFormat::kDefault, 138 icu::DateFormat::kShort, 139 icu::DateFormat::kMedium, 140 icu::DateFormat::kLong, 141 icu::DateFormat::kFull, 142 }; 143 144 static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 145 146 static const UChar NULL_STRING[] = { 147 0x6E, 0x75, 0x6C, 0x6C, 0 // "null" 148 }; 149 150 static const UChar OTHER_STRING[] = { 151 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 152 }; 153 154 U_CDECL_BEGIN 155 static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1, 156 const UHashTok key2) { 157 return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer); 158 } 159 160 U_CDECL_END 161 162 U_NAMESPACE_BEGIN 163 164 // ------------------------------------- 165 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 166 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(MessageFormat::DummyFormat) 167 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 168 169 //-------------------------------------------------------------------- 170 171 /** 172 * Convert an integer value to a string and append the result to 173 * the given UnicodeString. 174 */ 175 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 176 UChar temp[16]; 177 uprv_itou(temp,16,i,10,0); // 10 == radix 178 appendTo.append(temp, -1); 179 return appendTo; 180 } 181 182 183 // AppendableWrapper: encapsulates the result of formatting, keeping track 184 // of the string and its length. 185 class AppendableWrapper : public UMemory { 186 public: 187 AppendableWrapper(Appendable& appendable) : app(appendable), len(0) { 188 } 189 void append(const UnicodeString& s) { 190 app.appendString(s.getBuffer(), s.length()); 191 len += s.length(); 192 } 193 void append(const UChar* s, const int32_t sLength) { 194 app.appendString(s, sLength); 195 len += sLength; 196 } 197 void append(const UnicodeString& s, int32_t start, int32_t length) { 198 append(s.tempSubString(start, length)); 199 } 200 void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) { 201 UnicodeString s; 202 formatter->format(arg, s, ec); 203 if (U_SUCCESS(ec)) { 204 append(s); 205 } 206 } 207 int32_t length() { 208 return len; 209 } 210 private: 211 Appendable& app; 212 int32_t len; 213 }; 214 215 216 // ------------------------------------- 217 // Creates a MessageFormat instance based on the pattern. 218 219 MessageFormat::MessageFormat(const UnicodeString& pattern, 220 UErrorCode& success) 221 : fLocale(Locale::getDefault()), // Uses the default locale 222 msgPattern(success), 223 formatAliases(NULL), 224 formatAliasesCapacity(0), 225 argTypes(NULL), 226 argTypeCount(0), 227 argTypeCapacity(0), 228 hasArgTypeConflicts(FALSE), 229 defaultNumberFormat(NULL), 230 defaultDateFormat(NULL), 231 cachedFormatters(NULL), 232 customFormatArgStarts(NULL), 233 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 234 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 235 { 236 setLocaleIDs(fLocale.getName(), fLocale.getName()); 237 applyPattern(pattern, success); 238 } 239 240 MessageFormat::MessageFormat(const UnicodeString& pattern, 241 const Locale& newLocale, 242 UErrorCode& success) 243 : fLocale(newLocale), 244 msgPattern(success), 245 formatAliases(NULL), 246 formatAliasesCapacity(0), 247 argTypes(NULL), 248 argTypeCount(0), 249 argTypeCapacity(0), 250 hasArgTypeConflicts(FALSE), 251 defaultNumberFormat(NULL), 252 defaultDateFormat(NULL), 253 cachedFormatters(NULL), 254 customFormatArgStarts(NULL), 255 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 256 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 257 { 258 setLocaleIDs(fLocale.getName(), fLocale.getName()); 259 applyPattern(pattern, success); 260 } 261 262 MessageFormat::MessageFormat(const UnicodeString& pattern, 263 const Locale& newLocale, 264 UParseError& parseError, 265 UErrorCode& success) 266 : fLocale(newLocale), 267 msgPattern(success), 268 formatAliases(NULL), 269 formatAliasesCapacity(0), 270 argTypes(NULL), 271 argTypeCount(0), 272 argTypeCapacity(0), 273 hasArgTypeConflicts(FALSE), 274 defaultNumberFormat(NULL), 275 defaultDateFormat(NULL), 276 cachedFormatters(NULL), 277 customFormatArgStarts(NULL), 278 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 279 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 280 { 281 setLocaleIDs(fLocale.getName(), fLocale.getName()); 282 applyPattern(pattern, parseError, success); 283 } 284 285 MessageFormat::MessageFormat(const MessageFormat& that) 286 : 287 Format(that), 288 fLocale(that.fLocale), 289 msgPattern(that.msgPattern), 290 formatAliases(NULL), 291 formatAliasesCapacity(0), 292 argTypes(NULL), 293 argTypeCount(0), 294 argTypeCapacity(0), 295 hasArgTypeConflicts(that.hasArgTypeConflicts), 296 defaultNumberFormat(NULL), 297 defaultDateFormat(NULL), 298 cachedFormatters(NULL), 299 customFormatArgStarts(NULL), 300 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 301 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 302 { 303 // This will take care of creating the hash tables (since they are NULL). 304 UErrorCode ec = U_ZERO_ERROR; 305 copyObjects(that, ec); 306 if (U_FAILURE(ec)) { 307 resetPattern(); 308 } 309 } 310 311 MessageFormat::~MessageFormat() 312 { 313 uhash_close(cachedFormatters); 314 uhash_close(customFormatArgStarts); 315 316 uprv_free(argTypes); 317 uprv_free(formatAliases); 318 delete defaultNumberFormat; 319 delete defaultDateFormat; 320 } 321 322 //-------------------------------------------------------------------- 323 // Variable-size array management 324 325 /** 326 * Allocate argTypes[] to at least the given capacity and return 327 * TRUE if successful. If not, leave argTypes[] unchanged. 328 * 329 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 330 * if necessary to be at least as large as specified. 331 */ 332 UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) { 333 if (U_FAILURE(status)) { 334 return FALSE; 335 } 336 if (argTypeCapacity >= capacity) { 337 return TRUE; 338 } 339 if (capacity < DEFAULT_INITIAL_CAPACITY) { 340 capacity = DEFAULT_INITIAL_CAPACITY; 341 } else if (capacity < 2*argTypeCapacity) { 342 capacity = 2*argTypeCapacity; 343 } 344 Formattable::Type* a = (Formattable::Type*) 345 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 346 if (a == NULL) { 347 status = U_MEMORY_ALLOCATION_ERROR; 348 return FALSE; 349 } 350 argTypes = a; 351 argTypeCapacity = capacity; 352 return TRUE; 353 } 354 355 // ------------------------------------- 356 // assignment operator 357 358 const MessageFormat& 359 MessageFormat::operator=(const MessageFormat& that) 360 { 361 if (this != &that) { 362 // Calls the super class for assignment first. 363 Format::operator=(that); 364 365 setLocale(that.fLocale); 366 msgPattern = that.msgPattern; 367 hasArgTypeConflicts = that.hasArgTypeConflicts; 368 369 UErrorCode ec = U_ZERO_ERROR; 370 copyObjects(that, ec); 371 if (U_FAILURE(ec)) { 372 resetPattern(); 373 } 374 } 375 return *this; 376 } 377 378 UBool 379 MessageFormat::operator==(const Format& rhs) const 380 { 381 if (this == &rhs) return TRUE; 382 383 MessageFormat& that = (MessageFormat&)rhs; 384 385 // Check class ID before checking MessageFormat members 386 if (!Format::operator==(rhs) || 387 msgPattern != that.msgPattern || 388 fLocale != that.fLocale) { 389 return FALSE; 390 } 391 392 // Compare hashtables. 393 if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) { 394 return FALSE; 395 } 396 if (customFormatArgStarts == NULL) { 397 return TRUE; 398 } 399 400 UErrorCode ec = U_ZERO_ERROR; 401 const int32_t count = uhash_count(customFormatArgStarts); 402 const int32_t rhs_count = uhash_count(that.customFormatArgStarts); 403 if (count != rhs_count) { 404 return FALSE; 405 } 406 int32_t idx = 0, rhs_idx = 0, pos = -1, rhs_pos = -1; 407 for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) { 408 const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos); 409 const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos); 410 if (cur->key.integer != rhs_cur->key.integer) { 411 return FALSE; 412 } 413 const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer); 414 const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer); 415 if (*format != *rhs_format) { 416 return FALSE; 417 } 418 } 419 return TRUE; 420 } 421 422 // ------------------------------------- 423 // Creates a copy of this MessageFormat, the caller owns the copy. 424 425 Format* 426 MessageFormat::clone() const 427 { 428 return new MessageFormat(*this); 429 } 430 431 // ------------------------------------- 432 // Sets the locale of this MessageFormat object to theLocale. 433 434 void 435 MessageFormat::setLocale(const Locale& theLocale) 436 { 437 if (fLocale != theLocale) { 438 delete defaultNumberFormat; 439 defaultNumberFormat = NULL; 440 delete defaultDateFormat; 441 defaultDateFormat = NULL; 442 fLocale = theLocale; 443 setLocaleIDs(fLocale.getName(), fLocale.getName()); 444 pluralProvider.reset(&fLocale); 445 ordinalProvider.reset(&fLocale); 446 } 447 } 448 449 // ------------------------------------- 450 // Gets the locale of this MessageFormat object. 451 452 const Locale& 453 MessageFormat::getLocale() const 454 { 455 return fLocale; 456 } 457 458 void 459 MessageFormat::applyPattern(const UnicodeString& newPattern, 460 UErrorCode& status) 461 { 462 UParseError parseError; 463 applyPattern(newPattern,parseError,status); 464 } 465 466 467 // ------------------------------------- 468 // Applies the new pattern and returns an error if the pattern 469 // is not correct. 470 void 471 MessageFormat::applyPattern(const UnicodeString& pattern, 472 UParseError& parseError, 473 UErrorCode& ec) 474 { 475 if(U_FAILURE(ec)) { 476 return; 477 } 478 msgPattern.parse(pattern, &parseError, ec); 479 cacheExplicitFormats(ec); 480 481 if (U_FAILURE(ec)) { 482 resetPattern(); 483 } 484 } 485 486 void MessageFormat::resetPattern() { 487 msgPattern.clear(); 488 uhash_close(cachedFormatters); 489 cachedFormatters = NULL; 490 uhash_close(customFormatArgStarts); 491 customFormatArgStarts = NULL; 492 argTypeCount = 0; 493 hasArgTypeConflicts = FALSE; 494 } 495 496 void 497 MessageFormat::applyPattern(const UnicodeString& pattern, 498 UMessagePatternApostropheMode aposMode, 499 UParseError* parseError, 500 UErrorCode& status) { 501 if (aposMode != msgPattern.getApostropheMode()) { 502 msgPattern.clearPatternAndSetApostropheMode(aposMode); 503 } 504 applyPattern(pattern, *parseError, status); 505 } 506 507 // ------------------------------------- 508 // Converts this MessageFormat instance to a pattern. 509 510 UnicodeString& 511 MessageFormat::toPattern(UnicodeString& appendTo) const { 512 if ((customFormatArgStarts != NULL && 0 != uhash_count(customFormatArgStarts)) || 513 0 == msgPattern.countParts() 514 ) { 515 appendTo.setToBogus(); 516 return appendTo; 517 } 518 return appendTo.append(msgPattern.getPatternString()); 519 } 520 521 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const { 522 if (partIndex != 0) { 523 partIndex = msgPattern.getLimitPartIndex(partIndex); 524 } 525 for (;;) { 526 UMessagePatternPartType type = msgPattern.getPartType(++partIndex); 527 if (type == UMSGPAT_PART_TYPE_ARG_START) { 528 return partIndex; 529 } 530 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 531 return -1; 532 } 533 } 534 } 535 536 void MessageFormat::setArgStartFormat(int32_t argStart, 537 Format* formatter, 538 UErrorCode& status) { 539 if (U_FAILURE(status)) { 540 delete formatter; 541 } 542 if (cachedFormatters == NULL) { 543 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 544 equalFormatsForHash, &status); 545 if (U_FAILURE(status)) { 546 delete formatter; 547 return; 548 } 549 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); 550 } 551 if (formatter == NULL) { 552 formatter = new DummyFormat(); 553 } 554 uhash_iput(cachedFormatters, argStart, formatter, &status); 555 } 556 557 558 UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) { 559 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 560 return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ? 561 msgPattern.partSubstringMatches(part, argName) : 562 part.getValue() == argNumber; // ARG_NUMBER 563 } 564 565 // Sets a custom formatter for a MessagePattern ARG_START part index. 566 // "Custom" formatters are provided by the user via setFormat() or similar APIs. 567 void MessageFormat::setCustomArgStartFormat(int32_t argStart, 568 Format* formatter, 569 UErrorCode& status) { 570 setArgStartFormat(argStart, formatter, status); 571 if (customFormatArgStarts == NULL) { 572 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 573 NULL, &status); 574 } 575 uhash_iputi(customFormatArgStarts, argStart, 1, &status); 576 } 577 578 Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const { 579 if (cachedFormatters == NULL) { 580 return NULL; 581 } 582 void* ptr = uhash_iget(cachedFormatters, argumentNumber); 583 if (ptr != NULL && dynamic_cast<DummyFormat*>((Format*)ptr) == NULL) { 584 return (Format*) ptr; 585 } else { 586 // Not cached, or a DummyFormat representing setFormat(NULL). 587 return NULL; 588 } 589 } 590 591 // ------------------------------------- 592 // Adopts the new formats array and updates the array count. 593 // This MessageFormat instance owns the new formats. 594 void 595 MessageFormat::adoptFormats(Format** newFormats, 596 int32_t count) { 597 if (newFormats == NULL || count < 0) { 598 return; 599 } 600 // Throw away any cached formatters. 601 if (cachedFormatters != NULL) { 602 uhash_removeAll(cachedFormatters); 603 } 604 if (customFormatArgStarts != NULL) { 605 uhash_removeAll(customFormatArgStarts); 606 } 607 608 int32_t formatNumber = 0; 609 UErrorCode status = U_ZERO_ERROR; 610 for (int32_t partIndex = 0; 611 formatNumber < count && U_SUCCESS(status) && 612 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 613 setCustomArgStartFormat(partIndex, newFormats[formatNumber], status); 614 ++formatNumber; 615 } 616 // Delete those that didn't get used (if any). 617 for (; formatNumber < count; ++formatNumber) { 618 delete newFormats[formatNumber]; 619 } 620 621 } 622 623 // ------------------------------------- 624 // Sets the new formats array and updates the array count. 625 // This MessageFormat instance maks a copy of the new formats. 626 627 void 628 MessageFormat::setFormats(const Format** newFormats, 629 int32_t count) { 630 if (newFormats == NULL || count < 0) { 631 return; 632 } 633 // Throw away any cached formatters. 634 if (cachedFormatters != NULL) { 635 uhash_removeAll(cachedFormatters); 636 } 637 if (customFormatArgStarts != NULL) { 638 uhash_removeAll(customFormatArgStarts); 639 } 640 641 UErrorCode status = U_ZERO_ERROR; 642 int32_t formatNumber = 0; 643 for (int32_t partIndex = 0; 644 formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 645 Format* newFormat = NULL; 646 if (newFormats[formatNumber] != NULL) { 647 newFormat = newFormats[formatNumber]->clone(); 648 if (newFormat == NULL) { 649 status = U_MEMORY_ALLOCATION_ERROR; 650 } 651 } 652 setCustomArgStartFormat(partIndex, newFormat, status); 653 ++formatNumber; 654 } 655 if (U_FAILURE(status)) { 656 resetPattern(); 657 } 658 } 659 660 // ------------------------------------- 661 // Adopt a single format by format number. 662 // Do nothing if the format number is not less than the array count. 663 664 void 665 MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 666 LocalPointer<Format> p(newFormat); 667 if (n >= 0) { 668 int32_t formatNumber = 0; 669 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 670 if (n == formatNumber) { 671 UErrorCode status = U_ZERO_ERROR; 672 setCustomArgStartFormat(partIndex, p.orphan(), status); 673 return; 674 } 675 ++formatNumber; 676 } 677 } 678 } 679 680 // ------------------------------------- 681 // Adopt a single format by format name. 682 // Do nothing if there is no match of formatName. 683 void 684 MessageFormat::adoptFormat(const UnicodeString& formatName, 685 Format* formatToAdopt, 686 UErrorCode& status) { 687 LocalPointer<Format> p(formatToAdopt); 688 if (U_FAILURE(status)) { 689 return; 690 } 691 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 692 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 693 status = U_ILLEGAL_ARGUMENT_ERROR; 694 return; 695 } 696 for (int32_t partIndex = 0; 697 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 698 ) { 699 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 700 Format* f; 701 if (p.isValid()) { 702 f = p.orphan(); 703 } else if (formatToAdopt == NULL) { 704 f = NULL; 705 } else { 706 f = formatToAdopt->clone(); 707 if (f == NULL) { 708 status = U_MEMORY_ALLOCATION_ERROR; 709 return; 710 } 711 } 712 setCustomArgStartFormat(partIndex, f, status); 713 } 714 } 715 } 716 717 // ------------------------------------- 718 // Set a single format. 719 // Do nothing if the variable is not less than the array count. 720 void 721 MessageFormat::setFormat(int32_t n, const Format& newFormat) { 722 723 if (n >= 0) { 724 int32_t formatNumber = 0; 725 for (int32_t partIndex = 0; 726 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 727 if (n == formatNumber) { 728 Format* new_format = newFormat.clone(); 729 if (new_format) { 730 UErrorCode status = U_ZERO_ERROR; 731 setCustomArgStartFormat(partIndex, new_format, status); 732 } 733 return; 734 } 735 ++formatNumber; 736 } 737 } 738 } 739 740 // ------------------------------------- 741 // Get a single format by format name. 742 // Do nothing if the variable is not less than the array count. 743 Format * 744 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 745 if (U_FAILURE(status) || cachedFormatters == NULL) return NULL; 746 747 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 748 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 749 status = U_ILLEGAL_ARGUMENT_ERROR; 750 return NULL; 751 } 752 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 753 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 754 return getCachedFormatter(partIndex); 755 } 756 } 757 return NULL; 758 } 759 760 // ------------------------------------- 761 // Set a single format by format name 762 // Do nothing if the variable is not less than the array count. 763 void 764 MessageFormat::setFormat(const UnicodeString& formatName, 765 const Format& newFormat, 766 UErrorCode& status) { 767 if (U_FAILURE(status)) return; 768 769 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 770 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 771 status = U_ILLEGAL_ARGUMENT_ERROR; 772 return; 773 } 774 for (int32_t partIndex = 0; 775 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 776 ) { 777 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 778 if (&newFormat == NULL) { 779 setCustomArgStartFormat(partIndex, NULL, status); 780 } else { 781 Format* new_format = newFormat.clone(); 782 if (new_format == NULL) { 783 status = U_MEMORY_ALLOCATION_ERROR; 784 return; 785 } 786 setCustomArgStartFormat(partIndex, new_format, status); 787 } 788 } 789 } 790 } 791 792 // ------------------------------------- 793 // Gets the format array. 794 const Format** 795 MessageFormat::getFormats(int32_t& cnt) const 796 { 797 // This old API returns an array (which we hold) of Format* 798 // pointers. The array is valid up to the next call to any 799 // method on this object. We construct and resize an array 800 // on demand that contains aliases to the subformats[i].format 801 // pointers. 802 MessageFormat* t = const_cast<MessageFormat*> (this); 803 cnt = 0; 804 if (formatAliases == NULL) { 805 t->formatAliasesCapacity = (argTypeCount<10) ? 10 : argTypeCount; 806 Format** a = (Format**) 807 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 808 if (a == NULL) { 809 t->formatAliasesCapacity = 0; 810 return NULL; 811 } 812 t->formatAliases = a; 813 } else if (argTypeCount > formatAliasesCapacity) { 814 Format** a = (Format**) 815 uprv_realloc(formatAliases, sizeof(Format*) * argTypeCount); 816 if (a == NULL) { 817 t->formatAliasesCapacity = 0; 818 return NULL; 819 } 820 t->formatAliases = a; 821 t->formatAliasesCapacity = argTypeCount; 822 } 823 824 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 825 t->formatAliases[cnt++] = getCachedFormatter(partIndex); 826 } 827 828 return (const Format**)formatAliases; 829 } 830 831 832 UnicodeString MessageFormat::getArgName(int32_t partIndex) { 833 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 834 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 835 return msgPattern.getSubstring(part); 836 } else { 837 UnicodeString temp; 838 return itos(part.getValue(), temp); 839 } 840 } 841 842 StringEnumeration* 843 MessageFormat::getFormatNames(UErrorCode& status) { 844 if (U_FAILURE(status)) return NULL; 845 846 UVector *fFormatNames = new UVector(status); 847 if (U_FAILURE(status)) { 848 status = U_MEMORY_ALLOCATION_ERROR; 849 return NULL; 850 } 851 fFormatNames->setDeleter(uprv_deleteUObject); 852 853 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 854 fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status); 855 } 856 857 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 858 return nameEnumerator; 859 } 860 861 // ------------------------------------- 862 // Formats the source Formattable array and copy into the result buffer. 863 // Ignore the FieldPosition result for error checking. 864 865 UnicodeString& 866 MessageFormat::format(const Formattable* source, 867 int32_t cnt, 868 UnicodeString& appendTo, 869 FieldPosition& ignore, 870 UErrorCode& success) const 871 { 872 return format(source, NULL, cnt, appendTo, &ignore, success); 873 } 874 875 // ------------------------------------- 876 // Internally creates a MessageFormat instance based on the 877 // pattern and formats the arguments Formattable array and 878 // copy into the appendTo buffer. 879 880 UnicodeString& 881 MessageFormat::format( const UnicodeString& pattern, 882 const Formattable* arguments, 883 int32_t cnt, 884 UnicodeString& appendTo, 885 UErrorCode& success) 886 { 887 MessageFormat temp(pattern, success); 888 return temp.format(arguments, NULL, cnt, appendTo, NULL, success); 889 } 890 891 // ------------------------------------- 892 // Formats the source Formattable object and copy into the 893 // appendTo buffer. The Formattable object must be an array 894 // of Formattable instances, returns error otherwise. 895 896 UnicodeString& 897 MessageFormat::format(const Formattable& source, 898 UnicodeString& appendTo, 899 FieldPosition& ignore, 900 UErrorCode& success) const 901 { 902 if (U_FAILURE(success)) 903 return appendTo; 904 if (source.getType() != Formattable::kArray) { 905 success = U_ILLEGAL_ARGUMENT_ERROR; 906 return appendTo; 907 } 908 int32_t cnt; 909 const Formattable* tmpPtr = source.getArray(cnt); 910 return format(tmpPtr, NULL, cnt, appendTo, &ignore, success); 911 } 912 913 UnicodeString& 914 MessageFormat::format(const UnicodeString* argumentNames, 915 const Formattable* arguments, 916 int32_t count, 917 UnicodeString& appendTo, 918 UErrorCode& success) const { 919 return format(arguments, argumentNames, count, appendTo, NULL, success); 920 } 921 922 // Does linear search to find the match for an ArgName. 923 const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments, 924 const UnicodeString *argumentNames, 925 int32_t cnt, UnicodeString& name) const { 926 for (int32_t i = 0; i < cnt; ++i) { 927 if (0 == argumentNames[i].compare(name)) { 928 return arguments + i; 929 } 930 } 931 return NULL; 932 } 933 934 935 UnicodeString& 936 MessageFormat::format(const Formattable* arguments, 937 const UnicodeString *argumentNames, 938 int32_t cnt, 939 UnicodeString& appendTo, 940 FieldPosition* pos, 941 UErrorCode& status) const { 942 if (U_FAILURE(status)) { 943 return appendTo; 944 } 945 946 UnicodeStringAppendable usapp(appendTo); 947 AppendableWrapper app(usapp); 948 format(0, 0.0, arguments, argumentNames, cnt, app, pos, status); 949 return appendTo; 950 } 951 952 // if argumentNames is NULL, this means arguments is a numeric array. 953 // arguments can not be NULL. 954 void MessageFormat::format(int32_t msgStart, double pluralNumber, 955 const Formattable* arguments, 956 const UnicodeString *argumentNames, 957 int32_t cnt, 958 AppendableWrapper& appendTo, 959 FieldPosition* ignore, 960 UErrorCode& success) const { 961 if (U_FAILURE(success)) { 962 return; 963 } 964 965 const UnicodeString& msgString = msgPattern.getPatternString(); 966 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 967 for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) { 968 const MessagePattern::Part* part = &msgPattern.getPart(i); 969 const UMessagePatternPartType type = part->getType(); 970 int32_t index = part->getIndex(); 971 appendTo.append(msgString, prevIndex, index - prevIndex); 972 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 973 return; 974 } 975 prevIndex = part->getLimit(); 976 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 977 const NumberFormat* nf = getDefaultNumberFormat(success); 978 appendTo.formatAndAppend(nf, Formattable(pluralNumber), success); 979 continue; 980 } 981 if (type != UMSGPAT_PART_TYPE_ARG_START) { 982 continue; 983 } 984 int32_t argLimit = msgPattern.getLimitPartIndex(i); 985 UMessagePatternArgType argType = part->getArgType(); 986 part = &msgPattern.getPart(++i); 987 const Formattable* arg; 988 UnicodeString noArg; 989 if (argumentNames == NULL) { 990 int32_t argNumber = part->getValue(); // ARG_NUMBER 991 if (0 <= argNumber && argNumber < cnt) { 992 arg = arguments + argNumber; 993 } else { 994 arg = NULL; 995 noArg.append(LEFT_CURLY_BRACE); 996 itos(argNumber, noArg); 997 noArg.append(RIGHT_CURLY_BRACE); 998 } 999 } else { 1000 UnicodeString key; 1001 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 1002 key = msgPattern.getSubstring(*part); 1003 } else /* UMSGPAT_PART_TYPE_ARG_NUMBER */ { 1004 itos(part->getValue(), key); 1005 } 1006 arg = getArgFromListByName(arguments, argumentNames, cnt, key); 1007 if (arg == NULL) { 1008 noArg.append(LEFT_CURLY_BRACE); 1009 noArg.append(key); 1010 noArg.append(RIGHT_CURLY_BRACE); 1011 } 1012 } 1013 ++i; 1014 int32_t prevDestLength = appendTo.length(); 1015 const Format* formatter = NULL; 1016 if (!noArg.isEmpty()) { 1017 appendTo.append(noArg); 1018 } else if (arg == NULL) { 1019 appendTo.append(NULL_STRING, 4); 1020 } else if ((formatter = getCachedFormatter(i -2))) { 1021 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings. 1022 if (dynamic_cast<const ChoiceFormat*>(formatter) || 1023 dynamic_cast<const PluralFormat*>(formatter) || 1024 dynamic_cast<const SelectFormat*>(formatter)) { 1025 // We only handle nested formats here if they were provided via 1026 // setFormat() or its siblings. Otherwise they are not cached and instead 1027 // handled below according to argType. 1028 UnicodeString subMsgString; 1029 formatter->format(*arg, subMsgString, success); 1030 if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 || 1031 (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern)) 1032 ) { 1033 MessageFormat subMsgFormat(subMsgString, fLocale, success); 1034 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, ignore, success); 1035 } else { 1036 appendTo.append(subMsgString); 1037 } 1038 } else { 1039 appendTo.formatAndAppend(formatter, *arg, success); 1040 } 1041 } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { 1042 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1043 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1044 // for the hash table containind DummyFormat. 1045 if (arg->isNumeric()) { 1046 const NumberFormat* nf = getDefaultNumberFormat(success); 1047 appendTo.formatAndAppend(nf, *arg, success); 1048 } else if (arg->getType() == Formattable::kDate) { 1049 const DateFormat* df = getDefaultDateFormat(success); 1050 appendTo.formatAndAppend(df, *arg, success); 1051 } else { 1052 appendTo.append(arg->getString(success)); 1053 } 1054 } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { 1055 if (!arg->isNumeric()) { 1056 success = U_ILLEGAL_ARGUMENT_ERROR; 1057 return; 1058 } 1059 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1060 // because only this one converts non-double numeric types to double. 1061 const double number = arg->getDouble(success); 1062 int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number); 1063 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1064 cnt, appendTo, success); 1065 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) { 1066 if (!arg->isNumeric()) { 1067 success = U_ILLEGAL_ARGUMENT_ERROR; 1068 return; 1069 } 1070 const PluralFormat::PluralSelector &selector = 1071 argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider; 1072 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1073 // because only this one converts non-double numeric types to double. 1074 double number = arg->getDouble(success); 1075 int32_t subMsgStart = PluralFormat::findSubMessage(msgPattern, i, selector, number, 1076 success); 1077 double offset = msgPattern.getPluralOffset(i); 1078 formatComplexSubMessage(subMsgStart, number-offset, arguments, argumentNames, 1079 cnt, appendTo, success); 1080 } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { 1081 int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success); 1082 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1083 cnt, appendTo, success); 1084 } else { 1085 // This should never happen. 1086 success = U_INTERNAL_PROGRAM_ERROR; 1087 return; 1088 } 1089 ignore = updateMetaData(appendTo, prevDestLength, ignore, arg); 1090 prevIndex = msgPattern.getPart(argLimit).getLimit(); 1091 i = argLimit; 1092 } 1093 } 1094 1095 1096 void MessageFormat::formatComplexSubMessage(int32_t msgStart, 1097 double pluralNumber, 1098 const Formattable* arguments, 1099 const UnicodeString *argumentNames, 1100 int32_t cnt, 1101 AppendableWrapper& appendTo, 1102 UErrorCode& success) const { 1103 if (U_FAILURE(success)) { 1104 return; 1105 } 1106 1107 if (!MessageImpl::jdkAposMode(msgPattern)) { 1108 format(msgStart, pluralNumber, arguments, argumentNames, cnt, appendTo, NULL, success); 1109 return; 1110 } 1111 1112 // JDK compatibility mode: (see JDK MessageFormat.format() API docs) 1113 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes 1114 // - if the result string contains an open curly brace '{' then 1115 // instantiate a temporary MessageFormat object and format again; 1116 // otherwise just append the result string 1117 const UnicodeString& msgString = msgPattern.getPatternString(); 1118 UnicodeString sb; 1119 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 1120 for (int32_t i = msgStart;;) { 1121 const MessagePattern::Part& part = msgPattern.getPart(++i); 1122 const UMessagePatternPartType type = part.getType(); 1123 int32_t index = part.getIndex(); 1124 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 1125 sb.append(msgString, prevIndex, index - prevIndex); 1126 break; 1127 } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 1128 sb.append(msgString, prevIndex, index - prevIndex); 1129 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 1130 const NumberFormat* nf = getDefaultNumberFormat(success); 1131 sb.append(nf->format(pluralNumber, sb, success)); 1132 } 1133 prevIndex = part.getLimit(); 1134 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 1135 sb.append(msgString, prevIndex, index - prevIndex); 1136 prevIndex = index; 1137 i = msgPattern.getLimitPartIndex(i); 1138 index = msgPattern.getPart(i).getLimit(); 1139 MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb); 1140 prevIndex = index; 1141 } 1142 } 1143 if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) { 1144 UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter. 1145 MessageFormat subMsgFormat(emptyPattern, fLocale, success); 1146 subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success); 1147 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, NULL, success); 1148 } else { 1149 appendTo.append(sb); 1150 } 1151 } 1152 1153 1154 UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const { 1155 const UnicodeString& msgString=msgPattern.getPatternString(); 1156 int32_t prevIndex=msgPattern.getPart(from).getLimit(); 1157 UnicodeString b; 1158 for (int32_t i = from + 1; ; ++i) { 1159 const MessagePattern::Part& part = msgPattern.getPart(i); 1160 const UMessagePatternPartType type=part.getType(); 1161 int32_t index=part.getIndex(); 1162 b.append(msgString, prevIndex, index - prevIndex); 1163 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1164 return b; 1165 } 1166 // Unexpected Part "part" in parsed message. 1167 U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR); 1168 prevIndex=part.getLimit(); 1169 } 1170 } 1171 1172 1173 FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/, 1174 FieldPosition* /*fp*/, const Formattable* /*argId*/) const { 1175 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing. 1176 return NULL; 1177 /* 1178 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) { 1179 fp->setBeginIndex(prevLength); 1180 fp->setEndIndex(dest.get_length()); 1181 return NULL; 1182 } 1183 return fp; 1184 */ 1185 } 1186 1187 void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) { 1188 // Deep copy pointer fields. 1189 // We need not copy the formatAliases because they are re-filled 1190 // in each getFormats() call. 1191 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules 1192 // also get created on demand. 1193 argTypeCount = that.argTypeCount; 1194 if (argTypeCount > 0) { 1195 if (!allocateArgTypes(argTypeCount, ec)) { 1196 return; 1197 } 1198 uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0])); 1199 } 1200 if (cachedFormatters != NULL) { 1201 uhash_removeAll(cachedFormatters); 1202 } 1203 if (customFormatArgStarts != NULL) { 1204 uhash_removeAll(customFormatArgStarts); 1205 } 1206 if (that.cachedFormatters) { 1207 if (cachedFormatters == NULL) { 1208 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 1209 equalFormatsForHash, &ec); 1210 if (U_FAILURE(ec)) { 1211 return; 1212 } 1213 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); 1214 } 1215 1216 const int32_t count = uhash_count(that.cachedFormatters); 1217 int32_t pos, idx; 1218 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1219 const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos); 1220 Format* newFormat = ((Format*)(cur->value.pointer))->clone(); 1221 if (newFormat) { 1222 uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec); 1223 } else { 1224 ec = U_MEMORY_ALLOCATION_ERROR; 1225 return; 1226 } 1227 } 1228 } 1229 if (that.customFormatArgStarts) { 1230 if (customFormatArgStarts == NULL) { 1231 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 1232 NULL, &ec); 1233 } 1234 const int32_t count = uhash_count(that.customFormatArgStarts); 1235 int32_t pos, idx; 1236 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1237 const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos); 1238 uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec); 1239 } 1240 } 1241 } 1242 1243 1244 Formattable* 1245 MessageFormat::parse(int32_t msgStart, 1246 const UnicodeString& source, 1247 ParsePosition& pos, 1248 int32_t& count, 1249 UErrorCode& ec) const { 1250 count = 0; 1251 if (U_FAILURE(ec)) { 1252 pos.setErrorIndex(pos.getIndex()); 1253 return NULL; 1254 } 1255 // parse() does not work with named arguments. 1256 if (msgPattern.hasNamedArguments()) { 1257 ec = U_ARGUMENT_TYPE_MISMATCH; 1258 pos.setErrorIndex(pos.getIndex()); 1259 return NULL; 1260 } 1261 LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]); 1262 const UnicodeString& msgString=msgPattern.getPatternString(); 1263 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); 1264 int32_t sourceOffset = pos.getIndex(); 1265 ParsePosition tempStatus(0); 1266 1267 for(int32_t i=msgStart+1; ; ++i) { 1268 UBool haveArgResult = FALSE; 1269 const MessagePattern::Part* part=&msgPattern.getPart(i); 1270 const UMessagePatternPartType type=part->getType(); 1271 int32_t index=part->getIndex(); 1272 // Make sure the literal string matches. 1273 int32_t len = index - prevIndex; 1274 if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) { 1275 sourceOffset += len; 1276 prevIndex += len; 1277 } else { 1278 pos.setErrorIndex(sourceOffset); 1279 return NULL; // leave index as is to signal error 1280 } 1281 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1282 // Things went well! Done. 1283 pos.setIndex(sourceOffset); 1284 return resultArray.orphan(); 1285 } 1286 if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) { 1287 prevIndex=part->getLimit(); 1288 continue; 1289 } 1290 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.) 1291 // Unexpected Part "part" in parsed message. 1292 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START); 1293 int32_t argLimit=msgPattern.getLimitPartIndex(i); 1294 1295 UMessagePatternArgType argType=part->getArgType(); 1296 part=&msgPattern.getPart(++i); 1297 int32_t argNumber = part->getValue(); // ARG_NUMBER 1298 UnicodeString key; 1299 ++i; 1300 const Format* formatter = NULL; 1301 Formattable& argResult = resultArray[argNumber]; 1302 1303 if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) { 1304 // Just parse using the formatter. 1305 tempStatus.setIndex(sourceOffset); 1306 formatter->parseObject(source, argResult, tempStatus); 1307 if (tempStatus.getIndex() == sourceOffset) { 1308 pos.setErrorIndex(sourceOffset); 1309 return NULL; // leave index as is to signal error 1310 } 1311 sourceOffset = tempStatus.getIndex(); 1312 haveArgResult = TRUE; 1313 } else if( 1314 argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { 1315 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1316 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1317 // for the hash table containind DummyFormat. 1318 1319 // Match as a string. 1320 // if at end, use longest possible match 1321 // otherwise uses first match to intervening string 1322 // does NOT recursively try all possibilities 1323 UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit); 1324 int32_t next; 1325 if (!stringAfterArgument.isEmpty()) { 1326 next = source.indexOf(stringAfterArgument, sourceOffset); 1327 } else { 1328 next = source.length(); 1329 } 1330 if (next < 0) { 1331 pos.setErrorIndex(sourceOffset); 1332 return NULL; // leave index as is to signal error 1333 } else { 1334 UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset)); 1335 UnicodeString compValue; 1336 compValue.append(LEFT_CURLY_BRACE); 1337 itos(argNumber, compValue); 1338 compValue.append(RIGHT_CURLY_BRACE); 1339 if (0 != strValue.compare(compValue)) { 1340 argResult.setString(strValue); 1341 haveArgResult = TRUE; 1342 } 1343 sourceOffset = next; 1344 } 1345 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { 1346 tempStatus.setIndex(sourceOffset); 1347 double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus); 1348 if (tempStatus.getIndex() == sourceOffset) { 1349 pos.setErrorIndex(sourceOffset); 1350 return NULL; // leave index as is to signal error 1351 } 1352 argResult.setDouble(choiceResult); 1353 haveArgResult = TRUE; 1354 sourceOffset = tempStatus.getIndex(); 1355 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) { 1356 // Parsing not supported. 1357 ec = U_UNSUPPORTED_ERROR; 1358 return NULL; 1359 } else { 1360 // This should never happen. 1361 ec = U_INTERNAL_PROGRAM_ERROR; 1362 return NULL; 1363 } 1364 if (haveArgResult && count <= argNumber) { 1365 count = argNumber + 1; 1366 } 1367 prevIndex=msgPattern.getPart(argLimit).getLimit(); 1368 i=argLimit; 1369 } 1370 } 1371 // ------------------------------------- 1372 // Parses the source pattern and returns the Formattable objects array, 1373 // the array count and the ending parse position. The caller of this method 1374 // owns the array. 1375 1376 Formattable* 1377 MessageFormat::parse(const UnicodeString& source, 1378 ParsePosition& pos, 1379 int32_t& count) const { 1380 UErrorCode ec = U_ZERO_ERROR; 1381 return parse(0, source, pos, count, ec); 1382 } 1383 1384 // ------------------------------------- 1385 // Parses the source string and returns the array of 1386 // Formattable objects and the array count. The caller 1387 // owns the returned array. 1388 1389 Formattable* 1390 MessageFormat::parse(const UnicodeString& source, 1391 int32_t& cnt, 1392 UErrorCode& success) const 1393 { 1394 if (msgPattern.hasNamedArguments()) { 1395 success = U_ARGUMENT_TYPE_MISMATCH; 1396 return NULL; 1397 } 1398 ParsePosition status(0); 1399 // Calls the actual implementation method and starts 1400 // from zero offset of the source text. 1401 Formattable* result = parse(source, status, cnt); 1402 if (status.getIndex() == 0) { 1403 success = U_MESSAGE_PARSE_ERROR; 1404 delete[] result; 1405 return NULL; 1406 } 1407 return result; 1408 } 1409 1410 // ------------------------------------- 1411 // Parses the source text and copy into the result buffer. 1412 1413 void 1414 MessageFormat::parseObject( const UnicodeString& source, 1415 Formattable& result, 1416 ParsePosition& status) const 1417 { 1418 int32_t cnt = 0; 1419 Formattable* tmpResult = parse(source, status, cnt); 1420 if (tmpResult != NULL) 1421 result.adoptArray(tmpResult, cnt); 1422 } 1423 1424 UnicodeString 1425 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1426 UnicodeString result; 1427 if (U_SUCCESS(status)) { 1428 int32_t plen = pattern.length(); 1429 const UChar* pat = pattern.getBuffer(); 1430 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1431 UChar* buf = result.getBuffer(blen); 1432 if (buf == NULL) { 1433 status = U_MEMORY_ALLOCATION_ERROR; 1434 } else { 1435 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1436 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1437 } 1438 } 1439 if (U_FAILURE(status)) { 1440 result.setToBogus(); 1441 } 1442 return result; 1443 } 1444 1445 // ------------------------------------- 1446 1447 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1448 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1449 if (fmt == NULL) { 1450 ec = U_MEMORY_ALLOCATION_ERROR; 1451 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1452 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1453 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1454 } 1455 return fmt; 1456 } 1457 1458 void MessageFormat::cacheExplicitFormats(UErrorCode& status) { 1459 if (U_FAILURE(status)) { 1460 return; 1461 } 1462 1463 if (cachedFormatters != NULL) { 1464 uhash_removeAll(cachedFormatters); 1465 } 1466 if (customFormatArgStarts != NULL) { 1467 uhash_removeAll(customFormatArgStarts); 1468 } 1469 1470 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT 1471 // which we need not examine. 1472 int32_t limit = msgPattern.countParts() - 2; 1473 argTypeCount = 0; 1474 // We also need not look at the first two "parts" 1475 // (at most MSG_START and ARG_START) in this loop. 1476 // We determine the argTypeCount first so that we can allocateArgTypes 1477 // so that the next loop can set argTypes[argNumber]. 1478 // (This is for the C API which needs the argTypes to read its va_arg list.) 1479 for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) { 1480 const MessagePattern::Part& part = msgPattern.getPart(i); 1481 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1482 const int argNumber = part.getValue(); 1483 if (argNumber >= argTypeCount) { 1484 argTypeCount = argNumber + 1; 1485 } 1486 } 1487 } 1488 if (!allocateArgTypes(argTypeCount, status)) { 1489 return; 1490 } 1491 // Set all argTypes to kObject, as a "none" value, for lack of any better value. 1492 // We never use kObject for real arguments. 1493 // We use it as "no argument yet" for the check for hasArgTypeConflicts. 1494 for (int32_t i = 0; i < argTypeCount; ++i) { 1495 argTypes[i] = Formattable::kObject; 1496 } 1497 hasArgTypeConflicts = FALSE; 1498 1499 // This loop starts at part index 1 because we do need to examine 1500 // ARG_START parts. (But we can ignore the MSG_START.) 1501 for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) { 1502 const MessagePattern::Part* part = &msgPattern.getPart(i); 1503 if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) { 1504 continue; 1505 } 1506 UMessagePatternArgType argType = part->getArgType(); 1507 1508 int32_t argNumber = -1; 1509 part = &msgPattern.getPart(i + 1); 1510 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1511 argNumber = part->getValue(); 1512 } 1513 Formattable::Type formattableType; 1514 1515 switch (argType) { 1516 case UMSGPAT_ARG_TYPE_NONE: 1517 formattableType = Formattable::kString; 1518 break; 1519 case UMSGPAT_ARG_TYPE_SIMPLE: { 1520 int32_t index = i; 1521 i += 2; 1522 UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++)); 1523 UnicodeString style; 1524 if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { 1525 style = msgPattern.getSubstring(*part); 1526 ++i; 1527 } 1528 UParseError parseError; 1529 Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status); 1530 setArgStartFormat(index, formatter, status); 1531 break; 1532 } 1533 case UMSGPAT_ARG_TYPE_CHOICE: 1534 case UMSGPAT_ARG_TYPE_PLURAL: 1535 case UMSGPAT_ARG_TYPE_SELECTORDINAL: 1536 formattableType = Formattable::kDouble; 1537 break; 1538 case UMSGPAT_ARG_TYPE_SELECT: 1539 formattableType = Formattable::kString; 1540 break; 1541 default: 1542 status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable. 1543 formattableType = Formattable::kString; 1544 break; 1545 } 1546 if (argNumber != -1) { 1547 if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) { 1548 hasArgTypeConflicts = TRUE; 1549 } 1550 argTypes[argNumber] = formattableType; 1551 } 1552 } 1553 } 1554 1555 1556 Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style, 1557 Formattable::Type& formattableType, UParseError& parseError, 1558 UErrorCode& ec) { 1559 if (U_FAILURE(ec)) { 1560 return NULL; 1561 } 1562 Format* fmt = NULL; 1563 int32_t typeID, styleID; 1564 DateFormat::EStyle date_style; 1565 1566 switch (typeID = findKeyword(type, TYPE_IDS)) { 1567 case 0: // number 1568 formattableType = Formattable::kDouble; 1569 switch (findKeyword(style, NUMBER_STYLE_IDS)) { 1570 case 0: // default 1571 fmt = NumberFormat::createInstance(fLocale, ec); 1572 break; 1573 case 1: // currency 1574 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1575 break; 1576 case 2: // percent 1577 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1578 break; 1579 case 3: // integer 1580 formattableType = Formattable::kLong; 1581 fmt = createIntegerFormat(fLocale, ec); 1582 break; 1583 default: // pattern 1584 fmt = NumberFormat::createInstance(fLocale, ec); 1585 if (fmt) { 1586 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt); 1587 if (decfmt != NULL) { 1588 decfmt->applyPattern(style,parseError,ec); 1589 } 1590 } 1591 break; 1592 } 1593 break; 1594 1595 case 1: // date 1596 case 2: // time 1597 formattableType = Formattable::kDate; 1598 styleID = findKeyword(style, DATE_STYLE_IDS); 1599 date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1600 1601 if (typeID == 1) { 1602 fmt = DateFormat::createDateInstance(date_style, fLocale); 1603 } else { 1604 fmt = DateFormat::createTimeInstance(date_style, fLocale); 1605 } 1606 1607 if (styleID < 0 && fmt != NULL) { 1608 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt); 1609 if (sdtfmt != NULL) { 1610 sdtfmt->applyPattern(style); 1611 } 1612 } 1613 break; 1614 1615 case 3: // spellout 1616 formattableType = Formattable::kDouble; 1617 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec); 1618 break; 1619 case 4: // ordinal 1620 formattableType = Formattable::kDouble; 1621 fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec); 1622 break; 1623 case 5: // duration 1624 formattableType = Formattable::kDouble; 1625 fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec); 1626 break; 1627 default: 1628 formattableType = Formattable::kString; 1629 ec = U_ILLEGAL_ARGUMENT_ERROR; 1630 break; 1631 } 1632 1633 return fmt; 1634 } 1635 1636 1637 //------------------------------------- 1638 // Finds the string, s, in the string array, list. 1639 int32_t MessageFormat::findKeyword(const UnicodeString& s, 1640 const UChar * const *list) 1641 { 1642 if (s.isEmpty()) { 1643 return 0; // default 1644 } 1645 1646 int32_t length = s.length(); 1647 const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length); 1648 UnicodeString buffer(FALSE, ps, length); 1649 // Trims the space characters and turns all characters 1650 // in s to lower case. 1651 buffer.toLower(""); 1652 for (int32_t i = 0; list[i]; ++i) { 1653 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1654 return i; 1655 } 1656 } 1657 return -1; 1658 } 1659 1660 /** 1661 * Convenience method that ought to be in NumberFormat 1662 */ 1663 NumberFormat* 1664 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1665 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1666 DecimalFormat *temp2; 1667 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) { 1668 temp2->setMaximumFractionDigits(0); 1669 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1670 temp2->setParseIntegerOnly(TRUE); 1671 } 1672 1673 return temp; 1674 } 1675 1676 /** 1677 * Return the default number format. Used to format a numeric 1678 * argument when subformats[i].format is NULL. Returns NULL 1679 * on failure. 1680 * 1681 * Semantically const but may modify *this. 1682 */ 1683 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1684 if (defaultNumberFormat == NULL) { 1685 MessageFormat* t = (MessageFormat*) this; 1686 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1687 if (U_FAILURE(ec)) { 1688 delete t->defaultNumberFormat; 1689 t->defaultNumberFormat = NULL; 1690 } else if (t->defaultNumberFormat == NULL) { 1691 ec = U_MEMORY_ALLOCATION_ERROR; 1692 } 1693 } 1694 return defaultNumberFormat; 1695 } 1696 1697 /** 1698 * Return the default date format. Used to format a date 1699 * argument when subformats[i].format is NULL. Returns NULL 1700 * on failure. 1701 * 1702 * Semantically const but may modify *this. 1703 */ 1704 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1705 if (defaultDateFormat == NULL) { 1706 MessageFormat* t = (MessageFormat*) this; 1707 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1708 if (t->defaultDateFormat == NULL) { 1709 ec = U_MEMORY_ALLOCATION_ERROR; 1710 } 1711 } 1712 return defaultDateFormat; 1713 } 1714 1715 UBool 1716 MessageFormat::usesNamedArguments() const { 1717 return msgPattern.hasNamedArguments(); 1718 } 1719 1720 int32_t 1721 MessageFormat::getArgTypeCount() const { 1722 return argTypeCount; 1723 } 1724 1725 UBool MessageFormat::equalFormats(const void* left, const void* right) { 1726 return *(const Format*)left==*(const Format*)right; 1727 } 1728 1729 1730 UBool MessageFormat::DummyFormat::operator==(const Format&) const { 1731 return TRUE; 1732 } 1733 1734 Format* MessageFormat::DummyFormat::clone() const { 1735 return new DummyFormat(); 1736 } 1737 1738 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1739 UnicodeString& appendTo, 1740 UErrorCode& status) const { 1741 if (U_SUCCESS(status)) { 1742 status = U_UNSUPPORTED_ERROR; 1743 } 1744 return appendTo; 1745 } 1746 1747 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1748 UnicodeString& appendTo, 1749 FieldPosition&, 1750 UErrorCode& status) const { 1751 if (U_SUCCESS(status)) { 1752 status = U_UNSUPPORTED_ERROR; 1753 } 1754 return appendTo; 1755 } 1756 1757 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1758 UnicodeString& appendTo, 1759 FieldPositionIterator*, 1760 UErrorCode& status) const { 1761 if (U_SUCCESS(status)) { 1762 status = U_UNSUPPORTED_ERROR; 1763 } 1764 return appendTo; 1765 } 1766 1767 void MessageFormat::DummyFormat::parseObject(const UnicodeString&, 1768 Formattable&, 1769 ParsePosition& ) const { 1770 } 1771 1772 1773 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1774 pos=0; 1775 fFormatNames = fNameList; 1776 } 1777 1778 const UnicodeString* 1779 FormatNameEnumeration::snext(UErrorCode& status) { 1780 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1781 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1782 } 1783 return NULL; 1784 } 1785 1786 void 1787 FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1788 pos=0; 1789 } 1790 1791 int32_t 1792 FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1793 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1794 } 1795 1796 FormatNameEnumeration::~FormatNameEnumeration() { 1797 delete fFormatNames; 1798 } 1799 1800 1801 MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const Locale* loc, UPluralType t) 1802 : locale(loc), rules(NULL), type(t) { 1803 } 1804 1805 MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() { 1806 // We own the rules but not the locale. 1807 delete rules; 1808 } 1809 1810 UnicodeString MessageFormat::PluralSelectorProvider::select(double number, UErrorCode& ec) const { 1811 if (U_FAILURE(ec)) { 1812 return UnicodeString(FALSE, OTHER_STRING, 5); 1813 } 1814 MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this); 1815 if(rules == NULL) { 1816 t->rules = PluralRules::forLocale(*locale, type, ec); 1817 if (U_FAILURE(ec)) { 1818 return UnicodeString(FALSE, OTHER_STRING, 5); 1819 } 1820 } 1821 return rules->select(number); 1822 } 1823 1824 void MessageFormat::PluralSelectorProvider::reset(const Locale* loc) { 1825 locale = loc; 1826 delete rules; 1827 rules = NULL; 1828 } 1829 1830 1831 U_NAMESPACE_END 1832 1833 #endif /* #if !UCONFIG_NO_FORMATTING */ 1834 1835 //eof 1836