1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 * 11/01/09 kirtig Added SelectFormat 20 ********************************************************************/ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_FORMATTING 25 26 #include "unicode/appendable.h" 27 #include "unicode/choicfmt.h" 28 #include "unicode/datefmt.h" 29 #include "unicode/decimfmt.h" 30 #include "unicode/localpointer.h" 31 #include "unicode/msgfmt.h" 32 #include "unicode/plurfmt.h" 33 #include "unicode/rbnf.h" 34 #include "unicode/selfmt.h" 35 #include "unicode/smpdtfmt.h" 36 #include "unicode/umsg.h" 37 #include "unicode/ustring.h" 38 #include "cmemory.h" 39 #include "patternprops.h" 40 #include "messageimpl.h" 41 #include "msgfmt_impl.h" 42 #include "uassert.h" 43 #include "uelement.h" 44 #include "uhash.h" 45 #include "ustrfmt.h" 46 #include "util.h" 47 #include "uvector.h" 48 49 // ***************************************************************************** 50 // class MessageFormat 51 // ***************************************************************************** 52 53 #define SINGLE_QUOTE ((UChar)0x0027) 54 #define COMMA ((UChar)0x002C) 55 #define LEFT_CURLY_BRACE ((UChar)0x007B) 56 #define RIGHT_CURLY_BRACE ((UChar)0x007D) 57 58 //--------------------------------------- 59 // static data 60 61 static const UChar ID_NUMBER[] = { 62 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 63 }; 64 static const UChar ID_DATE[] = { 65 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 66 }; 67 static const UChar ID_TIME[] = { 68 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 69 }; 70 static const UChar ID_SPELLOUT[] = { 71 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 72 }; 73 static const UChar ID_ORDINAL[] = { 74 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 75 }; 76 static const UChar ID_DURATION[] = { 77 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 78 }; 79 80 // MessageFormat Type List Number, Date, Time or Choice 81 static const UChar * const TYPE_IDS[] = { 82 ID_NUMBER, 83 ID_DATE, 84 ID_TIME, 85 ID_SPELLOUT, 86 ID_ORDINAL, 87 ID_DURATION, 88 NULL, 89 }; 90 91 static const UChar ID_EMPTY[] = { 92 0 /* empty string, used for default so that null can mark end of list */ 93 }; 94 static const UChar ID_CURRENCY[] = { 95 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 96 }; 97 static const UChar ID_PERCENT[] = { 98 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 99 }; 100 static const UChar ID_INTEGER[] = { 101 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 102 }; 103 104 // NumberFormat modifier list, default, currency, percent or integer 105 static const UChar * const NUMBER_STYLE_IDS[] = { 106 ID_EMPTY, 107 ID_CURRENCY, 108 ID_PERCENT, 109 ID_INTEGER, 110 NULL, 111 }; 112 113 static const UChar ID_SHORT[] = { 114 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 115 }; 116 static const UChar ID_MEDIUM[] = { 117 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 118 }; 119 static const UChar ID_LONG[] = { 120 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 121 }; 122 static const UChar ID_FULL[] = { 123 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 124 }; 125 126 // DateFormat modifier list, default, short, medium, long or full 127 static const UChar * const DATE_STYLE_IDS[] = { 128 ID_EMPTY, 129 ID_SHORT, 130 ID_MEDIUM, 131 ID_LONG, 132 ID_FULL, 133 NULL, 134 }; 135 136 static const icu::DateFormat::EStyle DATE_STYLES[] = { 137 icu::DateFormat::kDefault, 138 icu::DateFormat::kShort, 139 icu::DateFormat::kMedium, 140 icu::DateFormat::kLong, 141 icu::DateFormat::kFull, 142 }; 143 144 static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 145 146 static const UChar NULL_STRING[] = { 147 0x6E, 0x75, 0x6C, 0x6C, 0 // "null" 148 }; 149 150 static const UChar OTHER_STRING[] = { 151 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 152 }; 153 154 U_CDECL_BEGIN 155 static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1, 156 const UHashTok key2) { 157 return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer); 158 } 159 160 U_CDECL_END 161 162 U_NAMESPACE_BEGIN 163 164 // ------------------------------------- 165 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 166 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 167 168 //-------------------------------------------------------------------- 169 170 /** 171 * Convert an integer value to a string and append the result to 172 * the given UnicodeString. 173 */ 174 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 175 UChar temp[16]; 176 uprv_itou(temp,16,i,10,0); // 10 == radix 177 appendTo.append(temp, -1); 178 return appendTo; 179 } 180 181 182 // AppendableWrapper: encapsulates the result of formatting, keeping track 183 // of the string and its length. 184 class AppendableWrapper : public UMemory { 185 public: 186 AppendableWrapper(Appendable& appendable) : app(appendable), len(0) { 187 } 188 void append(const UnicodeString& s) { 189 app.appendString(s.getBuffer(), s.length()); 190 len += s.length(); 191 } 192 void append(const UChar* s, const int32_t sLength) { 193 app.appendString(s, sLength); 194 len += sLength; 195 } 196 void append(const UnicodeString& s, int32_t start, int32_t length) { 197 append(s.tempSubString(start, length)); 198 } 199 void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) { 200 UnicodeString s; 201 formatter->format(arg, s, ec); 202 if (U_SUCCESS(ec)) { 203 append(s); 204 } 205 } 206 int32_t length() { 207 return len; 208 } 209 private: 210 Appendable& app; 211 int32_t len; 212 }; 213 214 215 // ------------------------------------- 216 // Creates a MessageFormat instance based on the pattern. 217 218 MessageFormat::MessageFormat(const UnicodeString& pattern, 219 UErrorCode& success) 220 : fLocale(Locale::getDefault()), // Uses the default locale 221 msgPattern(success), 222 formatAliases(NULL), 223 formatAliasesCapacity(0), 224 argTypes(NULL), 225 argTypeCount(0), 226 argTypeCapacity(0), 227 hasArgTypeConflicts(FALSE), 228 defaultNumberFormat(NULL), 229 defaultDateFormat(NULL), 230 cachedFormatters(NULL), 231 customFormatArgStarts(NULL), 232 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 233 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 234 { 235 setLocaleIDs(fLocale.getName(), fLocale.getName()); 236 applyPattern(pattern, success); 237 } 238 239 MessageFormat::MessageFormat(const UnicodeString& pattern, 240 const Locale& newLocale, 241 UErrorCode& success) 242 : fLocale(newLocale), 243 msgPattern(success), 244 formatAliases(NULL), 245 formatAliasesCapacity(0), 246 argTypes(NULL), 247 argTypeCount(0), 248 argTypeCapacity(0), 249 hasArgTypeConflicts(FALSE), 250 defaultNumberFormat(NULL), 251 defaultDateFormat(NULL), 252 cachedFormatters(NULL), 253 customFormatArgStarts(NULL), 254 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 255 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 256 { 257 setLocaleIDs(fLocale.getName(), fLocale.getName()); 258 applyPattern(pattern, success); 259 } 260 261 MessageFormat::MessageFormat(const UnicodeString& pattern, 262 const Locale& newLocale, 263 UParseError& parseError, 264 UErrorCode& success) 265 : fLocale(newLocale), 266 msgPattern(success), 267 formatAliases(NULL), 268 formatAliasesCapacity(0), 269 argTypes(NULL), 270 argTypeCount(0), 271 argTypeCapacity(0), 272 hasArgTypeConflicts(FALSE), 273 defaultNumberFormat(NULL), 274 defaultDateFormat(NULL), 275 cachedFormatters(NULL), 276 customFormatArgStarts(NULL), 277 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 278 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 279 { 280 setLocaleIDs(fLocale.getName(), fLocale.getName()); 281 applyPattern(pattern, parseError, success); 282 } 283 284 MessageFormat::MessageFormat(const MessageFormat& that) 285 : 286 Format(that), 287 fLocale(that.fLocale), 288 msgPattern(that.msgPattern), 289 formatAliases(NULL), 290 formatAliasesCapacity(0), 291 argTypes(NULL), 292 argTypeCount(0), 293 argTypeCapacity(0), 294 hasArgTypeConflicts(that.hasArgTypeConflicts), 295 defaultNumberFormat(NULL), 296 defaultDateFormat(NULL), 297 cachedFormatters(NULL), 298 customFormatArgStarts(NULL), 299 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL), 300 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL) 301 { 302 // This will take care of creating the hash tables (since they are NULL). 303 UErrorCode ec = U_ZERO_ERROR; 304 copyObjects(that, ec); 305 if (U_FAILURE(ec)) { 306 resetPattern(); 307 } 308 } 309 310 MessageFormat::~MessageFormat() 311 { 312 uhash_close(cachedFormatters); 313 uhash_close(customFormatArgStarts); 314 315 uprv_free(argTypes); 316 uprv_free(formatAliases); 317 delete defaultNumberFormat; 318 delete defaultDateFormat; 319 } 320 321 //-------------------------------------------------------------------- 322 // Variable-size array management 323 324 /** 325 * Allocate argTypes[] to at least the given capacity and return 326 * TRUE if successful. If not, leave argTypes[] unchanged. 327 * 328 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 329 * if necessary to be at least as large as specified. 330 */ 331 UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) { 332 if (U_FAILURE(status)) { 333 return FALSE; 334 } 335 if (argTypeCapacity >= capacity) { 336 return TRUE; 337 } 338 if (capacity < DEFAULT_INITIAL_CAPACITY) { 339 capacity = DEFAULT_INITIAL_CAPACITY; 340 } else if (capacity < 2*argTypeCapacity) { 341 capacity = 2*argTypeCapacity; 342 } 343 Formattable::Type* a = (Formattable::Type*) 344 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 345 if (a == NULL) { 346 status = U_MEMORY_ALLOCATION_ERROR; 347 return FALSE; 348 } 349 argTypes = a; 350 argTypeCapacity = capacity; 351 return TRUE; 352 } 353 354 // ------------------------------------- 355 // assignment operator 356 357 const MessageFormat& 358 MessageFormat::operator=(const MessageFormat& that) 359 { 360 if (this != &that) { 361 // Calls the super class for assignment first. 362 Format::operator=(that); 363 364 setLocale(that.fLocale); 365 msgPattern = that.msgPattern; 366 hasArgTypeConflicts = that.hasArgTypeConflicts; 367 368 UErrorCode ec = U_ZERO_ERROR; 369 copyObjects(that, ec); 370 if (U_FAILURE(ec)) { 371 resetPattern(); 372 } 373 } 374 return *this; 375 } 376 377 UBool 378 MessageFormat::operator==(const Format& rhs) const 379 { 380 if (this == &rhs) return TRUE; 381 382 MessageFormat& that = (MessageFormat&)rhs; 383 384 // Check class ID before checking MessageFormat members 385 if (!Format::operator==(rhs) || 386 msgPattern != that.msgPattern || 387 fLocale != that.fLocale) { 388 return FALSE; 389 } 390 391 // Compare hashtables. 392 if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) { 393 return FALSE; 394 } 395 if (customFormatArgStarts == NULL) { 396 return TRUE; 397 } 398 399 UErrorCode ec = U_ZERO_ERROR; 400 const int32_t count = uhash_count(customFormatArgStarts); 401 const int32_t rhs_count = uhash_count(that.customFormatArgStarts); 402 if (count != rhs_count) { 403 return FALSE; 404 } 405 int32_t idx = 0, rhs_idx = 0, pos = -1, rhs_pos = -1; 406 for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) { 407 const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos); 408 const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos); 409 if (cur->key.integer != rhs_cur->key.integer) { 410 return FALSE; 411 } 412 const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer); 413 const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer); 414 if (*format != *rhs_format) { 415 return FALSE; 416 } 417 } 418 return TRUE; 419 } 420 421 // ------------------------------------- 422 // Creates a copy of this MessageFormat, the caller owns the copy. 423 424 Format* 425 MessageFormat::clone() const 426 { 427 return new MessageFormat(*this); 428 } 429 430 // ------------------------------------- 431 // Sets the locale of this MessageFormat object to theLocale. 432 433 void 434 MessageFormat::setLocale(const Locale& theLocale) 435 { 436 if (fLocale != theLocale) { 437 delete defaultNumberFormat; 438 defaultNumberFormat = NULL; 439 delete defaultDateFormat; 440 defaultDateFormat = NULL; 441 fLocale = theLocale; 442 setLocaleIDs(fLocale.getName(), fLocale.getName()); 443 pluralProvider.reset(&fLocale); 444 ordinalProvider.reset(&fLocale); 445 } 446 } 447 448 // ------------------------------------- 449 // Gets the locale of this MessageFormat object. 450 451 const Locale& 452 MessageFormat::getLocale() const 453 { 454 return fLocale; 455 } 456 457 void 458 MessageFormat::applyPattern(const UnicodeString& newPattern, 459 UErrorCode& status) 460 { 461 UParseError parseError; 462 applyPattern(newPattern,parseError,status); 463 } 464 465 466 // ------------------------------------- 467 // Applies the new pattern and returns an error if the pattern 468 // is not correct. 469 void 470 MessageFormat::applyPattern(const UnicodeString& pattern, 471 UParseError& parseError, 472 UErrorCode& ec) 473 { 474 if(U_FAILURE(ec)) { 475 return; 476 } 477 msgPattern.parse(pattern, &parseError, ec); 478 cacheExplicitFormats(ec); 479 480 if (U_FAILURE(ec)) { 481 resetPattern(); 482 } 483 } 484 485 void MessageFormat::resetPattern() { 486 msgPattern.clear(); 487 uhash_close(cachedFormatters); 488 cachedFormatters = NULL; 489 uhash_close(customFormatArgStarts); 490 customFormatArgStarts = NULL; 491 argTypeCount = 0; 492 hasArgTypeConflicts = FALSE; 493 } 494 495 void 496 MessageFormat::applyPattern(const UnicodeString& pattern, 497 UMessagePatternApostropheMode aposMode, 498 UParseError* parseError, 499 UErrorCode& status) { 500 if (aposMode != msgPattern.getApostropheMode()) { 501 msgPattern.clearPatternAndSetApostropheMode(aposMode); 502 } 503 applyPattern(pattern, *parseError, status); 504 } 505 506 // ------------------------------------- 507 // Converts this MessageFormat instance to a pattern. 508 509 UnicodeString& 510 MessageFormat::toPattern(UnicodeString& appendTo) const { 511 if ((customFormatArgStarts != NULL && 0 != uhash_count(customFormatArgStarts)) || 512 0 == msgPattern.countParts() 513 ) { 514 appendTo.setToBogus(); 515 return appendTo; 516 } 517 return appendTo.append(msgPattern.getPatternString()); 518 } 519 520 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const { 521 if (partIndex != 0) { 522 partIndex = msgPattern.getLimitPartIndex(partIndex); 523 } 524 for (;;) { 525 UMessagePatternPartType type = msgPattern.getPartType(++partIndex); 526 if (type == UMSGPAT_PART_TYPE_ARG_START) { 527 return partIndex; 528 } 529 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 530 return -1; 531 } 532 } 533 } 534 535 void MessageFormat::setArgStartFormat(int32_t argStart, 536 Format* formatter, 537 UErrorCode& status) { 538 if (U_FAILURE(status)) { 539 delete formatter; 540 } 541 if (cachedFormatters == NULL) { 542 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 543 equalFormatsForHash, &status); 544 if (U_FAILURE(status)) { 545 delete formatter; 546 return; 547 } 548 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); 549 } 550 if (formatter == NULL) { 551 formatter = new DummyFormat(); 552 } 553 uhash_iput(cachedFormatters, argStart, formatter, &status); 554 } 555 556 557 UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) { 558 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 559 return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ? 560 msgPattern.partSubstringMatches(part, argName) : 561 part.getValue() == argNumber; // ARG_NUMBER 562 } 563 564 // Sets a custom formatter for a MessagePattern ARG_START part index. 565 // "Custom" formatters are provided by the user via setFormat() or similar APIs. 566 void MessageFormat::setCustomArgStartFormat(int32_t argStart, 567 Format* formatter, 568 UErrorCode& status) { 569 setArgStartFormat(argStart, formatter, status); 570 if (customFormatArgStarts == NULL) { 571 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 572 NULL, &status); 573 } 574 uhash_iputi(customFormatArgStarts, argStart, 1, &status); 575 } 576 577 Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const { 578 if (cachedFormatters == NULL) { 579 return NULL; 580 } 581 void* ptr = uhash_iget(cachedFormatters, argumentNumber); 582 if (ptr != NULL && dynamic_cast<DummyFormat*>((Format*)ptr) == NULL) { 583 return (Format*) ptr; 584 } else { 585 // Not cached, or a DummyFormat representing setFormat(NULL). 586 return NULL; 587 } 588 } 589 590 // ------------------------------------- 591 // Adopts the new formats array and updates the array count. 592 // This MessageFormat instance owns the new formats. 593 void 594 MessageFormat::adoptFormats(Format** newFormats, 595 int32_t count) { 596 if (newFormats == NULL || count < 0) { 597 return; 598 } 599 // Throw away any cached formatters. 600 if (cachedFormatters != NULL) { 601 uhash_removeAll(cachedFormatters); 602 } 603 if (customFormatArgStarts != NULL) { 604 uhash_removeAll(customFormatArgStarts); 605 } 606 607 int32_t formatNumber = 0; 608 UErrorCode status = U_ZERO_ERROR; 609 for (int32_t partIndex = 0; 610 formatNumber < count && U_SUCCESS(status) && 611 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 612 setCustomArgStartFormat(partIndex, newFormats[formatNumber], status); 613 ++formatNumber; 614 } 615 // Delete those that didn't get used (if any). 616 for (; formatNumber < count; ++formatNumber) { 617 delete newFormats[formatNumber]; 618 } 619 620 } 621 622 // ------------------------------------- 623 // Sets the new formats array and updates the array count. 624 // This MessageFormat instance maks a copy of the new formats. 625 626 void 627 MessageFormat::setFormats(const Format** newFormats, 628 int32_t count) { 629 if (newFormats == NULL || count < 0) { 630 return; 631 } 632 // Throw away any cached formatters. 633 if (cachedFormatters != NULL) { 634 uhash_removeAll(cachedFormatters); 635 } 636 if (customFormatArgStarts != NULL) { 637 uhash_removeAll(customFormatArgStarts); 638 } 639 640 UErrorCode status = U_ZERO_ERROR; 641 int32_t formatNumber = 0; 642 for (int32_t partIndex = 0; 643 formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 644 Format* newFormat = NULL; 645 if (newFormats[formatNumber] != NULL) { 646 newFormat = newFormats[formatNumber]->clone(); 647 if (newFormat == NULL) { 648 status = U_MEMORY_ALLOCATION_ERROR; 649 } 650 } 651 setCustomArgStartFormat(partIndex, newFormat, status); 652 ++formatNumber; 653 } 654 if (U_FAILURE(status)) { 655 resetPattern(); 656 } 657 } 658 659 // ------------------------------------- 660 // Adopt a single format by format number. 661 // Do nothing if the format number is not less than the array count. 662 663 void 664 MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 665 LocalPointer<Format> p(newFormat); 666 if (n >= 0) { 667 int32_t formatNumber = 0; 668 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 669 if (n == formatNumber) { 670 UErrorCode status = U_ZERO_ERROR; 671 setCustomArgStartFormat(partIndex, p.orphan(), status); 672 return; 673 } 674 ++formatNumber; 675 } 676 } 677 } 678 679 // ------------------------------------- 680 // Adopt a single format by format name. 681 // Do nothing if there is no match of formatName. 682 void 683 MessageFormat::adoptFormat(const UnicodeString& formatName, 684 Format* formatToAdopt, 685 UErrorCode& status) { 686 LocalPointer<Format> p(formatToAdopt); 687 if (U_FAILURE(status)) { 688 return; 689 } 690 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 691 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 692 status = U_ILLEGAL_ARGUMENT_ERROR; 693 return; 694 } 695 for (int32_t partIndex = 0; 696 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 697 ) { 698 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 699 Format* f; 700 if (p.isValid()) { 701 f = p.orphan(); 702 } else if (formatToAdopt == NULL) { 703 f = NULL; 704 } else { 705 f = formatToAdopt->clone(); 706 if (f == NULL) { 707 status = U_MEMORY_ALLOCATION_ERROR; 708 return; 709 } 710 } 711 setCustomArgStartFormat(partIndex, f, status); 712 } 713 } 714 } 715 716 // ------------------------------------- 717 // Set a single format. 718 // Do nothing if the variable is not less than the array count. 719 void 720 MessageFormat::setFormat(int32_t n, const Format& newFormat) { 721 722 if (n >= 0) { 723 int32_t formatNumber = 0; 724 for (int32_t partIndex = 0; 725 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 726 if (n == formatNumber) { 727 Format* new_format = newFormat.clone(); 728 if (new_format) { 729 UErrorCode status = U_ZERO_ERROR; 730 setCustomArgStartFormat(partIndex, new_format, status); 731 } 732 return; 733 } 734 ++formatNumber; 735 } 736 } 737 } 738 739 // ------------------------------------- 740 // Get a single format by format name. 741 // Do nothing if the variable is not less than the array count. 742 Format * 743 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 744 if (U_FAILURE(status) || cachedFormatters == NULL) return NULL; 745 746 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 747 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 748 status = U_ILLEGAL_ARGUMENT_ERROR; 749 return NULL; 750 } 751 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 752 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 753 return getCachedFormatter(partIndex); 754 } 755 } 756 return NULL; 757 } 758 759 // ------------------------------------- 760 // Set a single format by format name 761 // Do nothing if the variable is not less than the array count. 762 void 763 MessageFormat::setFormat(const UnicodeString& formatName, 764 const Format& newFormat, 765 UErrorCode& status) { 766 if (U_FAILURE(status)) return; 767 768 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 769 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 770 status = U_ILLEGAL_ARGUMENT_ERROR; 771 return; 772 } 773 for (int32_t partIndex = 0; 774 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 775 ) { 776 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 777 if (&newFormat == NULL) { 778 setCustomArgStartFormat(partIndex, NULL, status); 779 } else { 780 Format* new_format = newFormat.clone(); 781 if (new_format == NULL) { 782 status = U_MEMORY_ALLOCATION_ERROR; 783 return; 784 } 785 setCustomArgStartFormat(partIndex, new_format, status); 786 } 787 } 788 } 789 } 790 791 // ------------------------------------- 792 // Gets the format array. 793 const Format** 794 MessageFormat::getFormats(int32_t& cnt) const 795 { 796 // This old API returns an array (which we hold) of Format* 797 // pointers. The array is valid up to the next call to any 798 // method on this object. We construct and resize an array 799 // on demand that contains aliases to the subformats[i].format 800 // pointers. 801 MessageFormat* t = const_cast<MessageFormat*> (this); 802 cnt = 0; 803 if (formatAliases == NULL) { 804 t->formatAliasesCapacity = (argTypeCount<10) ? 10 : argTypeCount; 805 Format** a = (Format**) 806 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 807 if (a == NULL) { 808 t->formatAliasesCapacity = 0; 809 return NULL; 810 } 811 t->formatAliases = a; 812 } else if (argTypeCount > formatAliasesCapacity) { 813 Format** a = (Format**) 814 uprv_realloc(formatAliases, sizeof(Format*) * argTypeCount); 815 if (a == NULL) { 816 t->formatAliasesCapacity = 0; 817 return NULL; 818 } 819 t->formatAliases = a; 820 t->formatAliasesCapacity = argTypeCount; 821 } 822 823 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 824 t->formatAliases[cnt++] = getCachedFormatter(partIndex); 825 } 826 827 return (const Format**)formatAliases; 828 } 829 830 831 UnicodeString MessageFormat::getArgName(int32_t partIndex) { 832 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 833 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 834 return msgPattern.getSubstring(part); 835 } else { 836 UnicodeString temp; 837 return itos(part.getValue(), temp); 838 } 839 } 840 841 StringEnumeration* 842 MessageFormat::getFormatNames(UErrorCode& status) { 843 if (U_FAILURE(status)) return NULL; 844 845 UVector *fFormatNames = new UVector(status); 846 if (U_FAILURE(status)) { 847 status = U_MEMORY_ALLOCATION_ERROR; 848 return NULL; 849 } 850 fFormatNames->setDeleter(uprv_deleteUObject); 851 852 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 853 fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status); 854 } 855 856 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 857 return nameEnumerator; 858 } 859 860 // ------------------------------------- 861 // Formats the source Formattable array and copy into the result buffer. 862 // Ignore the FieldPosition result for error checking. 863 864 UnicodeString& 865 MessageFormat::format(const Formattable* source, 866 int32_t cnt, 867 UnicodeString& appendTo, 868 FieldPosition& ignore, 869 UErrorCode& success) const 870 { 871 return format(source, NULL, cnt, appendTo, &ignore, success); 872 } 873 874 // ------------------------------------- 875 // Internally creates a MessageFormat instance based on the 876 // pattern and formats the arguments Formattable array and 877 // copy into the appendTo buffer. 878 879 UnicodeString& 880 MessageFormat::format( const UnicodeString& pattern, 881 const Formattable* arguments, 882 int32_t cnt, 883 UnicodeString& appendTo, 884 UErrorCode& success) 885 { 886 MessageFormat temp(pattern, success); 887 return temp.format(arguments, NULL, cnt, appendTo, NULL, success); 888 } 889 890 // ------------------------------------- 891 // Formats the source Formattable object and copy into the 892 // appendTo buffer. The Formattable object must be an array 893 // of Formattable instances, returns error otherwise. 894 895 UnicodeString& 896 MessageFormat::format(const Formattable& source, 897 UnicodeString& appendTo, 898 FieldPosition& ignore, 899 UErrorCode& success) const 900 { 901 if (U_FAILURE(success)) 902 return appendTo; 903 if (source.getType() != Formattable::kArray) { 904 success = U_ILLEGAL_ARGUMENT_ERROR; 905 return appendTo; 906 } 907 int32_t cnt; 908 const Formattable* tmpPtr = source.getArray(cnt); 909 return format(tmpPtr, NULL, cnt, appendTo, &ignore, success); 910 } 911 912 UnicodeString& 913 MessageFormat::format(const UnicodeString* argumentNames, 914 const Formattable* arguments, 915 int32_t count, 916 UnicodeString& appendTo, 917 UErrorCode& success) const { 918 return format(arguments, argumentNames, count, appendTo, NULL, success); 919 } 920 921 // Does linear search to find the match for an ArgName. 922 const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments, 923 const UnicodeString *argumentNames, 924 int32_t cnt, UnicodeString& name) const { 925 for (int32_t i = 0; i < cnt; ++i) { 926 if (0 == argumentNames[i].compare(name)) { 927 return arguments + i; 928 } 929 } 930 return NULL; 931 } 932 933 934 UnicodeString& 935 MessageFormat::format(const Formattable* arguments, 936 const UnicodeString *argumentNames, 937 int32_t cnt, 938 UnicodeString& appendTo, 939 FieldPosition* pos, 940 UErrorCode& status) const { 941 if (U_FAILURE(status)) { 942 return appendTo; 943 } 944 945 UnicodeStringAppendable usapp(appendTo); 946 AppendableWrapper app(usapp); 947 format(0, 0.0, arguments, argumentNames, cnt, app, pos, status); 948 return appendTo; 949 } 950 951 // if argumentNames is NULL, this means arguments is a numeric array. 952 // arguments can not be NULL. 953 void MessageFormat::format(int32_t msgStart, double pluralNumber, 954 const Formattable* arguments, 955 const UnicodeString *argumentNames, 956 int32_t cnt, 957 AppendableWrapper& appendTo, 958 FieldPosition* ignore, 959 UErrorCode& success) const { 960 if (U_FAILURE(success)) { 961 return; 962 } 963 964 const UnicodeString& msgString = msgPattern.getPatternString(); 965 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 966 for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) { 967 const MessagePattern::Part* part = &msgPattern.getPart(i); 968 const UMessagePatternPartType type = part->getType(); 969 int32_t index = part->getIndex(); 970 appendTo.append(msgString, prevIndex, index - prevIndex); 971 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 972 return; 973 } 974 prevIndex = part->getLimit(); 975 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 976 const NumberFormat* nf = getDefaultNumberFormat(success); 977 appendTo.formatAndAppend(nf, Formattable(pluralNumber), success); 978 continue; 979 } 980 if (type != UMSGPAT_PART_TYPE_ARG_START) { 981 continue; 982 } 983 int32_t argLimit = msgPattern.getLimitPartIndex(i); 984 UMessagePatternArgType argType = part->getArgType(); 985 part = &msgPattern.getPart(++i); 986 const Formattable* arg; 987 UnicodeString noArg; 988 if (argumentNames == NULL) { 989 int32_t argNumber = part->getValue(); // ARG_NUMBER 990 if (0 <= argNumber && argNumber < cnt) { 991 arg = arguments + argNumber; 992 } else { 993 arg = NULL; 994 noArg.append(LEFT_CURLY_BRACE); 995 itos(argNumber, noArg); 996 noArg.append(RIGHT_CURLY_BRACE); 997 } 998 } else { 999 UnicodeString key; 1000 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 1001 key = msgPattern.getSubstring(*part); 1002 } else /* UMSGPAT_PART_TYPE_ARG_NUMBER */ { 1003 itos(part->getValue(), key); 1004 } 1005 arg = getArgFromListByName(arguments, argumentNames, cnt, key); 1006 if (arg == NULL) { 1007 noArg.append(LEFT_CURLY_BRACE); 1008 noArg.append(key); 1009 noArg.append(RIGHT_CURLY_BRACE); 1010 } 1011 } 1012 ++i; 1013 int32_t prevDestLength = appendTo.length(); 1014 const Format* formatter = NULL; 1015 if (!noArg.isEmpty()) { 1016 appendTo.append(noArg); 1017 } else if (arg == NULL) { 1018 appendTo.append(NULL_STRING, 4); 1019 } else if ((formatter = getCachedFormatter(i -2))) { 1020 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings. 1021 if (dynamic_cast<const ChoiceFormat*>(formatter) || 1022 dynamic_cast<const PluralFormat*>(formatter) || 1023 dynamic_cast<const SelectFormat*>(formatter)) { 1024 // We only handle nested formats here if they were provided via 1025 // setFormat() or its siblings. Otherwise they are not cached and instead 1026 // handled below according to argType. 1027 UnicodeString subMsgString; 1028 formatter->format(*arg, subMsgString, success); 1029 if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 || 1030 (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern)) 1031 ) { 1032 MessageFormat subMsgFormat(subMsgString, fLocale, success); 1033 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, ignore, success); 1034 } else { 1035 appendTo.append(subMsgString); 1036 } 1037 } else { 1038 appendTo.formatAndAppend(formatter, *arg, success); 1039 } 1040 } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { 1041 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1042 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1043 // for the hash table containind DummyFormat. 1044 if (arg->isNumeric()) { 1045 const NumberFormat* nf = getDefaultNumberFormat(success); 1046 appendTo.formatAndAppend(nf, *arg, success); 1047 } else if (arg->getType() == Formattable::kDate) { 1048 const DateFormat* df = getDefaultDateFormat(success); 1049 appendTo.formatAndAppend(df, *arg, success); 1050 } else { 1051 appendTo.append(arg->getString(success)); 1052 } 1053 } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { 1054 if (!arg->isNumeric()) { 1055 success = U_ILLEGAL_ARGUMENT_ERROR; 1056 return; 1057 } 1058 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1059 // because only this one converts non-double numeric types to double. 1060 const double number = arg->getDouble(success); 1061 int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number); 1062 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1063 cnt, appendTo, success); 1064 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) { 1065 if (!arg->isNumeric()) { 1066 success = U_ILLEGAL_ARGUMENT_ERROR; 1067 return; 1068 } 1069 const PluralFormat::PluralSelector &selector = 1070 argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider; 1071 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1072 // because only this one converts non-double numeric types to double. 1073 double number = arg->getDouble(success); 1074 int32_t subMsgStart = PluralFormat::findSubMessage(msgPattern, i, selector, number, 1075 success); 1076 double offset = msgPattern.getPluralOffset(i); 1077 formatComplexSubMessage(subMsgStart, number-offset, arguments, argumentNames, 1078 cnt, appendTo, success); 1079 } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { 1080 int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success); 1081 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1082 cnt, appendTo, success); 1083 } else { 1084 // This should never happen. 1085 success = U_INTERNAL_PROGRAM_ERROR; 1086 return; 1087 } 1088 ignore = updateMetaData(appendTo, prevDestLength, ignore, arg); 1089 prevIndex = msgPattern.getPart(argLimit).getLimit(); 1090 i = argLimit; 1091 } 1092 } 1093 1094 1095 void MessageFormat::formatComplexSubMessage(int32_t msgStart, 1096 double pluralNumber, 1097 const Formattable* arguments, 1098 const UnicodeString *argumentNames, 1099 int32_t cnt, 1100 AppendableWrapper& appendTo, 1101 UErrorCode& success) const { 1102 if (U_FAILURE(success)) { 1103 return; 1104 } 1105 1106 if (!MessageImpl::jdkAposMode(msgPattern)) { 1107 format(msgStart, pluralNumber, arguments, argumentNames, cnt, appendTo, NULL, success); 1108 return; 1109 } 1110 1111 // JDK compatibility mode: (see JDK MessageFormat.format() API docs) 1112 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes 1113 // - if the result string contains an open curly brace '{' then 1114 // instantiate a temporary MessageFormat object and format again; 1115 // otherwise just append the result string 1116 const UnicodeString& msgString = msgPattern.getPatternString(); 1117 UnicodeString sb; 1118 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 1119 for (int32_t i = msgStart;;) { 1120 const MessagePattern::Part& part = msgPattern.getPart(++i); 1121 const UMessagePatternPartType type = part.getType(); 1122 int32_t index = part.getIndex(); 1123 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 1124 sb.append(msgString, prevIndex, index - prevIndex); 1125 break; 1126 } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 1127 sb.append(msgString, prevIndex, index - prevIndex); 1128 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 1129 const NumberFormat* nf = getDefaultNumberFormat(success); 1130 sb.append(nf->format(pluralNumber, sb, success)); 1131 } 1132 prevIndex = part.getLimit(); 1133 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 1134 sb.append(msgString, prevIndex, index - prevIndex); 1135 prevIndex = index; 1136 i = msgPattern.getLimitPartIndex(i); 1137 index = msgPattern.getPart(i).getLimit(); 1138 MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb); 1139 prevIndex = index; 1140 } 1141 } 1142 if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) { 1143 UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter. 1144 MessageFormat subMsgFormat(emptyPattern, fLocale, success); 1145 subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success); 1146 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, NULL, success); 1147 } else { 1148 appendTo.append(sb); 1149 } 1150 } 1151 1152 1153 UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const { 1154 const UnicodeString& msgString=msgPattern.getPatternString(); 1155 int32_t prevIndex=msgPattern.getPart(from).getLimit(); 1156 UnicodeString b; 1157 for (int32_t i = from + 1; ; ++i) { 1158 const MessagePattern::Part& part = msgPattern.getPart(i); 1159 const UMessagePatternPartType type=part.getType(); 1160 int32_t index=part.getIndex(); 1161 b.append(msgString, prevIndex, index - prevIndex); 1162 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1163 return b; 1164 } 1165 // Unexpected Part "part" in parsed message. 1166 U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR); 1167 prevIndex=part.getLimit(); 1168 } 1169 } 1170 1171 1172 FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/, 1173 FieldPosition* /*fp*/, const Formattable* /*argId*/) const { 1174 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing. 1175 return NULL; 1176 /* 1177 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) { 1178 fp->setBeginIndex(prevLength); 1179 fp->setEndIndex(dest.get_length()); 1180 return NULL; 1181 } 1182 return fp; 1183 */ 1184 } 1185 1186 void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) { 1187 // Deep copy pointer fields. 1188 // We need not copy the formatAliases because they are re-filled 1189 // in each getFormats() call. 1190 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules 1191 // also get created on demand. 1192 argTypeCount = that.argTypeCount; 1193 if (argTypeCount > 0) { 1194 if (!allocateArgTypes(argTypeCount, ec)) { 1195 return; 1196 } 1197 uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0])); 1198 } 1199 if (cachedFormatters != NULL) { 1200 uhash_removeAll(cachedFormatters); 1201 } 1202 if (customFormatArgStarts != NULL) { 1203 uhash_removeAll(customFormatArgStarts); 1204 } 1205 if (that.cachedFormatters) { 1206 if (cachedFormatters == NULL) { 1207 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 1208 equalFormatsForHash, &ec); 1209 if (U_FAILURE(ec)) { 1210 return; 1211 } 1212 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); 1213 } 1214 1215 const int32_t count = uhash_count(that.cachedFormatters); 1216 int32_t pos, idx; 1217 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1218 const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos); 1219 Format* newFormat = ((Format*)(cur->value.pointer))->clone(); 1220 if (newFormat) { 1221 uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec); 1222 } else { 1223 ec = U_MEMORY_ALLOCATION_ERROR; 1224 return; 1225 } 1226 } 1227 } 1228 if (that.customFormatArgStarts) { 1229 if (customFormatArgStarts == NULL) { 1230 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 1231 NULL, &ec); 1232 } 1233 const int32_t count = uhash_count(that.customFormatArgStarts); 1234 int32_t pos, idx; 1235 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1236 const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos); 1237 uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec); 1238 } 1239 } 1240 } 1241 1242 1243 Formattable* 1244 MessageFormat::parse(int32_t msgStart, 1245 const UnicodeString& source, 1246 ParsePosition& pos, 1247 int32_t& count, 1248 UErrorCode& ec) const { 1249 count = 0; 1250 if (U_FAILURE(ec)) { 1251 pos.setErrorIndex(pos.getIndex()); 1252 return NULL; 1253 } 1254 // parse() does not work with named arguments. 1255 if (msgPattern.hasNamedArguments()) { 1256 ec = U_ARGUMENT_TYPE_MISMATCH; 1257 pos.setErrorIndex(pos.getIndex()); 1258 return NULL; 1259 } 1260 LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]); 1261 const UnicodeString& msgString=msgPattern.getPatternString(); 1262 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); 1263 int32_t sourceOffset = pos.getIndex(); 1264 ParsePosition tempStatus(0); 1265 1266 for(int32_t i=msgStart+1; ; ++i) { 1267 UBool haveArgResult = FALSE; 1268 const MessagePattern::Part* part=&msgPattern.getPart(i); 1269 const UMessagePatternPartType type=part->getType(); 1270 int32_t index=part->getIndex(); 1271 // Make sure the literal string matches. 1272 int32_t len = index - prevIndex; 1273 if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) { 1274 sourceOffset += len; 1275 prevIndex += len; 1276 } else { 1277 pos.setErrorIndex(sourceOffset); 1278 return NULL; // leave index as is to signal error 1279 } 1280 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1281 // Things went well! Done. 1282 pos.setIndex(sourceOffset); 1283 return resultArray.orphan(); 1284 } 1285 if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) { 1286 prevIndex=part->getLimit(); 1287 continue; 1288 } 1289 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.) 1290 // Unexpected Part "part" in parsed message. 1291 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START); 1292 int32_t argLimit=msgPattern.getLimitPartIndex(i); 1293 1294 UMessagePatternArgType argType=part->getArgType(); 1295 part=&msgPattern.getPart(++i); 1296 int32_t argNumber = part->getValue(); // ARG_NUMBER 1297 UnicodeString key; 1298 ++i; 1299 const Format* formatter = NULL; 1300 Formattable& argResult = resultArray[argNumber]; 1301 1302 if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) { 1303 // Just parse using the formatter. 1304 tempStatus.setIndex(sourceOffset); 1305 formatter->parseObject(source, argResult, tempStatus); 1306 if (tempStatus.getIndex() == sourceOffset) { 1307 pos.setErrorIndex(sourceOffset); 1308 return NULL; // leave index as is to signal error 1309 } 1310 sourceOffset = tempStatus.getIndex(); 1311 haveArgResult = TRUE; 1312 } else if( 1313 argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { 1314 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1315 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1316 // for the hash table containind DummyFormat. 1317 1318 // Match as a string. 1319 // if at end, use longest possible match 1320 // otherwise uses first match to intervening string 1321 // does NOT recursively try all possibilities 1322 UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit); 1323 int32_t next; 1324 if (!stringAfterArgument.isEmpty()) { 1325 next = source.indexOf(stringAfterArgument, sourceOffset); 1326 } else { 1327 next = source.length(); 1328 } 1329 if (next < 0) { 1330 pos.setErrorIndex(sourceOffset); 1331 return NULL; // leave index as is to signal error 1332 } else { 1333 UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset)); 1334 UnicodeString compValue; 1335 compValue.append(LEFT_CURLY_BRACE); 1336 itos(argNumber, compValue); 1337 compValue.append(RIGHT_CURLY_BRACE); 1338 if (0 != strValue.compare(compValue)) { 1339 argResult.setString(strValue); 1340 haveArgResult = TRUE; 1341 } 1342 sourceOffset = next; 1343 } 1344 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { 1345 tempStatus.setIndex(sourceOffset); 1346 double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus); 1347 if (tempStatus.getIndex() == sourceOffset) { 1348 pos.setErrorIndex(sourceOffset); 1349 return NULL; // leave index as is to signal error 1350 } 1351 argResult.setDouble(choiceResult); 1352 haveArgResult = TRUE; 1353 sourceOffset = tempStatus.getIndex(); 1354 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) { 1355 // Parsing not supported. 1356 ec = U_UNSUPPORTED_ERROR; 1357 return NULL; 1358 } else { 1359 // This should never happen. 1360 ec = U_INTERNAL_PROGRAM_ERROR; 1361 return NULL; 1362 } 1363 if (haveArgResult && count <= argNumber) { 1364 count = argNumber + 1; 1365 } 1366 prevIndex=msgPattern.getPart(argLimit).getLimit(); 1367 i=argLimit; 1368 } 1369 } 1370 // ------------------------------------- 1371 // Parses the source pattern and returns the Formattable objects array, 1372 // the array count and the ending parse position. The caller of this method 1373 // owns the array. 1374 1375 Formattable* 1376 MessageFormat::parse(const UnicodeString& source, 1377 ParsePosition& pos, 1378 int32_t& count) const { 1379 UErrorCode ec = U_ZERO_ERROR; 1380 return parse(0, source, pos, count, ec); 1381 } 1382 1383 // ------------------------------------- 1384 // Parses the source string and returns the array of 1385 // Formattable objects and the array count. The caller 1386 // owns the returned array. 1387 1388 Formattable* 1389 MessageFormat::parse(const UnicodeString& source, 1390 int32_t& cnt, 1391 UErrorCode& success) const 1392 { 1393 if (msgPattern.hasNamedArguments()) { 1394 success = U_ARGUMENT_TYPE_MISMATCH; 1395 return NULL; 1396 } 1397 ParsePosition status(0); 1398 // Calls the actual implementation method and starts 1399 // from zero offset of the source text. 1400 Formattable* result = parse(source, status, cnt); 1401 if (status.getIndex() == 0) { 1402 success = U_MESSAGE_PARSE_ERROR; 1403 delete[] result; 1404 return NULL; 1405 } 1406 return result; 1407 } 1408 1409 // ------------------------------------- 1410 // Parses the source text and copy into the result buffer. 1411 1412 void 1413 MessageFormat::parseObject( const UnicodeString& source, 1414 Formattable& result, 1415 ParsePosition& status) const 1416 { 1417 int32_t cnt = 0; 1418 Formattable* tmpResult = parse(source, status, cnt); 1419 if (tmpResult != NULL) 1420 result.adoptArray(tmpResult, cnt); 1421 } 1422 1423 UnicodeString 1424 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1425 UnicodeString result; 1426 if (U_SUCCESS(status)) { 1427 int32_t plen = pattern.length(); 1428 const UChar* pat = pattern.getBuffer(); 1429 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1430 UChar* buf = result.getBuffer(blen); 1431 if (buf == NULL) { 1432 status = U_MEMORY_ALLOCATION_ERROR; 1433 } else { 1434 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1435 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1436 } 1437 } 1438 if (U_FAILURE(status)) { 1439 result.setToBogus(); 1440 } 1441 return result; 1442 } 1443 1444 // ------------------------------------- 1445 1446 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1447 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1448 if (fmt == NULL) { 1449 ec = U_MEMORY_ALLOCATION_ERROR; 1450 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1451 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1452 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1453 } 1454 return fmt; 1455 } 1456 1457 void MessageFormat::cacheExplicitFormats(UErrorCode& status) { 1458 if (U_FAILURE(status)) { 1459 return; 1460 } 1461 1462 if (cachedFormatters != NULL) { 1463 uhash_removeAll(cachedFormatters); 1464 } 1465 if (customFormatArgStarts != NULL) { 1466 uhash_removeAll(customFormatArgStarts); 1467 } 1468 1469 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT 1470 // which we need not examine. 1471 int32_t limit = msgPattern.countParts() - 2; 1472 argTypeCount = 0; 1473 // We also need not look at the first two "parts" 1474 // (at most MSG_START and ARG_START) in this loop. 1475 // We determine the argTypeCount first so that we can allocateArgTypes 1476 // so that the next loop can set argTypes[argNumber]. 1477 // (This is for the C API which needs the argTypes to read its va_arg list.) 1478 for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) { 1479 const MessagePattern::Part& part = msgPattern.getPart(i); 1480 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1481 const int argNumber = part.getValue(); 1482 if (argNumber >= argTypeCount) { 1483 argTypeCount = argNumber + 1; 1484 } 1485 } 1486 } 1487 if (!allocateArgTypes(argTypeCount, status)) { 1488 return; 1489 } 1490 // Set all argTypes to kObject, as a "none" value, for lack of any better value. 1491 // We never use kObject for real arguments. 1492 // We use it as "no argument yet" for the check for hasArgTypeConflicts. 1493 for (int32_t i = 0; i < argTypeCount; ++i) { 1494 argTypes[i] = Formattable::kObject; 1495 } 1496 hasArgTypeConflicts = FALSE; 1497 1498 // This loop starts at part index 1 because we do need to examine 1499 // ARG_START parts. (But we can ignore the MSG_START.) 1500 for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) { 1501 const MessagePattern::Part* part = &msgPattern.getPart(i); 1502 if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) { 1503 continue; 1504 } 1505 UMessagePatternArgType argType = part->getArgType(); 1506 1507 int32_t argNumber = -1; 1508 part = &msgPattern.getPart(i + 1); 1509 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1510 argNumber = part->getValue(); 1511 } 1512 Formattable::Type formattableType; 1513 1514 switch (argType) { 1515 case UMSGPAT_ARG_TYPE_NONE: 1516 formattableType = Formattable::kString; 1517 break; 1518 case UMSGPAT_ARG_TYPE_SIMPLE: { 1519 int32_t index = i; 1520 i += 2; 1521 UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++)); 1522 UnicodeString style; 1523 if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { 1524 style = msgPattern.getSubstring(*part); 1525 ++i; 1526 } 1527 UParseError parseError; 1528 Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status); 1529 setArgStartFormat(index, formatter, status); 1530 break; 1531 } 1532 case UMSGPAT_ARG_TYPE_CHOICE: 1533 case UMSGPAT_ARG_TYPE_PLURAL: 1534 case UMSGPAT_ARG_TYPE_SELECTORDINAL: 1535 formattableType = Formattable::kDouble; 1536 break; 1537 case UMSGPAT_ARG_TYPE_SELECT: 1538 formattableType = Formattable::kString; 1539 break; 1540 default: 1541 status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable. 1542 formattableType = Formattable::kString; 1543 break; 1544 } 1545 if (argNumber != -1) { 1546 if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) { 1547 hasArgTypeConflicts = TRUE; 1548 } 1549 argTypes[argNumber] = formattableType; 1550 } 1551 } 1552 } 1553 1554 1555 Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style, 1556 Formattable::Type& formattableType, UParseError& parseError, 1557 UErrorCode& ec) { 1558 if (U_FAILURE(ec)) { 1559 return NULL; 1560 } 1561 Format* fmt = NULL; 1562 int32_t typeID, styleID; 1563 DateFormat::EStyle date_style; 1564 1565 switch (typeID = findKeyword(type, TYPE_IDS)) { 1566 case 0: // number 1567 formattableType = Formattable::kDouble; 1568 switch (findKeyword(style, NUMBER_STYLE_IDS)) { 1569 case 0: // default 1570 fmt = NumberFormat::createInstance(fLocale, ec); 1571 break; 1572 case 1: // currency 1573 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1574 break; 1575 case 2: // percent 1576 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1577 break; 1578 case 3: // integer 1579 formattableType = Formattable::kLong; 1580 fmt = createIntegerFormat(fLocale, ec); 1581 break; 1582 default: // pattern 1583 fmt = NumberFormat::createInstance(fLocale, ec); 1584 if (fmt) { 1585 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt); 1586 if (decfmt != NULL) { 1587 decfmt->applyPattern(style,parseError,ec); 1588 } 1589 } 1590 break; 1591 } 1592 break; 1593 1594 case 1: // date 1595 case 2: // time 1596 formattableType = Formattable::kDate; 1597 styleID = findKeyword(style, DATE_STYLE_IDS); 1598 date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1599 1600 if (typeID == 1) { 1601 fmt = DateFormat::createDateInstance(date_style, fLocale); 1602 } else { 1603 fmt = DateFormat::createTimeInstance(date_style, fLocale); 1604 } 1605 1606 if (styleID < 0 && fmt != NULL) { 1607 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt); 1608 if (sdtfmt != NULL) { 1609 sdtfmt->applyPattern(style); 1610 } 1611 } 1612 break; 1613 1614 case 3: // spellout 1615 formattableType = Formattable::kDouble; 1616 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec); 1617 break; 1618 case 4: // ordinal 1619 formattableType = Formattable::kDouble; 1620 fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec); 1621 break; 1622 case 5: // duration 1623 formattableType = Formattable::kDouble; 1624 fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec); 1625 break; 1626 default: 1627 formattableType = Formattable::kString; 1628 ec = U_ILLEGAL_ARGUMENT_ERROR; 1629 break; 1630 } 1631 1632 return fmt; 1633 } 1634 1635 1636 //------------------------------------- 1637 // Finds the string, s, in the string array, list. 1638 int32_t MessageFormat::findKeyword(const UnicodeString& s, 1639 const UChar * const *list) 1640 { 1641 if (s.isEmpty()) { 1642 return 0; // default 1643 } 1644 1645 int32_t length = s.length(); 1646 const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length); 1647 UnicodeString buffer(FALSE, ps, length); 1648 // Trims the space characters and turns all characters 1649 // in s to lower case. 1650 buffer.toLower(""); 1651 for (int32_t i = 0; list[i]; ++i) { 1652 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1653 return i; 1654 } 1655 } 1656 return -1; 1657 } 1658 1659 /** 1660 * Convenience method that ought to be in NumberFormat 1661 */ 1662 NumberFormat* 1663 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1664 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1665 DecimalFormat *temp2; 1666 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) { 1667 temp2->setMaximumFractionDigits(0); 1668 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1669 temp2->setParseIntegerOnly(TRUE); 1670 } 1671 1672 return temp; 1673 } 1674 1675 /** 1676 * Return the default number format. Used to format a numeric 1677 * argument when subformats[i].format is NULL. Returns NULL 1678 * on failure. 1679 * 1680 * Semantically const but may modify *this. 1681 */ 1682 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1683 if (defaultNumberFormat == NULL) { 1684 MessageFormat* t = (MessageFormat*) this; 1685 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1686 if (U_FAILURE(ec)) { 1687 delete t->defaultNumberFormat; 1688 t->defaultNumberFormat = NULL; 1689 } else if (t->defaultNumberFormat == NULL) { 1690 ec = U_MEMORY_ALLOCATION_ERROR; 1691 } 1692 } 1693 return defaultNumberFormat; 1694 } 1695 1696 /** 1697 * Return the default date format. Used to format a date 1698 * argument when subformats[i].format is NULL. Returns NULL 1699 * on failure. 1700 * 1701 * Semantically const but may modify *this. 1702 */ 1703 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1704 if (defaultDateFormat == NULL) { 1705 MessageFormat* t = (MessageFormat*) this; 1706 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1707 if (t->defaultDateFormat == NULL) { 1708 ec = U_MEMORY_ALLOCATION_ERROR; 1709 } 1710 } 1711 return defaultDateFormat; 1712 } 1713 1714 UBool 1715 MessageFormat::usesNamedArguments() const { 1716 return msgPattern.hasNamedArguments(); 1717 } 1718 1719 int32_t 1720 MessageFormat::getArgTypeCount() const { 1721 return argTypeCount; 1722 } 1723 1724 UBool MessageFormat::equalFormats(const void* left, const void* right) { 1725 return *(const Format*)left==*(const Format*)right; 1726 } 1727 1728 1729 UBool MessageFormat::DummyFormat::operator==(const Format&) const { 1730 return TRUE; 1731 } 1732 1733 Format* MessageFormat::DummyFormat::clone() const { 1734 return new DummyFormat(); 1735 } 1736 1737 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1738 UnicodeString& appendTo, 1739 UErrorCode& status) const { 1740 if (U_SUCCESS(status)) { 1741 status = U_UNSUPPORTED_ERROR; 1742 } 1743 return appendTo; 1744 } 1745 1746 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1747 UnicodeString& appendTo, 1748 FieldPosition&, 1749 UErrorCode& status) const { 1750 if (U_SUCCESS(status)) { 1751 status = U_UNSUPPORTED_ERROR; 1752 } 1753 return appendTo; 1754 } 1755 1756 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1757 UnicodeString& appendTo, 1758 FieldPositionIterator*, 1759 UErrorCode& status) const { 1760 if (U_SUCCESS(status)) { 1761 status = U_UNSUPPORTED_ERROR; 1762 } 1763 return appendTo; 1764 } 1765 1766 void MessageFormat::DummyFormat::parseObject(const UnicodeString&, 1767 Formattable&, 1768 ParsePosition& ) const { 1769 } 1770 1771 1772 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1773 pos=0; 1774 fFormatNames = fNameList; 1775 } 1776 1777 const UnicodeString* 1778 FormatNameEnumeration::snext(UErrorCode& status) { 1779 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1780 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1781 } 1782 return NULL; 1783 } 1784 1785 void 1786 FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1787 pos=0; 1788 } 1789 1790 int32_t 1791 FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1792 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1793 } 1794 1795 FormatNameEnumeration::~FormatNameEnumeration() { 1796 delete fFormatNames; 1797 } 1798 1799 1800 MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const Locale* loc, UPluralType t) 1801 : locale(loc), rules(NULL), type(t) { 1802 } 1803 1804 MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() { 1805 // We own the rules but not the locale. 1806 delete rules; 1807 } 1808 1809 UnicodeString MessageFormat::PluralSelectorProvider::select(double number, UErrorCode& ec) const { 1810 if (U_FAILURE(ec)) { 1811 return UnicodeString(FALSE, OTHER_STRING, 5); 1812 } 1813 MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this); 1814 if(rules == NULL) { 1815 t->rules = PluralRules::forLocale(*locale, type, ec); 1816 if (U_FAILURE(ec)) { 1817 return UnicodeString(FALSE, OTHER_STRING, 5); 1818 } 1819 } 1820 return rules->select(number); 1821 } 1822 1823 void MessageFormat::PluralSelectorProvider::reset(const Locale* loc) { 1824 locale = loc; 1825 delete rules; 1826 rules = NULL; 1827 } 1828 1829 1830 U_NAMESPACE_END 1831 1832 #endif /* #if !UCONFIG_NO_FORMATTING */ 1833 1834 //eof 1835