1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2008, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 ******************************************************************************** 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_FORMATTING 25 26 #include "unicode/msgfmt.h" 27 #include "unicode/decimfmt.h" 28 #include "unicode/datefmt.h" 29 #include "unicode/smpdtfmt.h" 30 #include "unicode/choicfmt.h" 31 #include "unicode/plurfmt.h" 32 #include "unicode/ustring.h" 33 #include "unicode/ucnv_err.h" 34 #include "unicode/uchar.h" 35 #include "unicode/umsg.h" 36 #include "unicode/rbnf.h" 37 #include "cmemory.h" 38 #include "msgfmt_impl.h" 39 #include "../common/util.h" 40 #include "uassert.h" 41 #include "ustrfmt.h" 42 #include "uvector.h" 43 44 // ***************************************************************************** 45 // class MessageFormat 46 // ***************************************************************************** 47 48 #define COMMA ((UChar)0x002C) 49 #define SINGLE_QUOTE ((UChar)0x0027) 50 #define LEFT_CURLY_BRACE ((UChar)0x007B) 51 #define RIGHT_CURLY_BRACE ((UChar)0x007D) 52 53 //--------------------------------------- 54 // static data 55 56 static const UChar ID_EMPTY[] = { 57 0 /* empty string, used for default so that null can mark end of list */ 58 }; 59 60 static const UChar ID_NUMBER[] = { 61 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 62 }; 63 static const UChar ID_DATE[] = { 64 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 65 }; 66 static const UChar ID_TIME[] = { 67 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 68 }; 69 static const UChar ID_CHOICE[] = { 70 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */ 71 }; 72 static const UChar ID_SPELLOUT[] = { 73 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 74 }; 75 static const UChar ID_ORDINAL[] = { 76 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 77 }; 78 static const UChar ID_DURATION[] = { 79 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 80 }; 81 static const UChar ID_PLURAL[] = { 82 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ 83 }; 84 85 // MessageFormat Type List Number, Date, Time or Choice 86 static const UChar * const TYPE_IDS[] = { 87 ID_EMPTY, 88 ID_NUMBER, 89 ID_DATE, 90 ID_TIME, 91 ID_CHOICE, 92 ID_SPELLOUT, 93 ID_ORDINAL, 94 ID_DURATION, 95 ID_PLURAL, 96 NULL, 97 }; 98 99 static const UChar ID_CURRENCY[] = { 100 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 101 }; 102 static const UChar ID_PERCENT[] = { 103 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 104 }; 105 static const UChar ID_INTEGER[] = { 106 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 107 }; 108 109 // NumberFormat modifier list, default, currency, percent or integer 110 static const UChar * const NUMBER_STYLE_IDS[] = { 111 ID_EMPTY, 112 ID_CURRENCY, 113 ID_PERCENT, 114 ID_INTEGER, 115 NULL, 116 }; 117 118 static const UChar ID_SHORT[] = { 119 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 120 }; 121 static const UChar ID_MEDIUM[] = { 122 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 123 }; 124 static const UChar ID_LONG[] = { 125 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 126 }; 127 static const UChar ID_FULL[] = { 128 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 129 }; 130 131 // DateFormat modifier list, default, short, medium, long or full 132 static const UChar * const DATE_STYLE_IDS[] = { 133 ID_EMPTY, 134 ID_SHORT, 135 ID_MEDIUM, 136 ID_LONG, 137 ID_FULL, 138 NULL, 139 }; 140 141 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = { 142 U_NAMESPACE_QUALIFIER DateFormat::kDefault, 143 U_NAMESPACE_QUALIFIER DateFormat::kShort, 144 U_NAMESPACE_QUALIFIER DateFormat::kMedium, 145 U_NAMESPACE_QUALIFIER DateFormat::kLong, 146 U_NAMESPACE_QUALIFIER DateFormat::kFull, 147 }; 148 149 static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 150 151 U_NAMESPACE_BEGIN 152 153 // ------------------------------------- 154 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 155 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 156 157 //-------------------------------------------------------------------- 158 159 /** 160 * Convert a string to an unsigned decimal, ignoring rule whitespace. 161 * @return a non-negative number if successful, or a negative number 162 * upon failure. 163 */ 164 static int32_t stou(const UnicodeString& string) { 165 int32_t n = 0; 166 int32_t count = 0; 167 UChar32 c; 168 for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) { 169 c = string.char32At(i); 170 if (uprv_isRuleWhiteSpace(c)) { 171 continue; 172 } 173 int32_t d = u_digit(c, 10); 174 if (d < 0 || ++count > 10) { 175 return -1; 176 } 177 n = 10*n + d; 178 } 179 return n; 180 } 181 182 /** 183 * Convert an integer value to a string and append the result to 184 * the given UnicodeString. 185 */ 186 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 187 UChar temp[16]; 188 uprv_itou(temp,16,i,10,0); // 10 == radix 189 appendTo.append(temp); 190 return appendTo; 191 } 192 193 /* 194 * A structure representing one subformat of this MessageFormat. 195 * Each subformat has a Format object, an offset into the plain 196 * pattern text fPattern, and an argument number. The argument 197 * number corresponds to the array of arguments to be formatted. 198 * @internal 199 */ 200 class MessageFormat::Subformat : public UMemory { 201 public: 202 /** 203 * @internal 204 */ 205 Format* format; // formatter 206 /** 207 * @internal 208 */ 209 int32_t offset; // offset into fPattern 210 /** 211 * @internal 212 */ 213 // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number 214 int32_t argNum; // 0-based argument number 215 /** 216 * @internal 217 */ 218 UnicodeString* argName; // argument name or number 219 220 /** 221 * Clone that.format and assign it to this.format 222 * Do NOT delete this.format 223 * @internal 224 */ 225 Subformat& operator=(const Subformat& that) { 226 if (this != &that) { 227 format = that.format ? that.format->clone() : NULL; 228 offset = that.offset; 229 argNum = that.argNum; 230 argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL; 231 } 232 return *this; 233 } 234 235 /** 236 * @internal 237 */ 238 UBool operator==(const Subformat& that) const { 239 // Do cheap comparisons first 240 return offset == that.offset && 241 argNum == that.argNum && 242 ((argName == that.argName) || 243 (*argName == *that.argName)) && 244 ((format == that.format) || // handles NULL 245 (*format == *that.format)); 246 } 247 248 /** 249 * @internal 250 */ 251 UBool operator!=(const Subformat& that) const { 252 return !operator==(that); 253 } 254 }; 255 256 // ------------------------------------- 257 // Creates a MessageFormat instance based on the pattern. 258 259 MessageFormat::MessageFormat(const UnicodeString& pattern, 260 UErrorCode& success) 261 : fLocale(Locale::getDefault()), // Uses the default locale 262 formatAliases(NULL), 263 formatAliasesCapacity(0), 264 idStart(UCHAR_ID_START), 265 idContinue(UCHAR_ID_CONTINUE), 266 subformats(NULL), 267 subformatCount(0), 268 subformatCapacity(0), 269 argTypes(NULL), 270 argTypeCount(0), 271 argTypeCapacity(0), 272 isArgNumeric(TRUE), 273 defaultNumberFormat(NULL), 274 defaultDateFormat(NULL) 275 { 276 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 277 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 278 success = U_MEMORY_ALLOCATION_ERROR; 279 return; 280 } 281 applyPattern(pattern, success); 282 setLocaleIDs(fLocale.getName(), fLocale.getName()); 283 } 284 285 MessageFormat::MessageFormat(const UnicodeString& pattern, 286 const Locale& newLocale, 287 UErrorCode& success) 288 : fLocale(newLocale), 289 formatAliases(NULL), 290 formatAliasesCapacity(0), 291 idStart(UCHAR_ID_START), 292 idContinue(UCHAR_ID_CONTINUE), 293 subformats(NULL), 294 subformatCount(0), 295 subformatCapacity(0), 296 argTypes(NULL), 297 argTypeCount(0), 298 argTypeCapacity(0), 299 isArgNumeric(TRUE), 300 defaultNumberFormat(NULL), 301 defaultDateFormat(NULL) 302 { 303 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 304 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 305 success = U_MEMORY_ALLOCATION_ERROR; 306 return; 307 } 308 applyPattern(pattern, success); 309 setLocaleIDs(fLocale.getName(), fLocale.getName()); 310 } 311 312 MessageFormat::MessageFormat(const UnicodeString& pattern, 313 const Locale& newLocale, 314 UParseError& parseError, 315 UErrorCode& success) 316 : fLocale(newLocale), 317 formatAliases(NULL), 318 formatAliasesCapacity(0), 319 idStart(UCHAR_ID_START), 320 idContinue(UCHAR_ID_CONTINUE), 321 subformats(NULL), 322 subformatCount(0), 323 subformatCapacity(0), 324 argTypes(NULL), 325 argTypeCount(0), 326 argTypeCapacity(0), 327 isArgNumeric(TRUE), 328 defaultNumberFormat(NULL), 329 defaultDateFormat(NULL) 330 { 331 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 332 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 333 success = U_MEMORY_ALLOCATION_ERROR; 334 return; 335 } 336 applyPattern(pattern, parseError, success); 337 setLocaleIDs(fLocale.getName(), fLocale.getName()); 338 } 339 340 MessageFormat::MessageFormat(const MessageFormat& that) 341 : Format(that), 342 formatAliases(NULL), 343 formatAliasesCapacity(0), 344 idStart(UCHAR_ID_START), 345 idContinue(UCHAR_ID_CONTINUE), 346 subformats(NULL), 347 subformatCount(0), 348 subformatCapacity(0), 349 argTypes(NULL), 350 argTypeCount(0), 351 argTypeCapacity(0), 352 isArgNumeric(TRUE), 353 defaultNumberFormat(NULL), 354 defaultDateFormat(NULL) 355 { 356 *this = that; 357 } 358 359 MessageFormat::~MessageFormat() 360 { 361 int32_t idx; 362 for (idx = 0; idx < subformatCount; idx++) { 363 delete subformats[idx].format; 364 delete subformats[idx].argName; 365 } 366 uprv_free(subformats); 367 subformats = NULL; 368 subformatCount = subformatCapacity = 0; 369 370 uprv_free(argTypes); 371 argTypes = NULL; 372 argTypeCount = argTypeCapacity = 0; 373 374 uprv_free(formatAliases); 375 376 delete defaultNumberFormat; 377 delete defaultDateFormat; 378 } 379 380 //-------------------------------------------------------------------- 381 // Variable-size array management 382 383 /** 384 * Allocate subformats[] to at least the given capacity and return 385 * TRUE if successful. If not, leave subformats[] unchanged. 386 * 387 * If subformats is NULL, allocate it. If it is not NULL, enlarge it 388 * if necessary to be at least as large as specified. 389 */ 390 UBool MessageFormat::allocateSubformats(int32_t capacity) { 391 if (subformats == NULL) { 392 subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity); 393 subformatCapacity = capacity; 394 subformatCount = 0; 395 if (subformats == NULL) { 396 subformatCapacity = 0; 397 return FALSE; 398 } 399 } else if (subformatCapacity < capacity) { 400 if (capacity < 2*subformatCapacity) { 401 capacity = 2*subformatCapacity; 402 } 403 Subformat* a = (Subformat*) 404 uprv_realloc(subformats, sizeof(*subformats) * capacity); 405 if (a == NULL) { 406 return FALSE; // request failed 407 } 408 subformats = a; 409 subformatCapacity = capacity; 410 } 411 return TRUE; 412 } 413 414 /** 415 * Allocate argTypes[] to at least the given capacity and return 416 * TRUE if successful. If not, leave argTypes[] unchanged. 417 * 418 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 419 * if necessary to be at least as large as specified. 420 */ 421 UBool MessageFormat::allocateArgTypes(int32_t capacity) { 422 if (argTypes == NULL) { 423 argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity); 424 argTypeCount = 0; 425 argTypeCapacity = capacity; 426 if (argTypes == NULL) { 427 argTypeCapacity = 0; 428 return FALSE; 429 } 430 for (int32_t i=0; i<capacity; ++i) { 431 argTypes[i] = Formattable::kString; 432 } 433 } else if (argTypeCapacity < capacity) { 434 if (capacity < 2*argTypeCapacity) { 435 capacity = 2*argTypeCapacity; 436 } 437 Formattable::Type* a = (Formattable::Type*) 438 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 439 if (a == NULL) { 440 return FALSE; // request failed 441 } 442 for (int32_t i=argTypeCapacity; i<capacity; ++i) { 443 a[i] = Formattable::kString; 444 } 445 argTypes = a; 446 argTypeCapacity = capacity; 447 } 448 return TRUE; 449 } 450 451 // ------------------------------------- 452 // assignment operator 453 454 const MessageFormat& 455 MessageFormat::operator=(const MessageFormat& that) 456 { 457 // Reallocate the arrays BEFORE changing this object 458 if (this != &that && 459 allocateSubformats(that.subformatCount) && 460 allocateArgTypes(that.argTypeCount)) { 461 462 // Calls the super class for assignment first. 463 Format::operator=(that); 464 465 fPattern = that.fPattern; 466 setLocale(that.fLocale); 467 isArgNumeric = that.isArgNumeric; 468 int32_t j; 469 for (j=0; j<subformatCount; ++j) { 470 delete subformats[j].format; 471 } 472 subformatCount = 0; 473 474 for (j=0; j<that.subformatCount; ++j) { 475 // Subformat::operator= does NOT delete this.format 476 subformats[j] = that.subformats[j]; 477 } 478 subformatCount = that.subformatCount; 479 480 for (j=0; j<that.argTypeCount; ++j) { 481 argTypes[j] = that.argTypes[j]; 482 } 483 argTypeCount = that.argTypeCount; 484 } 485 return *this; 486 } 487 488 UBool 489 MessageFormat::operator==(const Format& rhs) const 490 { 491 if (this == &rhs) return TRUE; 492 493 MessageFormat& that = (MessageFormat&)rhs; 494 495 // Check class ID before checking MessageFormat members 496 if (!Format::operator==(rhs) || 497 fPattern != that.fPattern || 498 fLocale != that.fLocale || 499 isArgNumeric != that.isArgNumeric) { 500 return FALSE; 501 } 502 503 int32_t j; 504 for (j=0; j<subformatCount; ++j) { 505 if (subformats[j] != that.subformats[j]) { 506 return FALSE; 507 } 508 } 509 510 return TRUE; 511 } 512 513 // ------------------------------------- 514 // Creates a copy of this MessageFormat, the caller owns the copy. 515 516 Format* 517 MessageFormat::clone() const 518 { 519 return new MessageFormat(*this); 520 } 521 522 // ------------------------------------- 523 // Sets the locale of this MessageFormat object to theLocale. 524 525 void 526 MessageFormat::setLocale(const Locale& theLocale) 527 { 528 if (fLocale != theLocale) { 529 delete defaultNumberFormat; 530 defaultNumberFormat = NULL; 531 delete defaultDateFormat; 532 defaultDateFormat = NULL; 533 } 534 fLocale = theLocale; 535 setLocaleIDs(fLocale.getName(), fLocale.getName()); 536 } 537 538 // ------------------------------------- 539 // Gets the locale of this MessageFormat object. 540 541 const Locale& 542 MessageFormat::getLocale() const 543 { 544 return fLocale; 545 } 546 547 548 549 550 void 551 MessageFormat::applyPattern(const UnicodeString& newPattern, 552 UErrorCode& status) 553 { 554 UParseError parseError; 555 applyPattern(newPattern,parseError,status); 556 } 557 558 559 // ------------------------------------- 560 // Applies the new pattern and returns an error if the pattern 561 // is not correct. 562 void 563 MessageFormat::applyPattern(const UnicodeString& pattern, 564 UParseError& parseError, 565 UErrorCode& ec) 566 { 567 if(U_FAILURE(ec)) { 568 return; 569 } 570 // The pattern is broken up into segments. Each time a subformat 571 // is encountered, 4 segments are recorded. For example, consider 572 // the pattern: 573 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}." 574 // The first set of segments is: 575 // segments[0] = "There " 576 // segments[1] = "0" 577 // segments[2] = "choice" 578 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files" 579 580 // During parsing, the plain text is accumulated into segments[0]. 581 // Segments 1..3 are used to parse each subpattern. Each time a 582 // subpattern is parsed, it creates a format object that is stored 583 // in the subformats array, together with an offset and argument 584 // number. The offset into the plain text stored in 585 // segments[0]. 586 587 // Quotes in segment 0 are handled normally. They are removed. 588 // Quotes may not occur in segments 1 or 2. 589 // Quotes in segment 3 are parsed and _copied_. This makes 590 // subformat patterns work, e.g., {1,number,'#'.##} passes 591 // the pattern "'#'.##" to DecimalFormat. 592 593 UnicodeString segments[4]; 594 int32_t part = 0; // segment we are in, 0..3 595 // Record the highest argument number in the pattern. (In the 596 // subpattern {3,number} the argument number is 3.) 597 int32_t formatNumber = 0; 598 UBool inQuote = FALSE; 599 int32_t braceStack = 0; 600 // Clear error struct 601 parseError.offset = -1; 602 parseError.preContext[0] = parseError.postContext[0] = (UChar)0; 603 int32_t patLen = pattern.length(); 604 int32_t i; 605 606 for (i=0; i<subformatCount; ++i) { 607 delete subformats[i].format; 608 } 609 subformatCount = 0; 610 argTypeCount = 0; 611 612 for (i=0; i<patLen; ++i) { 613 UChar ch = pattern[i]; 614 if (part == 0) { 615 // In segment 0, recognize and remove quotes 616 if (ch == SINGLE_QUOTE) { 617 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) { 618 segments[0] += ch; 619 ++i; 620 } else { 621 inQuote = !inQuote; 622 } 623 } else if (ch == LEFT_CURLY_BRACE && !inQuote) { 624 // The only way we get from segment 0 to 1 is via an 625 // unquoted '{'. 626 part = 1; 627 } else { 628 segments[0] += ch; 629 } 630 } else if (inQuote) { 631 // In segments 1..3, recognize quoted matter, and copy it 632 // into the segment, together with the quotes. This takes 633 // care of '' as well. 634 segments[part] += ch; 635 if (ch == SINGLE_QUOTE) { 636 inQuote = FALSE; 637 } 638 } else { 639 // We have an unquoted character in segment 1..3 640 switch (ch) { 641 case COMMA: 642 // Commas bump us to the next segment, except for segment 3, 643 // which can contain commas. See example above. 644 if (part < 3) 645 part += 1; 646 else 647 segments[3] += ch; 648 break; 649 case LEFT_CURLY_BRACE: 650 // Handle '{' within segment 3. The initial '{' 651 // before segment 1 is handled above. 652 if (part != 3) { 653 ec = U_PATTERN_SYNTAX_ERROR; 654 goto SYNTAX_ERROR; 655 } 656 ++braceStack; 657 segments[part] += ch; 658 break; 659 case RIGHT_CURLY_BRACE: 660 if (braceStack == 0) { 661 makeFormat(formatNumber, segments, parseError,ec); 662 if (U_FAILURE(ec)){ 663 goto SYNTAX_ERROR; 664 } 665 formatNumber++; 666 segments[1].remove(); 667 segments[2].remove(); 668 segments[3].remove(); 669 part = 0; 670 } else { 671 --braceStack; 672 segments[part] += ch; 673 } 674 break; 675 case SINGLE_QUOTE: 676 inQuote = TRUE; 677 // fall through (copy quote chars in segments 1..3) 678 default: 679 segments[part] += ch; 680 break; 681 } 682 } 683 } 684 if (braceStack != 0 || part != 0) { 685 // Unmatched braces in the pattern 686 ec = U_UNMATCHED_BRACES; 687 goto SYNTAX_ERROR; 688 } 689 fPattern = segments[0]; 690 return; 691 692 SYNTAX_ERROR: 693 syntaxError(pattern, i, parseError); 694 for (i=0; i<subformatCount; ++i) { 695 delete subformats[i].format; 696 } 697 argTypeCount = subformatCount = 0; 698 } 699 // ------------------------------------- 700 // Converts this MessageFormat instance to a pattern. 701 702 UnicodeString& 703 MessageFormat::toPattern(UnicodeString& appendTo) const { 704 // later, make this more extensible 705 int32_t lastOffset = 0; 706 int32_t i; 707 for (i=0; i<subformatCount; ++i) { 708 copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo); 709 lastOffset = subformats[i].offset; 710 appendTo += LEFT_CURLY_BRACE; 711 if (isArgNumeric) { 712 itos(subformats[i].argNum, appendTo); 713 } 714 else { 715 appendTo += *subformats[i].argName; 716 } 717 Format* fmt = subformats[i].format; 718 if (fmt == NULL) { 719 // do nothing, string format 720 } 721 else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 722 723 UErrorCode ec = U_ZERO_ERROR; 724 NumberFormat& formatAlias = *(NumberFormat*)fmt; 725 NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec); 726 NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec); 727 NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec); 728 NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec); 729 730 appendTo += COMMA; 731 appendTo += ID_NUMBER; 732 if (formatAlias != *defaultTemplate) { 733 appendTo += COMMA; 734 if (formatAlias == *currencyTemplate) { 735 appendTo += ID_CURRENCY; 736 } 737 else if (formatAlias == *percentTemplate) { 738 appendTo += ID_PERCENT; 739 } 740 else if (formatAlias == *integerTemplate) { 741 appendTo += ID_INTEGER; 742 } 743 else { 744 UnicodeString buffer; 745 appendTo += ((DecimalFormat*)fmt)->toPattern(buffer); 746 } 747 } 748 749 delete defaultTemplate; 750 delete currencyTemplate; 751 delete percentTemplate; 752 delete integerTemplate; 753 } 754 else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { 755 DateFormat& formatAlias = *(DateFormat*)fmt; 756 DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale); 757 DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale); 758 DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale); 759 DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale); 760 DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale); 761 DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale); 762 DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale); 763 DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale); 764 765 766 appendTo += COMMA; 767 if (formatAlias == *defaultDateTemplate) { 768 appendTo += ID_DATE; 769 } 770 else if (formatAlias == *shortDateTemplate) { 771 appendTo += ID_DATE; 772 appendTo += COMMA; 773 appendTo += ID_SHORT; 774 } 775 else if (formatAlias == *defaultDateTemplate) { 776 appendTo += ID_DATE; 777 appendTo += COMMA; 778 appendTo += ID_MEDIUM; 779 } 780 else if (formatAlias == *longDateTemplate) { 781 appendTo += ID_DATE; 782 appendTo += COMMA; 783 appendTo += ID_LONG; 784 } 785 else if (formatAlias == *fullDateTemplate) { 786 appendTo += ID_DATE; 787 appendTo += COMMA; 788 appendTo += ID_FULL; 789 } 790 else if (formatAlias == *defaultTimeTemplate) { 791 appendTo += ID_TIME; 792 } 793 else if (formatAlias == *shortTimeTemplate) { 794 appendTo += ID_TIME; 795 appendTo += COMMA; 796 appendTo += ID_SHORT; 797 } 798 else if (formatAlias == *defaultTimeTemplate) { 799 appendTo += ID_TIME; 800 appendTo += COMMA; 801 appendTo += ID_MEDIUM; 802 } 803 else if (formatAlias == *longTimeTemplate) { 804 appendTo += ID_TIME; 805 appendTo += COMMA; 806 appendTo += ID_LONG; 807 } 808 else if (formatAlias == *fullTimeTemplate) { 809 appendTo += ID_TIME; 810 appendTo += COMMA; 811 appendTo += ID_FULL; 812 } 813 else { 814 UnicodeString buffer; 815 appendTo += ID_DATE; 816 appendTo += COMMA; 817 appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer); 818 } 819 820 delete defaultDateTemplate; 821 delete shortDateTemplate; 822 delete longDateTemplate; 823 delete fullDateTemplate; 824 delete defaultTimeTemplate; 825 delete shortTimeTemplate; 826 delete longTimeTemplate; 827 delete fullTimeTemplate; 828 // {sfb} there should be a more efficient way to do this! 829 } 830 else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) { 831 UnicodeString buffer; 832 appendTo += COMMA; 833 appendTo += ID_CHOICE; 834 appendTo += COMMA; 835 appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer); 836 } 837 else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) { 838 UnicodeString buffer; 839 appendTo += ((PluralFormat*)fmt)->toPattern(buffer); 840 } 841 else { 842 //appendTo += ", unknown"; 843 } 844 appendTo += RIGHT_CURLY_BRACE; 845 } 846 copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo); 847 return appendTo; 848 } 849 850 // ------------------------------------- 851 // Adopts the new formats array and updates the array count. 852 // This MessageFormat instance owns the new formats. 853 854 void 855 MessageFormat::adoptFormats(Format** newFormats, 856 int32_t count) { 857 if (newFormats == NULL || count < 0) { 858 return; 859 } 860 861 int32_t i; 862 if (allocateSubformats(count)) { 863 for (i=0; i<subformatCount; ++i) { 864 delete subformats[i].format; 865 } 866 for (i=0; i<count; ++i) { 867 subformats[i].format = newFormats[i]; 868 } 869 subformatCount = count; 870 } else { 871 // An adopt method must always take ownership. Delete 872 // the incoming format objects and return unchanged. 873 for (i=0; i<count; ++i) { 874 delete newFormats[i]; 875 } 876 } 877 878 // TODO: What about the .offset and .argNum fields? 879 } 880 881 // ------------------------------------- 882 // Sets the new formats array and updates the array count. 883 // This MessageFormat instance maks a copy of the new formats. 884 885 void 886 MessageFormat::setFormats(const Format** newFormats, 887 int32_t count) { 888 if (newFormats == NULL || count < 0) { 889 return; 890 } 891 892 if (allocateSubformats(count)) { 893 int32_t i; 894 for (i=0; i<subformatCount; ++i) { 895 delete subformats[i].format; 896 } 897 subformatCount = 0; 898 899 for (i=0; i<count; ++i) { 900 subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL; 901 } 902 subformatCount = count; 903 } 904 905 // TODO: What about the .offset and .arg fields? 906 } 907 908 // ------------------------------------- 909 // Adopt a single format by format number. 910 // Do nothing if the format number is not less than the array count. 911 912 void 913 MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 914 if (n < 0 || n >= subformatCount) { 915 delete newFormat; 916 } else { 917 delete subformats[n].format; 918 subformats[n].format = newFormat; 919 } 920 } 921 922 // ------------------------------------- 923 // Adopt a single format by format name. 924 // Do nothing if there is no match of formatName. 925 void 926 MessageFormat::adoptFormat(const UnicodeString& formatName, 927 Format* formatToAdopt, 928 UErrorCode& status) { 929 if (isArgNumeric ) { 930 int32_t argumentNumber = stou(formatName); 931 if (argumentNumber<0) { 932 status = U_ARGUMENT_TYPE_MISMATCH; 933 return; 934 } 935 adoptFormat(argumentNumber, formatToAdopt); 936 return; 937 } 938 for (int32_t i=0; i<subformatCount; ++i) { 939 if (formatName==*subformats[i].argName) { 940 delete subformats[i].format; 941 if ( formatToAdopt== NULL) { 942 // This should never happen -- but we'll be nice if it does 943 subformats[i].format = NULL; 944 } else { 945 subformats[i].format = formatToAdopt; 946 } 947 } 948 } 949 } 950 951 // ------------------------------------- 952 // Set a single format. 953 // Do nothing if the variable is not less than the array count. 954 955 void 956 MessageFormat::setFormat(int32_t n, const Format& newFormat) { 957 if (n >= 0 && n < subformatCount) { 958 delete subformats[n].format; 959 if (&newFormat == NULL) { 960 // This should never happen -- but we'll be nice if it does 961 subformats[n].format = NULL; 962 } else { 963 subformats[n].format = newFormat.clone(); 964 } 965 } 966 } 967 968 // ------------------------------------- 969 // Get a single format by format name. 970 // Do nothing if the variable is not less than the array count. 971 Format * 972 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 973 974 if (U_FAILURE(status)) return NULL; 975 976 if (isArgNumeric ) { 977 int32_t argumentNumber = stou(formatName); 978 if (argumentNumber<0) { 979 status = U_ARGUMENT_TYPE_MISMATCH; 980 return NULL; 981 } 982 if (argumentNumber < 0 || argumentNumber >= subformatCount) { 983 return subformats[argumentNumber].format; 984 } 985 else { 986 return NULL; 987 } 988 } 989 990 for (int32_t i=0; i<subformatCount; ++i) { 991 if (formatName==*subformats[i].argName) 992 { 993 return subformats[i].format; 994 } 995 } 996 return NULL; 997 } 998 999 // ------------------------------------- 1000 // Set a single format by format name 1001 // Do nothing if the variable is not less than the array count. 1002 void 1003 MessageFormat::setFormat(const UnicodeString& formatName, 1004 const Format& newFormat, 1005 UErrorCode& status) { 1006 if (isArgNumeric) { 1007 status = U_ARGUMENT_TYPE_MISMATCH; 1008 return; 1009 } 1010 for (int32_t i=0; i<subformatCount; ++i) { 1011 if (formatName==*subformats[i].argName) 1012 { 1013 delete subformats[i].format; 1014 if (&newFormat == NULL) { 1015 // This should never happen -- but we'll be nice if it does 1016 subformats[i].format = NULL; 1017 } else { 1018 subformats[i].format = newFormat.clone(); 1019 } 1020 break; 1021 } 1022 } 1023 } 1024 1025 // ------------------------------------- 1026 // Gets the format array. 1027 1028 const Format** 1029 MessageFormat::getFormats(int32_t& cnt) const 1030 { 1031 // This old API returns an array (which we hold) of Format* 1032 // pointers. The array is valid up to the next call to any 1033 // method on this object. We construct and resize an array 1034 // on demand that contains aliases to the subformats[i].format 1035 // pointers. 1036 MessageFormat* t = (MessageFormat*) this; 1037 cnt = 0; 1038 if (formatAliases == NULL) { 1039 t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount; 1040 Format** a = (Format**) 1041 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 1042 if (a == NULL) { 1043 return NULL; 1044 } 1045 t->formatAliases = a; 1046 } else if (subformatCount > formatAliasesCapacity) { 1047 Format** a = (Format**) 1048 uprv_realloc(formatAliases, sizeof(Format*) * subformatCount); 1049 if (a == NULL) { 1050 return NULL; 1051 } 1052 t->formatAliases = a; 1053 t->formatAliasesCapacity = subformatCount; 1054 } 1055 for (int32_t i=0; i<subformatCount; ++i) { 1056 t->formatAliases[i] = subformats[i].format; 1057 } 1058 cnt = subformatCount; 1059 return (const Format**)formatAliases; 1060 } 1061 1062 1063 StringEnumeration* 1064 MessageFormat::getFormatNames(UErrorCode& status) { 1065 if (U_FAILURE(status)) return NULL; 1066 1067 if (isArgNumeric) { 1068 status = U_ARGUMENT_TYPE_MISMATCH; 1069 return NULL; 1070 } 1071 UVector *fFormatNames = new UVector(status); 1072 if (U_FAILURE(status)) { 1073 status = U_MEMORY_ALLOCATION_ERROR; 1074 return NULL; 1075 } 1076 for (int32_t i=0; i<subformatCount; ++i) { 1077 fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status); 1078 } 1079 1080 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 1081 return nameEnumerator; 1082 } 1083 1084 // ------------------------------------- 1085 // Formats the source Formattable array and copy into the result buffer. 1086 // Ignore the FieldPosition result for error checking. 1087 1088 UnicodeString& 1089 MessageFormat::format(const Formattable* source, 1090 int32_t cnt, 1091 UnicodeString& appendTo, 1092 FieldPosition& ignore, 1093 UErrorCode& success) const 1094 { 1095 if (U_FAILURE(success)) 1096 return appendTo; 1097 1098 return format(source, cnt, appendTo, ignore, 0, success); 1099 } 1100 1101 // ------------------------------------- 1102 // Internally creates a MessageFormat instance based on the 1103 // pattern and formats the arguments Formattable array and 1104 // copy into the appendTo buffer. 1105 1106 UnicodeString& 1107 MessageFormat::format( const UnicodeString& pattern, 1108 const Formattable* arguments, 1109 int32_t cnt, 1110 UnicodeString& appendTo, 1111 UErrorCode& success) 1112 { 1113 MessageFormat temp(pattern, success); 1114 FieldPosition ignore(0); 1115 temp.format(arguments, cnt, appendTo, ignore, success); 1116 return appendTo; 1117 } 1118 1119 // ------------------------------------- 1120 // Formats the source Formattable object and copy into the 1121 // appendTo buffer. The Formattable object must be an array 1122 // of Formattable instances, returns error otherwise. 1123 1124 UnicodeString& 1125 MessageFormat::format(const Formattable& source, 1126 UnicodeString& appendTo, 1127 FieldPosition& ignore, 1128 UErrorCode& success) const 1129 { 1130 int32_t cnt; 1131 1132 if (U_FAILURE(success)) 1133 return appendTo; 1134 if (source.getType() != Formattable::kArray) { 1135 success = U_ILLEGAL_ARGUMENT_ERROR; 1136 return appendTo; 1137 } 1138 const Formattable* tmpPtr = source.getArray(cnt); 1139 1140 return format(tmpPtr, cnt, appendTo, ignore, 0, success); 1141 } 1142 1143 1144 UnicodeString& 1145 MessageFormat::format(const UnicodeString* argumentNames, 1146 const Formattable* arguments, 1147 int32_t count, 1148 UnicodeString& appendTo, 1149 UErrorCode& success) const { 1150 FieldPosition ignore(0); 1151 return format(arguments, argumentNames, count, appendTo, ignore, 0, success); 1152 } 1153 1154 UnicodeString& 1155 MessageFormat::format(const Formattable* arguments, 1156 int32_t cnt, 1157 UnicodeString& appendTo, 1158 FieldPosition& status, 1159 int32_t recursionProtection, 1160 UErrorCode& success) const 1161 { 1162 return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1163 } 1164 1165 // ------------------------------------- 1166 // Formats the arguments Formattable array and copy into the appendTo buffer. 1167 // Ignore the FieldPosition result for error checking. 1168 1169 UnicodeString& 1170 MessageFormat::format(const Formattable* arguments, 1171 const UnicodeString *argumentNames, 1172 int32_t cnt, 1173 UnicodeString& appendTo, 1174 FieldPosition& status, 1175 int32_t recursionProtection, 1176 UErrorCode& success) const 1177 { 1178 int32_t lastOffset = 0; 1179 int32_t argumentNumber=0; 1180 if (cnt < 0 || (cnt && arguments == NULL)) { 1181 success = U_ILLEGAL_ARGUMENT_ERROR; 1182 return appendTo; 1183 } 1184 1185 if ( !isArgNumeric && argumentNames== NULL ) { 1186 success = U_ILLEGAL_ARGUMENT_ERROR; 1187 return appendTo; 1188 } 1189 1190 const Formattable *obj=NULL; 1191 for (int32_t i=0; i<subformatCount; ++i) { 1192 // Append the prefix of current format element. 1193 appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset); 1194 lastOffset = subformats[i].offset; 1195 obj = NULL; 1196 if (isArgNumeric) { 1197 argumentNumber = subformats[i].argNum; 1198 1199 // Checks the scope of the argument number. 1200 if (argumentNumber >= cnt) { 1201 appendTo += LEFT_CURLY_BRACE; 1202 itos(argumentNumber, appendTo); 1203 appendTo += RIGHT_CURLY_BRACE; 1204 continue; 1205 } 1206 obj = arguments+argumentNumber; 1207 } 1208 else { 1209 for (int32_t j=0; j<cnt; ++j) { 1210 if (argumentNames[j]== *subformats[i].argName ) { 1211 obj = arguments+j; 1212 break; 1213 } 1214 } 1215 if (obj == NULL ) { 1216 appendTo += LEFT_CURLY_BRACE; 1217 appendTo += *subformats[i].argName; 1218 appendTo += RIGHT_CURLY_BRACE; 1219 continue; 1220 1221 } 1222 } 1223 Formattable::Type type = obj->getType(); 1224 1225 // Recursively calling the format process only if the current 1226 // format argument refers to a ChoiceFormat object. 1227 Format* fmt = subformats[i].format; 1228 if (fmt != NULL) { 1229 UnicodeString argNum; 1230 fmt->format(*obj, argNum, success); 1231 1232 // Needs to reprocess the ChoiceFormat option by using the 1233 // MessageFormat pattern application. 1234 if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() || 1235 fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) && 1236 argNum.indexOf(LEFT_CURLY_BRACE) >= 0) { 1237 MessageFormat temp(argNum, fLocale, success); 1238 // TODO: Implement recursion protection 1239 if ( isArgNumeric ) { 1240 temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1241 } 1242 else { 1243 temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success); 1244 } 1245 if (U_FAILURE(success)) { 1246 return appendTo; 1247 } 1248 } 1249 else { 1250 appendTo += argNum; 1251 } 1252 } 1253 // If the obj data type is a number, use a NumberFormat instance. 1254 else if ((type == Formattable::kDouble) || 1255 (type == Formattable::kLong) || 1256 (type == Formattable::kInt64)) { 1257 1258 const NumberFormat* nf = getDefaultNumberFormat(success); 1259 if (nf == NULL) { 1260 return appendTo; 1261 } 1262 if (type == Formattable::kDouble) { 1263 nf->format(obj->getDouble(), appendTo); 1264 } else if (type == Formattable::kLong) { 1265 nf->format(obj->getLong(), appendTo); 1266 } else { 1267 nf->format(obj->getInt64(), appendTo); 1268 } 1269 } 1270 // If the obj data type is a Date instance, use a DateFormat instance. 1271 else if (type == Formattable::kDate) { 1272 const DateFormat* df = getDefaultDateFormat(success); 1273 if (df == NULL) { 1274 return appendTo; 1275 } 1276 df->format(obj->getDate(), appendTo); 1277 } 1278 else if (type == Formattable::kString) { 1279 appendTo += obj->getString(); 1280 } 1281 else { 1282 success = U_ILLEGAL_ARGUMENT_ERROR; 1283 return appendTo; 1284 } 1285 } 1286 // Appends the rest of the pattern characters after the real last offset. 1287 appendTo.append(fPattern, lastOffset, 0x7fffffff); 1288 return appendTo; 1289 } 1290 1291 1292 // ------------------------------------- 1293 // Parses the source pattern and returns the Formattable objects array, 1294 // the array count and the ending parse position. The caller of this method 1295 // owns the array. 1296 1297 Formattable* 1298 MessageFormat::parse(const UnicodeString& source, 1299 ParsePosition& pos, 1300 int32_t& count) const 1301 { 1302 // Allocate at least one element. Allocating an array of length 1303 // zero causes problems on some platforms (e.g. Win32). 1304 Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1]; 1305 int32_t patternOffset = 0; 1306 int32_t sourceOffset = pos.getIndex(); 1307 ParsePosition tempPos(0); 1308 count = 0; // {sfb} reset to zero 1309 int32_t len; 1310 // If resultArray could not be created, exit out. 1311 // Avoid crossing initialization of variables above. 1312 if (resultArray == NULL) { 1313 goto PARSE_ERROR; 1314 } 1315 for (int32_t i = 0; i < subformatCount; ++i) { 1316 // match up to format 1317 len = subformats[i].offset - patternOffset; 1318 if (len == 0 || 1319 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1320 sourceOffset += len; 1321 patternOffset += len; 1322 } 1323 else { 1324 goto PARSE_ERROR; 1325 } 1326 1327 // now use format 1328 Format* fmt = subformats[i].format; 1329 int32_t argNum = subformats[i].argNum; 1330 if (fmt == NULL) { // string format 1331 // if at end, use longest possible match 1332 // otherwise uses first match to intervening string 1333 // does NOT recursively try all possibilities 1334 int32_t tempLength = (i+1<subformatCount) ? 1335 subformats[i+1].offset : fPattern.length(); 1336 1337 int32_t next; 1338 if (patternOffset >= tempLength) { 1339 next = source.length(); 1340 } 1341 else { 1342 UnicodeString buffer; 1343 fPattern.extract(patternOffset,tempLength - patternOffset, buffer); 1344 next = source.indexOf(buffer, sourceOffset); 1345 } 1346 1347 if (next < 0) { 1348 goto PARSE_ERROR; 1349 } 1350 else { 1351 UnicodeString buffer; 1352 source.extract(sourceOffset,next - sourceOffset, buffer); 1353 UnicodeString strValue = buffer; 1354 UnicodeString temp(LEFT_CURLY_BRACE); 1355 // {sfb} check this later 1356 if (isArgNumeric) { 1357 itos(argNum, temp); 1358 } 1359 else { 1360 temp+=(*subformats[i].argName); 1361 } 1362 temp += RIGHT_CURLY_BRACE; 1363 if (strValue != temp) { 1364 source.extract(sourceOffset,next - sourceOffset, buffer); 1365 resultArray[argNum].setString(buffer); 1366 // {sfb} not sure about this 1367 if ((argNum + 1) > count) { 1368 count = argNum + 1; 1369 } 1370 } 1371 sourceOffset = next; 1372 } 1373 } 1374 else { 1375 tempPos.setIndex(sourceOffset); 1376 fmt->parseObject(source, resultArray[argNum], tempPos); 1377 if (tempPos.getIndex() == sourceOffset) { 1378 goto PARSE_ERROR; 1379 } 1380 1381 if ((argNum + 1) > count) { 1382 count = argNum + 1; 1383 } 1384 sourceOffset = tempPos.getIndex(); // update 1385 } 1386 } 1387 len = fPattern.length() - patternOffset; 1388 if (len == 0 || 1389 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1390 pos.setIndex(sourceOffset + len); 1391 return resultArray; 1392 } 1393 // else fall through... 1394 1395 PARSE_ERROR: 1396 pos.setErrorIndex(sourceOffset); 1397 delete [] resultArray; 1398 count = 0; 1399 return NULL; // leave index as is to signal error 1400 } 1401 1402 // ------------------------------------- 1403 // Parses the source string and returns the array of 1404 // Formattable objects and the array count. The caller 1405 // owns the returned array. 1406 1407 Formattable* 1408 MessageFormat::parse(const UnicodeString& source, 1409 int32_t& cnt, 1410 UErrorCode& success) const 1411 { 1412 if (!isArgNumeric ) { 1413 success = U_ARGUMENT_TYPE_MISMATCH; 1414 return NULL; 1415 } 1416 ParsePosition status(0); 1417 // Calls the actual implementation method and starts 1418 // from zero offset of the source text. 1419 Formattable* result = parse(source, status, cnt); 1420 if (status.getIndex() == 0) { 1421 success = U_MESSAGE_PARSE_ERROR; 1422 delete[] result; 1423 return NULL; 1424 } 1425 return result; 1426 } 1427 1428 // ------------------------------------- 1429 // Parses the source text and copy into the result buffer. 1430 1431 void 1432 MessageFormat::parseObject( const UnicodeString& source, 1433 Formattable& result, 1434 ParsePosition& status) const 1435 { 1436 int32_t cnt = 0; 1437 Formattable* tmpResult = parse(source, status, cnt); 1438 if (tmpResult != NULL) 1439 result.adoptArray(tmpResult, cnt); 1440 } 1441 1442 UnicodeString 1443 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1444 UnicodeString result; 1445 if (U_SUCCESS(status)) { 1446 int32_t plen = pattern.length(); 1447 const UChar* pat = pattern.getBuffer(); 1448 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1449 UChar* buf = result.getBuffer(blen); 1450 if (buf == NULL) { 1451 status = U_MEMORY_ALLOCATION_ERROR; 1452 } else { 1453 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1454 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1455 } 1456 } 1457 if (U_FAILURE(status)) { 1458 result.setToBogus(); 1459 } 1460 return result; 1461 } 1462 1463 // ------------------------------------- 1464 1465 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1466 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1467 if (fmt == NULL) { 1468 ec = U_MEMORY_ALLOCATION_ERROR; 1469 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1470 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1471 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1472 } 1473 return fmt; 1474 } 1475 1476 /** 1477 * Reads the segments[] array (see applyPattern()) and parses the 1478 * segments[1..3] into a Format* object. Stores the format object in 1479 * the subformats[] array. Updates the argTypes[] array type 1480 * information for the corresponding argument. 1481 * 1482 * @param formatNumber index into subformats[] for this format 1483 * @param segments array of strings with the parsed pattern segments 1484 * @param parseError parse error data (output param) 1485 * @param ec error code 1486 */ 1487 void 1488 MessageFormat::makeFormat(int32_t formatNumber, 1489 UnicodeString* segments, 1490 UParseError& parseError, 1491 UErrorCode& ec) { 1492 if (U_FAILURE(ec)) { 1493 return; 1494 } 1495 1496 // Parse the argument number 1497 int32_t argumentNumber = stou(segments[1]); // always unlocalized! 1498 UnicodeString argumentName; 1499 if (argumentNumber < 0) { 1500 if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) { 1501 ec = U_INVALID_FORMAT_ERROR; 1502 return; 1503 } 1504 isArgNumeric = FALSE; 1505 argumentNumber=formatNumber; 1506 } 1507 if (!isArgNumeric) { 1508 if ( !isLegalArgName(segments[1]) ) { 1509 ec = U_INVALID_FORMAT_ERROR; 1510 return; 1511 } 1512 argumentName = segments[1]; 1513 } 1514 1515 // Parse the format, recording the argument type and creating a 1516 // new Format object (except for string arguments). 1517 Formattable::Type argType; 1518 Format *fmt = NULL; 1519 int32_t typeID, styleID; 1520 DateFormat::EStyle style; 1521 UnicodeString unquotedPattern, quotedPattern; 1522 UBool inQuote = FALSE; 1523 1524 switch (typeID = findKeyword(segments[2], TYPE_IDS)) { 1525 1526 case 0: // string 1527 argType = Formattable::kString; 1528 break; 1529 1530 case 1: // number 1531 argType = Formattable::kDouble; 1532 1533 switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) { 1534 case 0: // default 1535 fmt = NumberFormat::createInstance(fLocale, ec); 1536 break; 1537 case 1: // currency 1538 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1539 break; 1540 case 2: // percent 1541 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1542 break; 1543 case 3: // integer 1544 argType = Formattable::kLong; 1545 fmt = createIntegerFormat(fLocale, ec); 1546 break; 1547 default: // pattern 1548 fmt = NumberFormat::createInstance(fLocale, ec); 1549 if (fmt && 1550 fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 1551 ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec); 1552 } 1553 break; 1554 } 1555 break; 1556 1557 case 2: // date 1558 case 3: // time 1559 argType = Formattable::kDate; 1560 styleID = findKeyword(segments[3], DATE_STYLE_IDS); 1561 style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1562 1563 if (typeID == 2) { 1564 fmt = DateFormat::createDateInstance(style, fLocale); 1565 } else { 1566 fmt = DateFormat::createTimeInstance(style, fLocale); 1567 } 1568 1569 if (styleID < 0 && 1570 fmt != NULL && 1571 fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { 1572 ((SimpleDateFormat*)fmt)->applyPattern(segments[3]); 1573 } 1574 break; 1575 1576 case 4: // choice 1577 argType = Formattable::kDouble; 1578 1579 fmt = new ChoiceFormat(segments[3], parseError, ec); 1580 break; 1581 1582 case 5: // spellout 1583 argType = Formattable::kDouble; 1584 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec); 1585 break; 1586 case 6: // ordinal 1587 argType = Formattable::kDouble; 1588 fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec); 1589 break; 1590 case 7: // duration 1591 argType = Formattable::kDouble; 1592 fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); 1593 break; 1594 case 8: // plural 1595 argType = Formattable::kDouble; 1596 quotedPattern = segments[3]; 1597 for (int32_t i = 0; i < quotedPattern.length(); ++i) { 1598 UChar ch = quotedPattern.charAt(i); 1599 if (ch == SINGLE_QUOTE) { 1600 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) { 1601 unquotedPattern+=ch; 1602 ++i; 1603 } 1604 else { 1605 inQuote = !inQuote; 1606 } 1607 } 1608 else { 1609 unquotedPattern += ch; 1610 } 1611 } 1612 fmt = new PluralFormat(fLocale, unquotedPattern, ec); 1613 break; 1614 default: 1615 argType = Formattable::kString; 1616 ec = U_ILLEGAL_ARGUMENT_ERROR; 1617 break; 1618 } 1619 1620 if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) { 1621 ec = U_MEMORY_ALLOCATION_ERROR; 1622 } 1623 1624 if (!allocateSubformats(formatNumber+1) || 1625 !allocateArgTypes(argumentNumber+1)) { 1626 ec = U_MEMORY_ALLOCATION_ERROR; 1627 } 1628 1629 if (U_FAILURE(ec)) { 1630 delete fmt; 1631 return; 1632 } 1633 1634 // Parse succeeded; record results in our arrays 1635 subformats[formatNumber].format = fmt; 1636 subformats[formatNumber].offset = segments[0].length(); 1637 if (isArgNumeric) { 1638 subformats[formatNumber].argName = NULL; 1639 subformats[formatNumber].argNum = argumentNumber; 1640 } 1641 else { 1642 subformats[formatNumber].argName = new UnicodeString(argumentName); 1643 subformats[formatNumber].argNum = -1; 1644 } 1645 subformatCount = formatNumber+1; 1646 1647 // Careful here: argumentNumber may in general arrive out of 1648 // sequence, e.g., "There was {2} on {0,date} (see {1,number})." 1649 argTypes[argumentNumber] = argType; 1650 if (argumentNumber+1 > argTypeCount) { 1651 argTypeCount = argumentNumber+1; 1652 } 1653 } 1654 1655 // ------------------------------------- 1656 // Finds the string, s, in the string array, list. 1657 int32_t MessageFormat::findKeyword(const UnicodeString& s, 1658 const UChar * const *list) 1659 { 1660 if (s.length() == 0) 1661 return 0; // default 1662 1663 UnicodeString buffer = s; 1664 // Trims the space characters and turns all characters 1665 // in s to lower case. 1666 buffer.trim().toLower(""); 1667 for (int32_t i = 0; list[i]; ++i) { 1668 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1669 return i; 1670 } 1671 } 1672 return -1; 1673 } 1674 1675 // ------------------------------------- 1676 // Checks the range of the source text to quote the special 1677 // characters, { and ' and copy to target buffer. 1678 1679 void 1680 MessageFormat::copyAndFixQuotes(const UnicodeString& source, 1681 int32_t start, 1682 int32_t end, 1683 UnicodeString& appendTo) 1684 { 1685 UBool gotLB = FALSE; 1686 1687 for (int32_t i = start; i < end; ++i) { 1688 UChar ch = source[i]; 1689 if (ch == LEFT_CURLY_BRACE) { 1690 appendTo += SINGLE_QUOTE; 1691 appendTo += LEFT_CURLY_BRACE; 1692 appendTo += SINGLE_QUOTE; 1693 gotLB = TRUE; 1694 } 1695 else if (ch == RIGHT_CURLY_BRACE) { 1696 if(gotLB) { 1697 appendTo += RIGHT_CURLY_BRACE; 1698 gotLB = FALSE; 1699 } 1700 else { 1701 // orig code. 1702 appendTo += SINGLE_QUOTE; 1703 appendTo += RIGHT_CURLY_BRACE; 1704 appendTo += SINGLE_QUOTE; 1705 } 1706 } 1707 else if (ch == SINGLE_QUOTE) { 1708 appendTo += SINGLE_QUOTE; 1709 appendTo += SINGLE_QUOTE; 1710 } 1711 else { 1712 appendTo += ch; 1713 } 1714 } 1715 } 1716 1717 /** 1718 * Convenience method that ought to be in NumberFormat 1719 */ 1720 NumberFormat* 1721 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1722 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1723 if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 1724 DecimalFormat *temp2 = (DecimalFormat*) temp; 1725 temp2->setMaximumFractionDigits(0); 1726 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1727 temp2->setParseIntegerOnly(TRUE); 1728 } 1729 1730 return temp; 1731 } 1732 1733 /** 1734 * Return the default number format. Used to format a numeric 1735 * argument when subformats[i].format is NULL. Returns NULL 1736 * on failure. 1737 * 1738 * Semantically const but may modify *this. 1739 */ 1740 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1741 if (defaultNumberFormat == NULL) { 1742 MessageFormat* t = (MessageFormat*) this; 1743 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1744 if (U_FAILURE(ec)) { 1745 delete t->defaultNumberFormat; 1746 t->defaultNumberFormat = NULL; 1747 } else if (t->defaultNumberFormat == NULL) { 1748 ec = U_MEMORY_ALLOCATION_ERROR; 1749 } 1750 } 1751 return defaultNumberFormat; 1752 } 1753 1754 /** 1755 * Return the default date format. Used to format a date 1756 * argument when subformats[i].format is NULL. Returns NULL 1757 * on failure. 1758 * 1759 * Semantically const but may modify *this. 1760 */ 1761 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1762 if (defaultDateFormat == NULL) { 1763 MessageFormat* t = (MessageFormat*) this; 1764 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1765 if (t->defaultDateFormat == NULL) { 1766 ec = U_MEMORY_ALLOCATION_ERROR; 1767 } 1768 } 1769 return defaultDateFormat; 1770 } 1771 1772 UBool 1773 MessageFormat::usesNamedArguments() const { 1774 return !isArgNumeric; 1775 } 1776 1777 UBool 1778 MessageFormat::isLegalArgName(const UnicodeString& argName) const { 1779 if(!u_hasBinaryProperty(argName.charAt(0), idStart)) { 1780 return FALSE; 1781 } 1782 for (int32_t i=1; i<argName.length(); ++i) { 1783 if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) { 1784 return FALSE; 1785 } 1786 } 1787 return TRUE; 1788 } 1789 1790 int32_t 1791 MessageFormat::getArgTypeCount() const { 1792 return argTypeCount; 1793 } 1794 1795 1796 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1797 pos=0; 1798 fFormatNames = fNameList; 1799 } 1800 1801 const UnicodeString* 1802 FormatNameEnumeration::snext(UErrorCode& status) { 1803 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1804 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1805 } 1806 return NULL; 1807 } 1808 1809 void 1810 FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1811 pos=0; 1812 } 1813 1814 int32_t 1815 FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1816 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1817 } 1818 1819 FormatNameEnumeration::~FormatNameEnumeration() { 1820 UnicodeString *s; 1821 for (int32_t i=0; i<fFormatNames->size(); ++i) { 1822 if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) { 1823 delete s; 1824 } 1825 } 1826 delete fFormatNames; 1827 } 1828 1829 1830 U_NAMESPACE_END 1831 1832 #endif /* #if !UCONFIG_NO_FORMATTING */ 1833 1834 //eof 1835