1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 * 11/01/09 kirtig Added SelectFormat 20 ********************************************************************/ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_FORMATTING 25 26 #include "unicode/msgfmt.h" 27 #include "unicode/decimfmt.h" 28 #include "unicode/datefmt.h" 29 #include "unicode/smpdtfmt.h" 30 #include "unicode/choicfmt.h" 31 #include "unicode/plurfmt.h" 32 #include "unicode/selfmt.h" 33 #include "unicode/ustring.h" 34 #include "unicode/ucnv_err.h" 35 #include "unicode/uchar.h" 36 #include "unicode/umsg.h" 37 #include "unicode/rbnf.h" 38 #include "cmemory.h" 39 #include "msgfmt_impl.h" 40 #include "util.h" 41 #include "uassert.h" 42 #include "ustrfmt.h" 43 #include "uvector.h" 44 45 // ***************************************************************************** 46 // class MessageFormat 47 // ***************************************************************************** 48 49 #define COMMA ((UChar)0x002C) 50 #define SINGLE_QUOTE ((UChar)0x0027) 51 #define LEFT_CURLY_BRACE ((UChar)0x007B) 52 #define RIGHT_CURLY_BRACE ((UChar)0x007D) 53 54 //--------------------------------------- 55 // static data 56 57 static const UChar ID_EMPTY[] = { 58 0 /* empty string, used for default so that null can mark end of list */ 59 }; 60 61 static const UChar ID_NUMBER[] = { 62 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 63 }; 64 static const UChar ID_DATE[] = { 65 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 66 }; 67 static const UChar ID_TIME[] = { 68 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 69 }; 70 static const UChar ID_CHOICE[] = { 71 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */ 72 }; 73 static const UChar ID_SPELLOUT[] = { 74 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 75 }; 76 static const UChar ID_ORDINAL[] = { 77 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 78 }; 79 static const UChar ID_DURATION[] = { 80 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 81 }; 82 static const UChar ID_PLURAL[] = { 83 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ 84 }; 85 static const UChar ID_SELECT[] = { 86 0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0 /* "select" */ 87 }; 88 89 // MessageFormat Type List Number, Date, Time or Choice 90 static const UChar * const TYPE_IDS[] = { 91 ID_EMPTY, 92 ID_NUMBER, 93 ID_DATE, 94 ID_TIME, 95 ID_CHOICE, 96 ID_SPELLOUT, 97 ID_ORDINAL, 98 ID_DURATION, 99 ID_PLURAL, 100 ID_SELECT, 101 NULL, 102 }; 103 104 static const UChar ID_CURRENCY[] = { 105 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 106 }; 107 static const UChar ID_PERCENT[] = { 108 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 109 }; 110 static const UChar ID_INTEGER[] = { 111 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 112 }; 113 114 // NumberFormat modifier list, default, currency, percent or integer 115 static const UChar * const NUMBER_STYLE_IDS[] = { 116 ID_EMPTY, 117 ID_CURRENCY, 118 ID_PERCENT, 119 ID_INTEGER, 120 NULL, 121 }; 122 123 static const UChar ID_SHORT[] = { 124 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 125 }; 126 static const UChar ID_MEDIUM[] = { 127 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 128 }; 129 static const UChar ID_LONG[] = { 130 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 131 }; 132 static const UChar ID_FULL[] = { 133 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 134 }; 135 136 // DateFormat modifier list, default, short, medium, long or full 137 static const UChar * const DATE_STYLE_IDS[] = { 138 ID_EMPTY, 139 ID_SHORT, 140 ID_MEDIUM, 141 ID_LONG, 142 ID_FULL, 143 NULL, 144 }; 145 146 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = { 147 U_NAMESPACE_QUALIFIER DateFormat::kDefault, 148 U_NAMESPACE_QUALIFIER DateFormat::kShort, 149 U_NAMESPACE_QUALIFIER DateFormat::kMedium, 150 U_NAMESPACE_QUALIFIER DateFormat::kLong, 151 U_NAMESPACE_QUALIFIER DateFormat::kFull, 152 }; 153 154 static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 155 156 U_NAMESPACE_BEGIN 157 158 // ------------------------------------- 159 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 160 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 161 162 //-------------------------------------------------------------------- 163 164 /** 165 * Convert a string to an unsigned decimal, ignoring rule whitespace. 166 * @return a non-negative number if successful, or a negative number 167 * upon failure. 168 */ 169 static int32_t stou(const UnicodeString& string) { 170 int32_t n = 0; 171 int32_t count = 0; 172 UChar32 c; 173 for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) { 174 c = string.char32At(i); 175 if (uprv_isRuleWhiteSpace(c)) { 176 continue; 177 } 178 int32_t d = u_digit(c, 10); 179 if (d < 0 || ++count > 10) { 180 return -1; 181 } 182 n = 10*n + d; 183 } 184 return n; 185 } 186 187 /** 188 * Convert an integer value to a string and append the result to 189 * the given UnicodeString. 190 */ 191 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 192 UChar temp[16]; 193 uprv_itou(temp,16,i,10,0); // 10 == radix 194 appendTo.append(temp); 195 return appendTo; 196 } 197 198 /* 199 * A structure representing one subformat of this MessageFormat. 200 * Each subformat has a Format object, an offset into the plain 201 * pattern text fPattern, and an argument number. The argument 202 * number corresponds to the array of arguments to be formatted. 203 * @internal 204 */ 205 class MessageFormat::Subformat : public UMemory { 206 public: 207 /** 208 * @internal 209 */ 210 Format* format; // formatter 211 /** 212 * @internal 213 */ 214 int32_t offset; // offset into fPattern 215 /** 216 * @internal 217 */ 218 // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number 219 int32_t argNum; // 0-based argument number 220 /** 221 * @internal 222 */ 223 UnicodeString* argName; // argument name or number 224 225 /** 226 * Clone that.format and assign it to this.format 227 * Do NOT delete this.format 228 * @internal 229 */ 230 Subformat& operator=(const Subformat& that) { 231 if (this != &that) { 232 format = that.format ? that.format->clone() : NULL; 233 offset = that.offset; 234 argNum = that.argNum; 235 argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL; 236 } 237 return *this; 238 } 239 240 /** 241 * @internal 242 */ 243 UBool operator==(const Subformat& that) const { 244 // Do cheap comparisons first 245 return offset == that.offset && 246 argNum == that.argNum && 247 ((argName == that.argName) || 248 (*argName == *that.argName)) && 249 ((format == that.format) || // handles NULL 250 (*format == *that.format)); 251 } 252 253 /** 254 * @internal 255 */ 256 UBool operator!=(const Subformat& that) const { 257 return !operator==(that); 258 } 259 }; 260 261 // ------------------------------------- 262 // Creates a MessageFormat instance based on the pattern. 263 264 MessageFormat::MessageFormat(const UnicodeString& pattern, 265 UErrorCode& success) 266 : fLocale(Locale::getDefault()), // Uses the default locale 267 formatAliases(NULL), 268 formatAliasesCapacity(0), 269 idStart(UCHAR_ID_START), 270 idContinue(UCHAR_ID_CONTINUE), 271 subformats(NULL), 272 subformatCount(0), 273 subformatCapacity(0), 274 argTypes(NULL), 275 argTypeCount(0), 276 argTypeCapacity(0), 277 isArgNumeric(TRUE), 278 defaultNumberFormat(NULL), 279 defaultDateFormat(NULL) 280 { 281 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 282 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 283 success = U_MEMORY_ALLOCATION_ERROR; 284 return; 285 } 286 applyPattern(pattern, success); 287 setLocaleIDs(fLocale.getName(), fLocale.getName()); 288 } 289 290 MessageFormat::MessageFormat(const UnicodeString& pattern, 291 const Locale& newLocale, 292 UErrorCode& success) 293 : fLocale(newLocale), 294 formatAliases(NULL), 295 formatAliasesCapacity(0), 296 idStart(UCHAR_ID_START), 297 idContinue(UCHAR_ID_CONTINUE), 298 subformats(NULL), 299 subformatCount(0), 300 subformatCapacity(0), 301 argTypes(NULL), 302 argTypeCount(0), 303 argTypeCapacity(0), 304 isArgNumeric(TRUE), 305 defaultNumberFormat(NULL), 306 defaultDateFormat(NULL) 307 { 308 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 309 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 310 success = U_MEMORY_ALLOCATION_ERROR; 311 return; 312 } 313 applyPattern(pattern, success); 314 setLocaleIDs(fLocale.getName(), fLocale.getName()); 315 } 316 317 MessageFormat::MessageFormat(const UnicodeString& pattern, 318 const Locale& newLocale, 319 UParseError& parseError, 320 UErrorCode& success) 321 : fLocale(newLocale), 322 formatAliases(NULL), 323 formatAliasesCapacity(0), 324 idStart(UCHAR_ID_START), 325 idContinue(UCHAR_ID_CONTINUE), 326 subformats(NULL), 327 subformatCount(0), 328 subformatCapacity(0), 329 argTypes(NULL), 330 argTypeCount(0), 331 argTypeCapacity(0), 332 isArgNumeric(TRUE), 333 defaultNumberFormat(NULL), 334 defaultDateFormat(NULL) 335 { 336 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 337 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 338 success = U_MEMORY_ALLOCATION_ERROR; 339 return; 340 } 341 applyPattern(pattern, parseError, success); 342 setLocaleIDs(fLocale.getName(), fLocale.getName()); 343 } 344 345 MessageFormat::MessageFormat(const MessageFormat& that) 346 : Format(that), 347 formatAliases(NULL), 348 formatAliasesCapacity(0), 349 idStart(UCHAR_ID_START), 350 idContinue(UCHAR_ID_CONTINUE), 351 subformats(NULL), 352 subformatCount(0), 353 subformatCapacity(0), 354 argTypes(NULL), 355 argTypeCount(0), 356 argTypeCapacity(0), 357 isArgNumeric(TRUE), 358 defaultNumberFormat(NULL), 359 defaultDateFormat(NULL) 360 { 361 *this = that; 362 } 363 364 MessageFormat::~MessageFormat() 365 { 366 int32_t idx; 367 for (idx = 0; idx < subformatCount; idx++) { 368 delete subformats[idx].format; 369 delete subformats[idx].argName; 370 } 371 uprv_free(subformats); 372 subformats = NULL; 373 subformatCount = subformatCapacity = 0; 374 375 uprv_free(argTypes); 376 argTypes = NULL; 377 argTypeCount = argTypeCapacity = 0; 378 379 uprv_free(formatAliases); 380 381 delete defaultNumberFormat; 382 delete defaultDateFormat; 383 } 384 385 //-------------------------------------------------------------------- 386 // Variable-size array management 387 388 /** 389 * Allocate subformats[] to at least the given capacity and return 390 * TRUE if successful. If not, leave subformats[] unchanged. 391 * 392 * If subformats is NULL, allocate it. If it is not NULL, enlarge it 393 * if necessary to be at least as large as specified. 394 */ 395 UBool MessageFormat::allocateSubformats(int32_t capacity) { 396 if (subformats == NULL) { 397 subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity); 398 subformatCapacity = capacity; 399 subformatCount = 0; 400 if (subformats == NULL) { 401 subformatCapacity = 0; 402 return FALSE; 403 } 404 } else if (subformatCapacity < capacity) { 405 if (capacity < 2*subformatCapacity) { 406 capacity = 2*subformatCapacity; 407 } 408 Subformat* a = (Subformat*) 409 uprv_realloc(subformats, sizeof(*subformats) * capacity); 410 if (a == NULL) { 411 return FALSE; // request failed 412 } 413 subformats = a; 414 subformatCapacity = capacity; 415 } 416 return TRUE; 417 } 418 419 /** 420 * Allocate argTypes[] to at least the given capacity and return 421 * TRUE if successful. If not, leave argTypes[] unchanged. 422 * 423 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 424 * if necessary to be at least as large as specified. 425 */ 426 UBool MessageFormat::allocateArgTypes(int32_t capacity) { 427 if (argTypes == NULL) { 428 argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity); 429 argTypeCount = 0; 430 argTypeCapacity = capacity; 431 if (argTypes == NULL) { 432 argTypeCapacity = 0; 433 return FALSE; 434 } 435 for (int32_t i=0; i<capacity; ++i) { 436 argTypes[i] = Formattable::kString; 437 } 438 } else if (argTypeCapacity < capacity) { 439 if (capacity < 2*argTypeCapacity) { 440 capacity = 2*argTypeCapacity; 441 } 442 Formattable::Type* a = (Formattable::Type*) 443 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 444 if (a == NULL) { 445 return FALSE; // request failed 446 } 447 for (int32_t i=argTypeCapacity; i<capacity; ++i) { 448 a[i] = Formattable::kString; 449 } 450 argTypes = a; 451 argTypeCapacity = capacity; 452 } 453 return TRUE; 454 } 455 456 // ------------------------------------- 457 // assignment operator 458 459 const MessageFormat& 460 MessageFormat::operator=(const MessageFormat& that) 461 { 462 // Reallocate the arrays BEFORE changing this object 463 if (this != &that && 464 allocateSubformats(that.subformatCount) && 465 allocateArgTypes(that.argTypeCount)) { 466 467 // Calls the super class for assignment first. 468 Format::operator=(that); 469 470 fPattern = that.fPattern; 471 setLocale(that.fLocale); 472 isArgNumeric = that.isArgNumeric; 473 int32_t j; 474 for (j=0; j<subformatCount; ++j) { 475 delete subformats[j].format; 476 } 477 subformatCount = 0; 478 479 for (j=0; j<that.subformatCount; ++j) { 480 // Subformat::operator= does NOT delete this.format 481 subformats[j] = that.subformats[j]; 482 } 483 subformatCount = that.subformatCount; 484 485 for (j=0; j<that.argTypeCount; ++j) { 486 argTypes[j] = that.argTypes[j]; 487 } 488 argTypeCount = that.argTypeCount; 489 } 490 return *this; 491 } 492 493 UBool 494 MessageFormat::operator==(const Format& rhs) const 495 { 496 if (this == &rhs) return TRUE; 497 498 MessageFormat& that = (MessageFormat&)rhs; 499 500 // Check class ID before checking MessageFormat members 501 if (!Format::operator==(rhs) || 502 fPattern != that.fPattern || 503 fLocale != that.fLocale || 504 isArgNumeric != that.isArgNumeric) { 505 return FALSE; 506 } 507 508 int32_t j; 509 for (j=0; j<subformatCount; ++j) { 510 if (subformats[j] != that.subformats[j]) { 511 return FALSE; 512 } 513 } 514 515 return TRUE; 516 } 517 518 // ------------------------------------- 519 // Creates a copy of this MessageFormat, the caller owns the copy. 520 521 Format* 522 MessageFormat::clone() const 523 { 524 return new MessageFormat(*this); 525 } 526 527 // ------------------------------------- 528 // Sets the locale of this MessageFormat object to theLocale. 529 530 void 531 MessageFormat::setLocale(const Locale& theLocale) 532 { 533 if (fLocale != theLocale) { 534 delete defaultNumberFormat; 535 defaultNumberFormat = NULL; 536 delete defaultDateFormat; 537 defaultDateFormat = NULL; 538 } 539 fLocale = theLocale; 540 setLocaleIDs(fLocale.getName(), fLocale.getName()); 541 } 542 543 // ------------------------------------- 544 // Gets the locale of this MessageFormat object. 545 546 const Locale& 547 MessageFormat::getLocale() const 548 { 549 return fLocale; 550 } 551 552 553 554 555 void 556 MessageFormat::applyPattern(const UnicodeString& newPattern, 557 UErrorCode& status) 558 { 559 UParseError parseError; 560 applyPattern(newPattern,parseError,status); 561 } 562 563 564 // ------------------------------------- 565 // Applies the new pattern and returns an error if the pattern 566 // is not correct. 567 void 568 MessageFormat::applyPattern(const UnicodeString& pattern, 569 UParseError& parseError, 570 UErrorCode& ec) 571 { 572 if(U_FAILURE(ec)) { 573 return; 574 } 575 // The pattern is broken up into segments. Each time a subformat 576 // is encountered, 4 segments are recorded. For example, consider 577 // the pattern: 578 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}." 579 // The first set of segments is: 580 // segments[0] = "There " 581 // segments[1] = "0" 582 // segments[2] = "choice" 583 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files" 584 585 // During parsing, the plain text is accumulated into segments[0]. 586 // Segments 1..3 are used to parse each subpattern. Each time a 587 // subpattern is parsed, it creates a format object that is stored 588 // in the subformats array, together with an offset and argument 589 // number. The offset into the plain text stored in 590 // segments[0]. 591 592 // Quotes in segment 0 are handled normally. They are removed. 593 // Quotes may not occur in segments 1 or 2. 594 // Quotes in segment 3 are parsed and _copied_. This makes 595 // subformat patterns work, e.g., {1,number,'#'.##} passes 596 // the pattern "'#'.##" to DecimalFormat. 597 598 UnicodeString segments[4]; 599 int32_t part = 0; // segment we are in, 0..3 600 // Record the highest argument number in the pattern. (In the 601 // subpattern {3,number} the argument number is 3.) 602 int32_t formatNumber = 0; 603 UBool inQuote = FALSE; 604 int32_t braceStack = 0; 605 // Clear error struct 606 parseError.offset = -1; 607 parseError.preContext[0] = parseError.postContext[0] = (UChar)0; 608 int32_t patLen = pattern.length(); 609 int32_t i; 610 611 for (i=0; i<subformatCount; ++i) { 612 delete subformats[i].format; 613 } 614 subformatCount = 0; 615 argTypeCount = 0; 616 617 for (i=0; i<patLen; ++i) { 618 UChar ch = pattern[i]; 619 if (part == 0) { 620 // In segment 0, recognize and remove quotes 621 if (ch == SINGLE_QUOTE) { 622 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) { 623 segments[0] += ch; 624 ++i; 625 } else { 626 inQuote = !inQuote; 627 } 628 } else if (ch == LEFT_CURLY_BRACE && !inQuote) { 629 // The only way we get from segment 0 to 1 is via an 630 // unquoted '{'. 631 part = 1; 632 } else { 633 segments[0] += ch; 634 } 635 } else if (inQuote) { 636 // In segments 1..3, recognize quoted matter, and copy it 637 // into the segment, together with the quotes. This takes 638 // care of '' as well. 639 segments[part] += ch; 640 if (ch == SINGLE_QUOTE) { 641 inQuote = FALSE; 642 } 643 } else { 644 // We have an unquoted character in segment 1..3 645 switch (ch) { 646 case COMMA: 647 // Commas bump us to the next segment, except for segment 3, 648 // which can contain commas. See example above. 649 if (part < 3) 650 part += 1; 651 else 652 segments[3] += ch; 653 break; 654 case LEFT_CURLY_BRACE: 655 // Handle '{' within segment 3. The initial '{' 656 // before segment 1 is handled above. 657 if (part != 3) { 658 ec = U_PATTERN_SYNTAX_ERROR; 659 goto SYNTAX_ERROR; 660 } 661 ++braceStack; 662 segments[part] += ch; 663 break; 664 case RIGHT_CURLY_BRACE: 665 if (braceStack == 0) { 666 makeFormat(formatNumber, segments, parseError,ec); 667 if (U_FAILURE(ec)){ 668 goto SYNTAX_ERROR; 669 } 670 formatNumber++; 671 672 segments[1].remove(); 673 segments[2].remove(); 674 segments[3].remove(); 675 part = 0; 676 } else { 677 --braceStack; 678 segments[part] += ch; 679 } 680 break; 681 case SINGLE_QUOTE: 682 inQuote = TRUE; 683 // fall through (copy quote chars in segments 1..3) 684 default: 685 segments[part] += ch; 686 break; 687 } 688 } 689 } 690 if (braceStack != 0 || part != 0) { 691 // Unmatched braces in the pattern 692 ec = U_UNMATCHED_BRACES; 693 goto SYNTAX_ERROR; 694 } 695 fPattern = segments[0]; 696 return; 697 698 SYNTAX_ERROR: 699 syntaxError(pattern, i, parseError); 700 for (i=0; i<subformatCount; ++i) { 701 delete subformats[i].format; 702 } 703 argTypeCount = subformatCount = 0; 704 } 705 // ------------------------------------- 706 // Converts this MessageFormat instance to a pattern. 707 708 UnicodeString& 709 MessageFormat::toPattern(UnicodeString& appendTo) const { 710 // later, make this more extensible 711 int32_t lastOffset = 0; 712 int32_t i; 713 for (i=0; i<subformatCount; ++i) { 714 copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo); 715 lastOffset = subformats[i].offset; 716 appendTo += LEFT_CURLY_BRACE; 717 if (isArgNumeric) { 718 itos(subformats[i].argNum, appendTo); 719 } 720 else { 721 appendTo += *subformats[i].argName; 722 } 723 Format* fmt = subformats[i].format; 724 DecimalFormat* decfmt; 725 SimpleDateFormat* sdtfmt; 726 ChoiceFormat* chcfmt; 727 PluralFormat* plfmt; 728 SelectFormat* selfmt; 729 if (fmt == NULL) { 730 // do nothing, string format 731 } 732 else if ((decfmt = dynamic_cast<DecimalFormat*>(fmt)) != NULL) { 733 UErrorCode ec = U_ZERO_ERROR; 734 NumberFormat& formatAlias = *decfmt; 735 NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec); 736 NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec); 737 NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec); 738 NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec); 739 740 appendTo += COMMA; 741 appendTo += ID_NUMBER; 742 if (formatAlias != *defaultTemplate) { 743 appendTo += COMMA; 744 if (formatAlias == *currencyTemplate) { 745 appendTo += ID_CURRENCY; 746 } 747 else if (formatAlias == *percentTemplate) { 748 appendTo += ID_PERCENT; 749 } 750 else if (formatAlias == *integerTemplate) { 751 appendTo += ID_INTEGER; 752 } 753 else { 754 UnicodeString buffer; 755 appendTo += decfmt->toPattern(buffer); 756 } 757 } 758 759 delete defaultTemplate; 760 delete currencyTemplate; 761 delete percentTemplate; 762 delete integerTemplate; 763 } 764 else if ((sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt)) != NULL) { 765 DateFormat& formatAlias = *sdtfmt; 766 DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale); 767 DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale); 768 DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale); 769 DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale); 770 DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale); 771 DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale); 772 DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale); 773 DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale); 774 775 776 appendTo += COMMA; 777 if (formatAlias == *defaultDateTemplate) { 778 // default is medium. no need to handle medium separately. 779 appendTo += ID_DATE; 780 } 781 else if (formatAlias == *shortDateTemplate) { 782 appendTo += ID_DATE; 783 appendTo += COMMA; 784 appendTo += ID_SHORT; 785 } 786 else if (formatAlias == *longDateTemplate) { 787 appendTo += ID_DATE; 788 appendTo += COMMA; 789 appendTo += ID_LONG; 790 } 791 else if (formatAlias == *fullDateTemplate) { 792 appendTo += ID_DATE; 793 appendTo += COMMA; 794 appendTo += ID_FULL; 795 } 796 else if (formatAlias == *defaultTimeTemplate) { 797 // default is medium. no need to handle medium separately. 798 appendTo += ID_TIME; 799 } 800 else if (formatAlias == *shortTimeTemplate) { 801 appendTo += ID_TIME; 802 appendTo += COMMA; 803 appendTo += ID_SHORT; 804 } 805 else if (formatAlias == *longTimeTemplate) { 806 appendTo += ID_TIME; 807 appendTo += COMMA; 808 appendTo += ID_LONG; 809 } 810 else if (formatAlias == *fullTimeTemplate) { 811 appendTo += ID_TIME; 812 appendTo += COMMA; 813 appendTo += ID_FULL; 814 } 815 else { 816 UnicodeString buffer; 817 appendTo += ID_DATE; 818 appendTo += COMMA; 819 appendTo += sdtfmt->toPattern(buffer); 820 } 821 822 delete defaultDateTemplate; 823 delete shortDateTemplate; 824 delete longDateTemplate; 825 delete fullDateTemplate; 826 delete defaultTimeTemplate; 827 delete shortTimeTemplate; 828 delete longTimeTemplate; 829 delete fullTimeTemplate; 830 // {sfb} there should be a more efficient way to do this! 831 } 832 else if ((chcfmt = dynamic_cast<ChoiceFormat*>(fmt)) != NULL) { 833 UnicodeString buffer; 834 appendTo += COMMA; 835 appendTo += ID_CHOICE; 836 appendTo += COMMA; 837 appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer); 838 } 839 else if ((plfmt = dynamic_cast<PluralFormat*>(fmt)) != NULL) { 840 UnicodeString buffer; 841 appendTo += plfmt->toPattern(buffer); 842 } 843 else if ((selfmt = dynamic_cast<SelectFormat*>(fmt)) != NULL) { 844 UnicodeString buffer; 845 appendTo += ((SelectFormat*)fmt)->toPattern(buffer); 846 } 847 else { 848 //appendTo += ", unknown"; 849 } 850 appendTo += RIGHT_CURLY_BRACE; 851 } 852 copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo); 853 return appendTo; 854 } 855 856 // ------------------------------------- 857 // Adopts the new formats array and updates the array count. 858 // This MessageFormat instance owns the new formats. 859 860 void 861 MessageFormat::adoptFormats(Format** newFormats, 862 int32_t count) { 863 if (newFormats == NULL || count < 0) { 864 return; 865 } 866 867 int32_t i; 868 if (allocateSubformats(count)) { 869 for (i=0; i<subformatCount; ++i) { 870 delete subformats[i].format; 871 } 872 for (i=0; i<count; ++i) { 873 subformats[i].format = newFormats[i]; 874 } 875 subformatCount = count; 876 } else { 877 // An adopt method must always take ownership. Delete 878 // the incoming format objects and return unchanged. 879 for (i=0; i<count; ++i) { 880 delete newFormats[i]; 881 } 882 } 883 884 // TODO: What about the .offset and .argNum fields? 885 } 886 887 // ------------------------------------- 888 // Sets the new formats array and updates the array count. 889 // This MessageFormat instance maks a copy of the new formats. 890 891 void 892 MessageFormat::setFormats(const Format** newFormats, 893 int32_t count) { 894 if (newFormats == NULL || count < 0) { 895 return; 896 } 897 898 if (allocateSubformats(count)) { 899 int32_t i; 900 for (i=0; i<subformatCount; ++i) { 901 delete subformats[i].format; 902 } 903 subformatCount = 0; 904 905 for (i=0; i<count; ++i) { 906 subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL; 907 } 908 subformatCount = count; 909 } 910 911 // TODO: What about the .offset and .arg fields? 912 } 913 914 // ------------------------------------- 915 // Adopt a single format by format number. 916 // Do nothing if the format number is not less than the array count. 917 918 void 919 MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 920 if (n < 0 || n >= subformatCount) { 921 delete newFormat; 922 } else { 923 delete subformats[n].format; 924 subformats[n].format = newFormat; 925 } 926 } 927 928 // ------------------------------------- 929 // Adopt a single format by format name. 930 // Do nothing if there is no match of formatName. 931 void 932 MessageFormat::adoptFormat(const UnicodeString& formatName, 933 Format* formatToAdopt, 934 UErrorCode& status) { 935 if (isArgNumeric ) { 936 int32_t argumentNumber = stou(formatName); 937 if (argumentNumber<0) { 938 status = U_ARGUMENT_TYPE_MISMATCH; 939 return; 940 } 941 adoptFormat(argumentNumber, formatToAdopt); 942 return; 943 } 944 for (int32_t i=0; i<subformatCount; ++i) { 945 if (formatName==*subformats[i].argName) { 946 delete subformats[i].format; 947 if ( formatToAdopt== NULL) { 948 // This should never happen -- but we'll be nice if it does 949 subformats[i].format = NULL; 950 } else { 951 subformats[i].format = formatToAdopt; 952 } 953 } 954 } 955 } 956 957 // ------------------------------------- 958 // Set a single format. 959 // Do nothing if the variable is not less than the array count. 960 961 void 962 MessageFormat::setFormat(int32_t n, const Format& newFormat) { 963 if (n >= 0 && n < subformatCount) { 964 delete subformats[n].format; 965 if (&newFormat == NULL) { 966 // This should never happen -- but we'll be nice if it does 967 subformats[n].format = NULL; 968 } else { 969 subformats[n].format = newFormat.clone(); 970 } 971 } 972 } 973 974 // ------------------------------------- 975 // Get a single format by format name. 976 // Do nothing if the variable is not less than the array count. 977 Format * 978 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 979 980 if (U_FAILURE(status)) return NULL; 981 982 if (isArgNumeric ) { 983 int32_t argumentNumber = stou(formatName); 984 if (argumentNumber<0) { 985 status = U_ARGUMENT_TYPE_MISMATCH; 986 return NULL; 987 } 988 if (argumentNumber < 0 || argumentNumber >= subformatCount) { 989 return subformats[argumentNumber].format; 990 } 991 else { 992 return NULL; 993 } 994 } 995 996 for (int32_t i=0; i<subformatCount; ++i) { 997 if (formatName==*subformats[i].argName) 998 { 999 return subformats[i].format; 1000 } 1001 } 1002 return NULL; 1003 } 1004 1005 // ------------------------------------- 1006 // Set a single format by format name 1007 // Do nothing if the variable is not less than the array count. 1008 void 1009 MessageFormat::setFormat(const UnicodeString& formatName, 1010 const Format& newFormat, 1011 UErrorCode& status) { 1012 if (isArgNumeric) { 1013 status = U_ARGUMENT_TYPE_MISMATCH; 1014 return; 1015 } 1016 for (int32_t i=0; i<subformatCount; ++i) { 1017 if (formatName==*subformats[i].argName) 1018 { 1019 delete subformats[i].format; 1020 if (&newFormat == NULL) { 1021 // This should never happen -- but we'll be nice if it does 1022 subformats[i].format = NULL; 1023 } else { 1024 subformats[i].format = newFormat.clone(); 1025 } 1026 break; 1027 } 1028 } 1029 } 1030 1031 // ------------------------------------- 1032 // Gets the format array. 1033 1034 const Format** 1035 MessageFormat::getFormats(int32_t& cnt) const 1036 { 1037 // This old API returns an array (which we hold) of Format* 1038 // pointers. The array is valid up to the next call to any 1039 // method on this object. We construct and resize an array 1040 // on demand that contains aliases to the subformats[i].format 1041 // pointers. 1042 MessageFormat* t = (MessageFormat*) this; 1043 cnt = 0; 1044 if (formatAliases == NULL) { 1045 t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount; 1046 Format** a = (Format**) 1047 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 1048 if (a == NULL) { 1049 return NULL; 1050 } 1051 t->formatAliases = a; 1052 } else if (subformatCount > formatAliasesCapacity) { 1053 Format** a = (Format**) 1054 uprv_realloc(formatAliases, sizeof(Format*) * subformatCount); 1055 if (a == NULL) { 1056 return NULL; 1057 } 1058 t->formatAliases = a; 1059 t->formatAliasesCapacity = subformatCount; 1060 } 1061 for (int32_t i=0; i<subformatCount; ++i) { 1062 t->formatAliases[i] = subformats[i].format; 1063 } 1064 cnt = subformatCount; 1065 return (const Format**)formatAliases; 1066 } 1067 1068 1069 StringEnumeration* 1070 MessageFormat::getFormatNames(UErrorCode& status) { 1071 if (U_FAILURE(status)) return NULL; 1072 1073 if (isArgNumeric) { 1074 status = U_ARGUMENT_TYPE_MISMATCH; 1075 return NULL; 1076 } 1077 UVector *fFormatNames = new UVector(status); 1078 if (U_FAILURE(status)) { 1079 status = U_MEMORY_ALLOCATION_ERROR; 1080 return NULL; 1081 } 1082 for (int32_t i=0; i<subformatCount; ++i) { 1083 fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status); 1084 } 1085 1086 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 1087 return nameEnumerator; 1088 } 1089 1090 // ------------------------------------- 1091 // Formats the source Formattable array and copy into the result buffer. 1092 // Ignore the FieldPosition result for error checking. 1093 1094 UnicodeString& 1095 MessageFormat::format(const Formattable* source, 1096 int32_t cnt, 1097 UnicodeString& appendTo, 1098 FieldPosition& ignore, 1099 UErrorCode& success) const 1100 { 1101 if (U_FAILURE(success)) 1102 return appendTo; 1103 1104 return format(source, cnt, appendTo, ignore, 0, success); 1105 } 1106 1107 // ------------------------------------- 1108 // Internally creates a MessageFormat instance based on the 1109 // pattern and formats the arguments Formattable array and 1110 // copy into the appendTo buffer. 1111 1112 UnicodeString& 1113 MessageFormat::format( const UnicodeString& pattern, 1114 const Formattable* arguments, 1115 int32_t cnt, 1116 UnicodeString& appendTo, 1117 UErrorCode& success) 1118 { 1119 MessageFormat temp(pattern, success); 1120 FieldPosition ignore(0); 1121 temp.format(arguments, cnt, appendTo, ignore, success); 1122 return appendTo; 1123 } 1124 1125 // ------------------------------------- 1126 // Formats the source Formattable object and copy into the 1127 // appendTo buffer. The Formattable object must be an array 1128 // of Formattable instances, returns error otherwise. 1129 1130 UnicodeString& 1131 MessageFormat::format(const Formattable& source, 1132 UnicodeString& appendTo, 1133 FieldPosition& ignore, 1134 UErrorCode& success) const 1135 { 1136 int32_t cnt; 1137 1138 if (U_FAILURE(success)) 1139 return appendTo; 1140 if (source.getType() != Formattable::kArray) { 1141 success = U_ILLEGAL_ARGUMENT_ERROR; 1142 return appendTo; 1143 } 1144 const Formattable* tmpPtr = source.getArray(cnt); 1145 1146 return format(tmpPtr, cnt, appendTo, ignore, 0, success); 1147 } 1148 1149 1150 UnicodeString& 1151 MessageFormat::format(const UnicodeString* argumentNames, 1152 const Formattable* arguments, 1153 int32_t count, 1154 UnicodeString& appendTo, 1155 UErrorCode& success) const { 1156 FieldPosition ignore(0); 1157 return format(arguments, argumentNames, count, appendTo, ignore, 0, success); 1158 } 1159 1160 UnicodeString& 1161 MessageFormat::format(const Formattable* arguments, 1162 int32_t cnt, 1163 UnicodeString& appendTo, 1164 FieldPosition& status, 1165 int32_t recursionProtection, 1166 UErrorCode& success) const 1167 { 1168 return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1169 } 1170 1171 // ------------------------------------- 1172 // Formats the arguments Formattable array and copy into the appendTo buffer. 1173 // Ignore the FieldPosition result for error checking. 1174 1175 UnicodeString& 1176 MessageFormat::format(const Formattable* arguments, 1177 const UnicodeString *argumentNames, 1178 int32_t cnt, 1179 UnicodeString& appendTo, 1180 FieldPosition& status, 1181 int32_t recursionProtection, 1182 UErrorCode& success) const 1183 { 1184 int32_t lastOffset = 0; 1185 int32_t argumentNumber=0; 1186 if (cnt < 0 || (cnt && arguments == NULL)) { 1187 success = U_ILLEGAL_ARGUMENT_ERROR; 1188 return appendTo; 1189 } 1190 1191 if ( !isArgNumeric && argumentNames== NULL ) { 1192 success = U_ILLEGAL_ARGUMENT_ERROR; 1193 return appendTo; 1194 } 1195 1196 const Formattable *obj=NULL; 1197 for (int32_t i=0; i<subformatCount; ++i) { 1198 // Append the prefix of current format element. 1199 appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset); 1200 lastOffset = subformats[i].offset; 1201 obj = NULL; 1202 if (isArgNumeric) { 1203 argumentNumber = subformats[i].argNum; 1204 1205 // Checks the scope of the argument number. 1206 if (argumentNumber >= cnt) { 1207 appendTo += LEFT_CURLY_BRACE; 1208 itos(argumentNumber, appendTo); 1209 appendTo += RIGHT_CURLY_BRACE; 1210 continue; 1211 } 1212 obj = arguments+argumentNumber; 1213 } 1214 else { 1215 for (int32_t j=0; j<cnt; ++j) { 1216 if (argumentNames[j]== *subformats[i].argName ) { 1217 obj = arguments+j; 1218 break; 1219 } 1220 } 1221 if (obj == NULL ) { 1222 appendTo += LEFT_CURLY_BRACE; 1223 appendTo += *subformats[i].argName; 1224 appendTo += RIGHT_CURLY_BRACE; 1225 continue; 1226 1227 } 1228 } 1229 Formattable::Type type = obj->getType(); 1230 1231 // Recursively calling the format process only if the current 1232 // format argument refers to either of the following: 1233 // a ChoiceFormat object, a PluralFormat object, a SelectFormat object. 1234 Format* fmt = subformats[i].format; 1235 if (fmt != NULL) { 1236 UnicodeString argNum; 1237 fmt->format(*obj, argNum, success); 1238 1239 // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the 1240 // MessageFormat pattern application. 1241 if ((dynamic_cast<ChoiceFormat*>(fmt) != NULL || 1242 dynamic_cast<PluralFormat*>(fmt) != NULL || 1243 dynamic_cast<SelectFormat*>(fmt) != NULL) && 1244 argNum.indexOf(LEFT_CURLY_BRACE) >= 0 1245 ) { 1246 MessageFormat temp(argNum, fLocale, success); 1247 // TODO: Implement recursion protection 1248 if ( isArgNumeric ) { 1249 temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1250 } 1251 else { 1252 temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success); 1253 } 1254 if (U_FAILURE(success)) { 1255 return appendTo; 1256 } 1257 } 1258 else { 1259 appendTo += argNum; 1260 } 1261 } 1262 // If the obj data type is a number, use a NumberFormat instance. 1263 else if ((type == Formattable::kDouble) || 1264 (type == Formattable::kLong) || 1265 (type == Formattable::kInt64)) { 1266 1267 const NumberFormat* nf = getDefaultNumberFormat(success); 1268 if (nf == NULL) { 1269 return appendTo; 1270 } 1271 if (type == Formattable::kDouble) { 1272 nf->format(obj->getDouble(), appendTo); 1273 } else if (type == Formattable::kLong) { 1274 nf->format(obj->getLong(), appendTo); 1275 } else { 1276 nf->format(obj->getInt64(), appendTo); 1277 } 1278 } 1279 // If the obj data type is a Date instance, use a DateFormat instance. 1280 else if (type == Formattable::kDate) { 1281 const DateFormat* df = getDefaultDateFormat(success); 1282 if (df == NULL) { 1283 return appendTo; 1284 } 1285 df->format(obj->getDate(), appendTo); 1286 } 1287 else if (type == Formattable::kString) { 1288 appendTo += obj->getString(); 1289 } 1290 else { 1291 success = U_ILLEGAL_ARGUMENT_ERROR; 1292 return appendTo; 1293 } 1294 } 1295 // Appends the rest of the pattern characters after the real last offset. 1296 appendTo.append(fPattern, lastOffset, 0x7fffffff); 1297 return appendTo; 1298 } 1299 1300 1301 // ------------------------------------- 1302 // Parses the source pattern and returns the Formattable objects array, 1303 // the array count and the ending parse position. The caller of this method 1304 // owns the array. 1305 1306 Formattable* 1307 MessageFormat::parse(const UnicodeString& source, 1308 ParsePosition& pos, 1309 int32_t& count) const 1310 { 1311 // Allocate at least one element. Allocating an array of length 1312 // zero causes problems on some platforms (e.g. Win32). 1313 Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1]; 1314 int32_t patternOffset = 0; 1315 int32_t sourceOffset = pos.getIndex(); 1316 ParsePosition tempPos(0); 1317 count = 0; // {sfb} reset to zero 1318 int32_t len; 1319 // If resultArray could not be created, exit out. 1320 // Avoid crossing initialization of variables above. 1321 if (resultArray == NULL) { 1322 goto PARSE_ERROR; 1323 } 1324 for (int32_t i = 0; i < subformatCount; ++i) { 1325 // match up to format 1326 len = subformats[i].offset - patternOffset; 1327 if (len == 0 || 1328 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1329 sourceOffset += len; 1330 patternOffset += len; 1331 } 1332 else { 1333 goto PARSE_ERROR; 1334 } 1335 1336 // now use format 1337 Format* fmt = subformats[i].format; 1338 int32_t argNum = subformats[i].argNum; 1339 if (fmt == NULL) { // string format 1340 // if at end, use longest possible match 1341 // otherwise uses first match to intervening string 1342 // does NOT recursively try all possibilities 1343 int32_t tempLength = (i+1<subformatCount) ? 1344 subformats[i+1].offset : fPattern.length(); 1345 1346 int32_t next; 1347 if (patternOffset >= tempLength) { 1348 next = source.length(); 1349 } 1350 else { 1351 UnicodeString buffer; 1352 fPattern.extract(patternOffset,tempLength - patternOffset, buffer); 1353 next = source.indexOf(buffer, sourceOffset); 1354 } 1355 1356 if (next < 0) { 1357 goto PARSE_ERROR; 1358 } 1359 else { 1360 UnicodeString buffer; 1361 source.extract(sourceOffset,next - sourceOffset, buffer); 1362 UnicodeString strValue = buffer; 1363 UnicodeString temp(LEFT_CURLY_BRACE); 1364 // {sfb} check this later 1365 if (isArgNumeric) { 1366 itos(argNum, temp); 1367 } 1368 else { 1369 temp+=(*subformats[i].argName); 1370 } 1371 temp += RIGHT_CURLY_BRACE; 1372 if (strValue != temp) { 1373 source.extract(sourceOffset,next - sourceOffset, buffer); 1374 resultArray[argNum].setString(buffer); 1375 // {sfb} not sure about this 1376 if ((argNum + 1) > count) { 1377 count = argNum + 1; 1378 } 1379 } 1380 sourceOffset = next; 1381 } 1382 } 1383 else { 1384 tempPos.setIndex(sourceOffset); 1385 fmt->parseObject(source, resultArray[argNum], tempPos); 1386 if (tempPos.getIndex() == sourceOffset) { 1387 goto PARSE_ERROR; 1388 } 1389 1390 if ((argNum + 1) > count) { 1391 count = argNum + 1; 1392 } 1393 sourceOffset = tempPos.getIndex(); // update 1394 } 1395 } 1396 len = fPattern.length() - patternOffset; 1397 if (len == 0 || 1398 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1399 pos.setIndex(sourceOffset + len); 1400 return resultArray; 1401 } 1402 // else fall through... 1403 1404 PARSE_ERROR: 1405 pos.setErrorIndex(sourceOffset); 1406 delete [] resultArray; 1407 count = 0; 1408 return NULL; // leave index as is to signal error 1409 } 1410 1411 // ------------------------------------- 1412 // Parses the source string and returns the array of 1413 // Formattable objects and the array count. The caller 1414 // owns the returned array. 1415 1416 Formattable* 1417 MessageFormat::parse(const UnicodeString& source, 1418 int32_t& cnt, 1419 UErrorCode& success) const 1420 { 1421 if (!isArgNumeric ) { 1422 success = U_ARGUMENT_TYPE_MISMATCH; 1423 return NULL; 1424 } 1425 ParsePosition status(0); 1426 // Calls the actual implementation method and starts 1427 // from zero offset of the source text. 1428 Formattable* result = parse(source, status, cnt); 1429 if (status.getIndex() == 0) { 1430 success = U_MESSAGE_PARSE_ERROR; 1431 delete[] result; 1432 return NULL; 1433 } 1434 return result; 1435 } 1436 1437 // ------------------------------------- 1438 // Parses the source text and copy into the result buffer. 1439 1440 void 1441 MessageFormat::parseObject( const UnicodeString& source, 1442 Formattable& result, 1443 ParsePosition& status) const 1444 { 1445 int32_t cnt = 0; 1446 Formattable* tmpResult = parse(source, status, cnt); 1447 if (tmpResult != NULL) 1448 result.adoptArray(tmpResult, cnt); 1449 } 1450 1451 UnicodeString 1452 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1453 UnicodeString result; 1454 if (U_SUCCESS(status)) { 1455 int32_t plen = pattern.length(); 1456 const UChar* pat = pattern.getBuffer(); 1457 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1458 UChar* buf = result.getBuffer(blen); 1459 if (buf == NULL) { 1460 status = U_MEMORY_ALLOCATION_ERROR; 1461 } else { 1462 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1463 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1464 } 1465 } 1466 if (U_FAILURE(status)) { 1467 result.setToBogus(); 1468 } 1469 return result; 1470 } 1471 1472 // ------------------------------------- 1473 1474 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1475 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1476 if (fmt == NULL) { 1477 ec = U_MEMORY_ALLOCATION_ERROR; 1478 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1479 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1480 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1481 } 1482 return fmt; 1483 } 1484 1485 /** 1486 * Reads the segments[] array (see applyPattern()) and parses the 1487 * segments[1..3] into a Format* object. Stores the format object in 1488 * the subformats[] array. Updates the argTypes[] array type 1489 * information for the corresponding argument. 1490 * 1491 * @param formatNumber index into subformats[] for this format 1492 * @param segments array of strings with the parsed pattern segments 1493 * @param parseError parse error data (output param) 1494 * @param ec error code 1495 */ 1496 void 1497 MessageFormat::makeFormat(int32_t formatNumber, 1498 UnicodeString* segments, 1499 UParseError& parseError, 1500 UErrorCode& ec) { 1501 if (U_FAILURE(ec)) { 1502 return; 1503 } 1504 1505 // Parse the argument number 1506 int32_t argumentNumber = stou(segments[1]); // always unlocalized! 1507 UnicodeString argumentName; 1508 if (argumentNumber < 0) { 1509 if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) { 1510 ec = U_INVALID_FORMAT_ERROR; 1511 return; 1512 } 1513 isArgNumeric = FALSE; 1514 argumentNumber=formatNumber; 1515 } 1516 if (!isArgNumeric) { 1517 if ( !isLegalArgName(segments[1]) ) { 1518 ec = U_INVALID_FORMAT_ERROR; 1519 return; 1520 } 1521 argumentName = segments[1]; 1522 } 1523 1524 // Parse the format, recording the argument type and creating a 1525 // new Format object (except for string arguments). 1526 Formattable::Type argType; 1527 Format *fmt = NULL; 1528 int32_t typeID, styleID; 1529 DateFormat::EStyle style; 1530 UnicodeString unquotedPattern, quotedPattern; 1531 UBool inQuote = FALSE; 1532 1533 switch (typeID = findKeyword(segments[2], TYPE_IDS)) { 1534 1535 case 0: // string 1536 argType = Formattable::kString; 1537 break; 1538 1539 case 1: // number 1540 argType = Formattable::kDouble; 1541 1542 switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) { 1543 case 0: // default 1544 fmt = NumberFormat::createInstance(fLocale, ec); 1545 break; 1546 case 1: // currency 1547 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1548 break; 1549 case 2: // percent 1550 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1551 break; 1552 case 3: // integer 1553 argType = Formattable::kLong; 1554 fmt = createIntegerFormat(fLocale, ec); 1555 break; 1556 default: // pattern 1557 fmt = NumberFormat::createInstance(fLocale, ec); 1558 if (fmt) { 1559 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt); 1560 if (decfmt != NULL) { 1561 decfmt->applyPattern(segments[3],parseError,ec); 1562 } 1563 } 1564 break; 1565 } 1566 break; 1567 1568 case 2: // date 1569 case 3: // time 1570 argType = Formattable::kDate; 1571 styleID = findKeyword(segments[3], DATE_STYLE_IDS); 1572 style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1573 1574 if (typeID == 2) { 1575 fmt = DateFormat::createDateInstance(style, fLocale); 1576 } else { 1577 fmt = DateFormat::createTimeInstance(style, fLocale); 1578 } 1579 1580 if (styleID < 0 && fmt != NULL) { 1581 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt); 1582 if (sdtfmt != NULL) { 1583 sdtfmt->applyPattern(segments[3]); 1584 } 1585 } 1586 break; 1587 1588 case 4: // choice 1589 argType = Formattable::kDouble; 1590 1591 fmt = new ChoiceFormat(segments[3], parseError, ec); 1592 break; 1593 1594 case 5: // spellout 1595 argType = Formattable::kDouble; 1596 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec); 1597 break; 1598 case 6: // ordinal 1599 argType = Formattable::kDouble; 1600 fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec); 1601 break; 1602 case 7: // duration 1603 argType = Formattable::kDouble; 1604 fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); 1605 break; 1606 case 8: // plural 1607 case 9: // Select 1608 if(typeID == 8) 1609 argType = Formattable::kDouble; 1610 else 1611 argType = Formattable::kString; 1612 quotedPattern = segments[3]; 1613 for (int32_t i = 0; i < quotedPattern.length(); ++i) { 1614 UChar ch = quotedPattern.charAt(i); 1615 if (ch == SINGLE_QUOTE) { 1616 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) { 1617 unquotedPattern+=ch; 1618 ++i; 1619 } 1620 else { 1621 inQuote = !inQuote; 1622 } 1623 } 1624 else { 1625 unquotedPattern += ch; 1626 } 1627 } 1628 if(typeID == 8) 1629 fmt = new PluralFormat(fLocale, unquotedPattern, ec); 1630 else 1631 fmt = new SelectFormat(unquotedPattern, ec); 1632 break; 1633 default: 1634 argType = Formattable::kString; 1635 ec = U_ILLEGAL_ARGUMENT_ERROR; 1636 break; 1637 } 1638 1639 if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) { 1640 ec = U_MEMORY_ALLOCATION_ERROR; 1641 } 1642 1643 if (!allocateSubformats(formatNumber+1) || 1644 !allocateArgTypes(argumentNumber+1)) { 1645 ec = U_MEMORY_ALLOCATION_ERROR; 1646 } 1647 1648 if (U_FAILURE(ec)) { 1649 delete fmt; 1650 return; 1651 } 1652 1653 // Parse succeeded; record results in our arrays 1654 subformats[formatNumber].format = fmt; 1655 subformats[formatNumber].offset = segments[0].length(); 1656 if (isArgNumeric) { 1657 subformats[formatNumber].argName = NULL; 1658 subformats[formatNumber].argNum = argumentNumber; 1659 } 1660 else { 1661 subformats[formatNumber].argName = new UnicodeString(argumentName); 1662 subformats[formatNumber].argNum = -1; 1663 } 1664 subformatCount = formatNumber+1; 1665 1666 // Careful here: argumentNumber may in general arrive out of 1667 // sequence, e.g., "There was {2} on {0,date} (see {1,number})." 1668 argTypes[argumentNumber] = argType; 1669 if (argumentNumber+1 > argTypeCount) { 1670 argTypeCount = argumentNumber+1; 1671 } 1672 } 1673 1674 // ------------------------------------- 1675 // Finds the string, s, in the string array, list. 1676 int32_t MessageFormat::findKeyword(const UnicodeString& s, 1677 const UChar * const *list) 1678 { 1679 if (s.length() == 0) 1680 return 0; // default 1681 1682 UnicodeString buffer = s; 1683 // Trims the space characters and turns all characters 1684 // in s to lower case. 1685 buffer.trim().toLower(""); 1686 for (int32_t i = 0; list[i]; ++i) { 1687 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1688 return i; 1689 } 1690 } 1691 return -1; 1692 } 1693 1694 // ------------------------------------- 1695 // Checks the range of the source text to quote the special 1696 // characters, { and ' and copy to target buffer. 1697 1698 void 1699 MessageFormat::copyAndFixQuotes(const UnicodeString& source, 1700 int32_t start, 1701 int32_t end, 1702 UnicodeString& appendTo) 1703 { 1704 UBool gotLB = FALSE; 1705 1706 for (int32_t i = start; i < end; ++i) { 1707 UChar ch = source[i]; 1708 if (ch == LEFT_CURLY_BRACE) { 1709 appendTo += SINGLE_QUOTE; 1710 appendTo += LEFT_CURLY_BRACE; 1711 appendTo += SINGLE_QUOTE; 1712 gotLB = TRUE; 1713 } 1714 else if (ch == RIGHT_CURLY_BRACE) { 1715 if(gotLB) { 1716 appendTo += RIGHT_CURLY_BRACE; 1717 gotLB = FALSE; 1718 } 1719 else { 1720 // orig code. 1721 appendTo += SINGLE_QUOTE; 1722 appendTo += RIGHT_CURLY_BRACE; 1723 appendTo += SINGLE_QUOTE; 1724 } 1725 } 1726 else if (ch == SINGLE_QUOTE) { 1727 appendTo += SINGLE_QUOTE; 1728 appendTo += SINGLE_QUOTE; 1729 } 1730 else { 1731 appendTo += ch; 1732 } 1733 } 1734 } 1735 1736 /** 1737 * Convenience method that ought to be in NumberFormat 1738 */ 1739 NumberFormat* 1740 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1741 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1742 DecimalFormat *temp2; 1743 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) { 1744 temp2->setMaximumFractionDigits(0); 1745 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1746 temp2->setParseIntegerOnly(TRUE); 1747 } 1748 1749 return temp; 1750 } 1751 1752 /** 1753 * Return the default number format. Used to format a numeric 1754 * argument when subformats[i].format is NULL. Returns NULL 1755 * on failure. 1756 * 1757 * Semantically const but may modify *this. 1758 */ 1759 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1760 if (defaultNumberFormat == NULL) { 1761 MessageFormat* t = (MessageFormat*) this; 1762 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1763 if (U_FAILURE(ec)) { 1764 delete t->defaultNumberFormat; 1765 t->defaultNumberFormat = NULL; 1766 } else if (t->defaultNumberFormat == NULL) { 1767 ec = U_MEMORY_ALLOCATION_ERROR; 1768 } 1769 } 1770 return defaultNumberFormat; 1771 } 1772 1773 /** 1774 * Return the default date format. Used to format a date 1775 * argument when subformats[i].format is NULL. Returns NULL 1776 * on failure. 1777 * 1778 * Semantically const but may modify *this. 1779 */ 1780 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1781 if (defaultDateFormat == NULL) { 1782 MessageFormat* t = (MessageFormat*) this; 1783 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1784 if (t->defaultDateFormat == NULL) { 1785 ec = U_MEMORY_ALLOCATION_ERROR; 1786 } 1787 } 1788 return defaultDateFormat; 1789 } 1790 1791 UBool 1792 MessageFormat::usesNamedArguments() const { 1793 return !isArgNumeric; 1794 } 1795 1796 UBool 1797 MessageFormat::isLegalArgName(const UnicodeString& argName) const { 1798 if(!u_hasBinaryProperty(argName.charAt(0), idStart)) { 1799 return FALSE; 1800 } 1801 for (int32_t i=1; i<argName.length(); ++i) { 1802 if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) { 1803 return FALSE; 1804 } 1805 } 1806 return TRUE; 1807 } 1808 1809 int32_t 1810 MessageFormat::getArgTypeCount() const { 1811 return argTypeCount; 1812 } 1813 1814 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1815 pos=0; 1816 fFormatNames = fNameList; 1817 } 1818 1819 const UnicodeString* 1820 FormatNameEnumeration::snext(UErrorCode& status) { 1821 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1822 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1823 } 1824 return NULL; 1825 } 1826 1827 void 1828 FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1829 pos=0; 1830 } 1831 1832 int32_t 1833 FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1834 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1835 } 1836 1837 FormatNameEnumeration::~FormatNameEnumeration() { 1838 UnicodeString *s; 1839 for (int32_t i=0; i<fFormatNames->size(); ++i) { 1840 if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) { 1841 delete s; 1842 } 1843 } 1844 delete fFormatNames; 1845 } 1846 U_NAMESPACE_END 1847 1848 #endif /* #if !UCONFIG_NO_FORMATTING */ 1849 1850 //eof 1851