1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 * 11/01/09 kirtig Added SelectFormat 20 ********************************************************************/ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_FORMATTING 25 26 #include "unicode/msgfmt.h" 27 #include "unicode/decimfmt.h" 28 #include "unicode/datefmt.h" 29 #include "unicode/smpdtfmt.h" 30 #include "unicode/choicfmt.h" 31 #include "unicode/plurfmt.h" 32 #include "unicode/selfmt.h" 33 #include "unicode/ustring.h" 34 #include "unicode/ucnv_err.h" 35 #include "unicode/uchar.h" 36 #include "unicode/umsg.h" 37 #include "unicode/rbnf.h" 38 #include "cmemory.h" 39 #include "msgfmt_impl.h" 40 #include "../common/util.h" 41 #include "uassert.h" 42 #include "ustrfmt.h" 43 #include "uvector.h" 44 45 //Todo:remove stdio 46 #include "stdio.h" 47 48 49 // ***************************************************************************** 50 // class MessageFormat 51 // ***************************************************************************** 52 53 #define COMMA ((UChar)0x002C) 54 #define SINGLE_QUOTE ((UChar)0x0027) 55 #define LEFT_CURLY_BRACE ((UChar)0x007B) 56 #define RIGHT_CURLY_BRACE ((UChar)0x007D) 57 58 //--------------------------------------- 59 // static data 60 61 static const UChar ID_EMPTY[] = { 62 0 /* empty string, used for default so that null can mark end of list */ 63 }; 64 65 static const UChar ID_NUMBER[] = { 66 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 67 }; 68 static const UChar ID_DATE[] = { 69 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 70 }; 71 static const UChar ID_TIME[] = { 72 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 73 }; 74 static const UChar ID_CHOICE[] = { 75 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */ 76 }; 77 static const UChar ID_SPELLOUT[] = { 78 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 79 }; 80 static const UChar ID_ORDINAL[] = { 81 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 82 }; 83 static const UChar ID_DURATION[] = { 84 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 85 }; 86 static const UChar ID_PLURAL[] = { 87 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ 88 }; 89 static const UChar ID_SELECT[] = { 90 0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0 /* "select" */ 91 }; 92 93 // MessageFormat Type List Number, Date, Time or Choice 94 static const UChar * const TYPE_IDS[] = { 95 ID_EMPTY, 96 ID_NUMBER, 97 ID_DATE, 98 ID_TIME, 99 ID_CHOICE, 100 ID_SPELLOUT, 101 ID_ORDINAL, 102 ID_DURATION, 103 ID_PLURAL, 104 ID_SELECT, 105 NULL, 106 }; 107 108 static const UChar ID_CURRENCY[] = { 109 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 110 }; 111 static const UChar ID_PERCENT[] = { 112 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 113 }; 114 static const UChar ID_INTEGER[] = { 115 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 116 }; 117 118 // NumberFormat modifier list, default, currency, percent or integer 119 static const UChar * const NUMBER_STYLE_IDS[] = { 120 ID_EMPTY, 121 ID_CURRENCY, 122 ID_PERCENT, 123 ID_INTEGER, 124 NULL, 125 }; 126 127 static const UChar ID_SHORT[] = { 128 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 129 }; 130 static const UChar ID_MEDIUM[] = { 131 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 132 }; 133 static const UChar ID_LONG[] = { 134 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 135 }; 136 static const UChar ID_FULL[] = { 137 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 138 }; 139 140 // DateFormat modifier list, default, short, medium, long or full 141 static const UChar * const DATE_STYLE_IDS[] = { 142 ID_EMPTY, 143 ID_SHORT, 144 ID_MEDIUM, 145 ID_LONG, 146 ID_FULL, 147 NULL, 148 }; 149 150 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = { 151 U_NAMESPACE_QUALIFIER DateFormat::kDefault, 152 U_NAMESPACE_QUALIFIER DateFormat::kShort, 153 U_NAMESPACE_QUALIFIER DateFormat::kMedium, 154 U_NAMESPACE_QUALIFIER DateFormat::kLong, 155 U_NAMESPACE_QUALIFIER DateFormat::kFull, 156 }; 157 158 static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 159 160 U_NAMESPACE_BEGIN 161 162 // ------------------------------------- 163 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 164 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 165 166 //-------------------------------------------------------------------- 167 168 /** 169 * Convert a string to an unsigned decimal, ignoring rule whitespace. 170 * @return a non-negative number if successful, or a negative number 171 * upon failure. 172 */ 173 static int32_t stou(const UnicodeString& string) { 174 int32_t n = 0; 175 int32_t count = 0; 176 UChar32 c; 177 for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) { 178 c = string.char32At(i); 179 if (uprv_isRuleWhiteSpace(c)) { 180 continue; 181 } 182 int32_t d = u_digit(c, 10); 183 if (d < 0 || ++count > 10) { 184 return -1; 185 } 186 n = 10*n + d; 187 } 188 return n; 189 } 190 191 /** 192 * Convert an integer value to a string and append the result to 193 * the given UnicodeString. 194 */ 195 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 196 UChar temp[16]; 197 uprv_itou(temp,16,i,10,0); // 10 == radix 198 appendTo.append(temp); 199 return appendTo; 200 } 201 202 /* 203 * A structure representing one subformat of this MessageFormat. 204 * Each subformat has a Format object, an offset into the plain 205 * pattern text fPattern, and an argument number. The argument 206 * number corresponds to the array of arguments to be formatted. 207 * @internal 208 */ 209 class MessageFormat::Subformat : public UMemory { 210 public: 211 /** 212 * @internal 213 */ 214 Format* format; // formatter 215 /** 216 * @internal 217 */ 218 int32_t offset; // offset into fPattern 219 /** 220 * @internal 221 */ 222 // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number 223 int32_t argNum; // 0-based argument number 224 /** 225 * @internal 226 */ 227 UnicodeString* argName; // argument name or number 228 229 /** 230 * Clone that.format and assign it to this.format 231 * Do NOT delete this.format 232 * @internal 233 */ 234 Subformat& operator=(const Subformat& that) { 235 if (this != &that) { 236 format = that.format ? that.format->clone() : NULL; 237 offset = that.offset; 238 argNum = that.argNum; 239 argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL; 240 } 241 return *this; 242 } 243 244 /** 245 * @internal 246 */ 247 UBool operator==(const Subformat& that) const { 248 // Do cheap comparisons first 249 return offset == that.offset && 250 argNum == that.argNum && 251 ((argName == that.argName) || 252 (*argName == *that.argName)) && 253 ((format == that.format) || // handles NULL 254 (*format == *that.format)); 255 } 256 257 /** 258 * @internal 259 */ 260 UBool operator!=(const Subformat& that) const { 261 return !operator==(that); 262 } 263 }; 264 265 // ------------------------------------- 266 // Creates a MessageFormat instance based on the pattern. 267 268 MessageFormat::MessageFormat(const UnicodeString& pattern, 269 UErrorCode& success) 270 : fLocale(Locale::getDefault()), // Uses the default locale 271 formatAliases(NULL), 272 formatAliasesCapacity(0), 273 idStart(UCHAR_ID_START), 274 idContinue(UCHAR_ID_CONTINUE), 275 subformats(NULL), 276 subformatCount(0), 277 subformatCapacity(0), 278 argTypes(NULL), 279 argTypeCount(0), 280 argTypeCapacity(0), 281 isArgNumeric(TRUE), 282 defaultNumberFormat(NULL), 283 defaultDateFormat(NULL) 284 { 285 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 286 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 287 success = U_MEMORY_ALLOCATION_ERROR; 288 return; 289 } 290 applyPattern(pattern, success); 291 setLocaleIDs(fLocale.getName(), fLocale.getName()); 292 } 293 294 MessageFormat::MessageFormat(const UnicodeString& pattern, 295 const Locale& newLocale, 296 UErrorCode& success) 297 : fLocale(newLocale), 298 formatAliases(NULL), 299 formatAliasesCapacity(0), 300 idStart(UCHAR_ID_START), 301 idContinue(UCHAR_ID_CONTINUE), 302 subformats(NULL), 303 subformatCount(0), 304 subformatCapacity(0), 305 argTypes(NULL), 306 argTypeCount(0), 307 argTypeCapacity(0), 308 isArgNumeric(TRUE), 309 defaultNumberFormat(NULL), 310 defaultDateFormat(NULL) 311 { 312 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 313 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 314 success = U_MEMORY_ALLOCATION_ERROR; 315 return; 316 } 317 applyPattern(pattern, success); 318 setLocaleIDs(fLocale.getName(), fLocale.getName()); 319 } 320 321 MessageFormat::MessageFormat(const UnicodeString& pattern, 322 const Locale& newLocale, 323 UParseError& parseError, 324 UErrorCode& success) 325 : fLocale(newLocale), 326 formatAliases(NULL), 327 formatAliasesCapacity(0), 328 idStart(UCHAR_ID_START), 329 idContinue(UCHAR_ID_CONTINUE), 330 subformats(NULL), 331 subformatCount(0), 332 subformatCapacity(0), 333 argTypes(NULL), 334 argTypeCount(0), 335 argTypeCapacity(0), 336 isArgNumeric(TRUE), 337 defaultNumberFormat(NULL), 338 defaultDateFormat(NULL) 339 { 340 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || 341 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { 342 success = U_MEMORY_ALLOCATION_ERROR; 343 return; 344 } 345 applyPattern(pattern, parseError, success); 346 setLocaleIDs(fLocale.getName(), fLocale.getName()); 347 } 348 349 MessageFormat::MessageFormat(const MessageFormat& that) 350 : Format(that), 351 formatAliases(NULL), 352 formatAliasesCapacity(0), 353 idStart(UCHAR_ID_START), 354 idContinue(UCHAR_ID_CONTINUE), 355 subformats(NULL), 356 subformatCount(0), 357 subformatCapacity(0), 358 argTypes(NULL), 359 argTypeCount(0), 360 argTypeCapacity(0), 361 isArgNumeric(TRUE), 362 defaultNumberFormat(NULL), 363 defaultDateFormat(NULL) 364 { 365 *this = that; 366 } 367 368 MessageFormat::~MessageFormat() 369 { 370 int32_t idx; 371 for (idx = 0; idx < subformatCount; idx++) { 372 delete subformats[idx].format; 373 delete subformats[idx].argName; 374 } 375 uprv_free(subformats); 376 subformats = NULL; 377 subformatCount = subformatCapacity = 0; 378 379 uprv_free(argTypes); 380 argTypes = NULL; 381 argTypeCount = argTypeCapacity = 0; 382 383 uprv_free(formatAliases); 384 385 delete defaultNumberFormat; 386 delete defaultDateFormat; 387 } 388 389 //-------------------------------------------------------------------- 390 // Variable-size array management 391 392 /** 393 * Allocate subformats[] to at least the given capacity and return 394 * TRUE if successful. If not, leave subformats[] unchanged. 395 * 396 * If subformats is NULL, allocate it. If it is not NULL, enlarge it 397 * if necessary to be at least as large as specified. 398 */ 399 UBool MessageFormat::allocateSubformats(int32_t capacity) { 400 if (subformats == NULL) { 401 subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity); 402 subformatCapacity = capacity; 403 subformatCount = 0; 404 if (subformats == NULL) { 405 subformatCapacity = 0; 406 return FALSE; 407 } 408 } else if (subformatCapacity < capacity) { 409 if (capacity < 2*subformatCapacity) { 410 capacity = 2*subformatCapacity; 411 } 412 Subformat* a = (Subformat*) 413 uprv_realloc(subformats, sizeof(*subformats) * capacity); 414 if (a == NULL) { 415 return FALSE; // request failed 416 } 417 subformats = a; 418 subformatCapacity = capacity; 419 } 420 return TRUE; 421 } 422 423 /** 424 * Allocate argTypes[] to at least the given capacity and return 425 * TRUE if successful. If not, leave argTypes[] unchanged. 426 * 427 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 428 * if necessary to be at least as large as specified. 429 */ 430 UBool MessageFormat::allocateArgTypes(int32_t capacity) { 431 if (argTypes == NULL) { 432 argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity); 433 argTypeCount = 0; 434 argTypeCapacity = capacity; 435 if (argTypes == NULL) { 436 argTypeCapacity = 0; 437 return FALSE; 438 } 439 for (int32_t i=0; i<capacity; ++i) { 440 argTypes[i] = Formattable::kString; 441 } 442 } else if (argTypeCapacity < capacity) { 443 if (capacity < 2*argTypeCapacity) { 444 capacity = 2*argTypeCapacity; 445 } 446 Formattable::Type* a = (Formattable::Type*) 447 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 448 if (a == NULL) { 449 return FALSE; // request failed 450 } 451 for (int32_t i=argTypeCapacity; i<capacity; ++i) { 452 a[i] = Formattable::kString; 453 } 454 argTypes = a; 455 argTypeCapacity = capacity; 456 } 457 return TRUE; 458 } 459 460 // ------------------------------------- 461 // assignment operator 462 463 const MessageFormat& 464 MessageFormat::operator=(const MessageFormat& that) 465 { 466 // Reallocate the arrays BEFORE changing this object 467 if (this != &that && 468 allocateSubformats(that.subformatCount) && 469 allocateArgTypes(that.argTypeCount)) { 470 471 // Calls the super class for assignment first. 472 Format::operator=(that); 473 474 fPattern = that.fPattern; 475 setLocale(that.fLocale); 476 isArgNumeric = that.isArgNumeric; 477 int32_t j; 478 for (j=0; j<subformatCount; ++j) { 479 delete subformats[j].format; 480 } 481 subformatCount = 0; 482 483 for (j=0; j<that.subformatCount; ++j) { 484 // Subformat::operator= does NOT delete this.format 485 subformats[j] = that.subformats[j]; 486 } 487 subformatCount = that.subformatCount; 488 489 for (j=0; j<that.argTypeCount; ++j) { 490 argTypes[j] = that.argTypes[j]; 491 } 492 argTypeCount = that.argTypeCount; 493 } 494 return *this; 495 } 496 497 UBool 498 MessageFormat::operator==(const Format& rhs) const 499 { 500 if (this == &rhs) return TRUE; 501 502 MessageFormat& that = (MessageFormat&)rhs; 503 504 // Check class ID before checking MessageFormat members 505 if (!Format::operator==(rhs) || 506 fPattern != that.fPattern || 507 fLocale != that.fLocale || 508 isArgNumeric != that.isArgNumeric) { 509 return FALSE; 510 } 511 512 int32_t j; 513 for (j=0; j<subformatCount; ++j) { 514 if (subformats[j] != that.subformats[j]) { 515 return FALSE; 516 } 517 } 518 519 return TRUE; 520 } 521 522 // ------------------------------------- 523 // Creates a copy of this MessageFormat, the caller owns the copy. 524 525 Format* 526 MessageFormat::clone() const 527 { 528 return new MessageFormat(*this); 529 } 530 531 // ------------------------------------- 532 // Sets the locale of this MessageFormat object to theLocale. 533 534 void 535 MessageFormat::setLocale(const Locale& theLocale) 536 { 537 if (fLocale != theLocale) { 538 delete defaultNumberFormat; 539 defaultNumberFormat = NULL; 540 delete defaultDateFormat; 541 defaultDateFormat = NULL; 542 } 543 fLocale = theLocale; 544 setLocaleIDs(fLocale.getName(), fLocale.getName()); 545 } 546 547 // ------------------------------------- 548 // Gets the locale of this MessageFormat object. 549 550 const Locale& 551 MessageFormat::getLocale() const 552 { 553 return fLocale; 554 } 555 556 557 558 559 void 560 MessageFormat::applyPattern(const UnicodeString& newPattern, 561 UErrorCode& status) 562 { 563 UParseError parseError; 564 applyPattern(newPattern,parseError,status); 565 } 566 567 568 // ------------------------------------- 569 // Applies the new pattern and returns an error if the pattern 570 // is not correct. 571 void 572 MessageFormat::applyPattern(const UnicodeString& pattern, 573 UParseError& parseError, 574 UErrorCode& ec) 575 { 576 if(U_FAILURE(ec)) { 577 return; 578 } 579 // The pattern is broken up into segments. Each time a subformat 580 // is encountered, 4 segments are recorded. For example, consider 581 // the pattern: 582 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}." 583 // The first set of segments is: 584 // segments[0] = "There " 585 // segments[1] = "0" 586 // segments[2] = "choice" 587 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files" 588 589 // During parsing, the plain text is accumulated into segments[0]. 590 // Segments 1..3 are used to parse each subpattern. Each time a 591 // subpattern is parsed, it creates a format object that is stored 592 // in the subformats array, together with an offset and argument 593 // number. The offset into the plain text stored in 594 // segments[0]. 595 596 // Quotes in segment 0 are handled normally. They are removed. 597 // Quotes may not occur in segments 1 or 2. 598 // Quotes in segment 3 are parsed and _copied_. This makes 599 // subformat patterns work, e.g., {1,number,'#'.##} passes 600 // the pattern "'#'.##" to DecimalFormat. 601 602 UnicodeString segments[4]; 603 int32_t part = 0; // segment we are in, 0..3 604 // Record the highest argument number in the pattern. (In the 605 // subpattern {3,number} the argument number is 3.) 606 int32_t formatNumber = 0; 607 UBool inQuote = FALSE; 608 int32_t braceStack = 0; 609 // Clear error struct 610 parseError.offset = -1; 611 parseError.preContext[0] = parseError.postContext[0] = (UChar)0; 612 int32_t patLen = pattern.length(); 613 int32_t i; 614 615 for (i=0; i<subformatCount; ++i) { 616 delete subformats[i].format; 617 } 618 subformatCount = 0; 619 argTypeCount = 0; 620 621 for (i=0; i<patLen; ++i) { 622 UChar ch = pattern[i]; 623 if (part == 0) { 624 // In segment 0, recognize and remove quotes 625 if (ch == SINGLE_QUOTE) { 626 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) { 627 segments[0] += ch; 628 ++i; 629 } else { 630 inQuote = !inQuote; 631 } 632 } else if (ch == LEFT_CURLY_BRACE && !inQuote) { 633 // The only way we get from segment 0 to 1 is via an 634 // unquoted '{'. 635 part = 1; 636 } else { 637 segments[0] += ch; 638 } 639 } else if (inQuote) { 640 // In segments 1..3, recognize quoted matter, and copy it 641 // into the segment, together with the quotes. This takes 642 // care of '' as well. 643 segments[part] += ch; 644 if (ch == SINGLE_QUOTE) { 645 inQuote = FALSE; 646 } 647 } else { 648 // We have an unquoted character in segment 1..3 649 switch (ch) { 650 case COMMA: 651 // Commas bump us to the next segment, except for segment 3, 652 // which can contain commas. See example above. 653 if (part < 3) 654 part += 1; 655 else 656 segments[3] += ch; 657 break; 658 case LEFT_CURLY_BRACE: 659 // Handle '{' within segment 3. The initial '{' 660 // before segment 1 is handled above. 661 if (part != 3) { 662 ec = U_PATTERN_SYNTAX_ERROR; 663 goto SYNTAX_ERROR; 664 } 665 ++braceStack; 666 segments[part] += ch; 667 break; 668 case RIGHT_CURLY_BRACE: 669 if (braceStack == 0) { 670 makeFormat(formatNumber, segments, parseError,ec); 671 if (U_FAILURE(ec)){ 672 goto SYNTAX_ERROR; 673 } 674 formatNumber++; 675 676 segments[1].remove(); 677 segments[2].remove(); 678 segments[3].remove(); 679 part = 0; 680 } else { 681 --braceStack; 682 segments[part] += ch; 683 } 684 break; 685 case SINGLE_QUOTE: 686 inQuote = TRUE; 687 // fall through (copy quote chars in segments 1..3) 688 default: 689 segments[part] += ch; 690 break; 691 } 692 } 693 } 694 if (braceStack != 0 || part != 0) { 695 // Unmatched braces in the pattern 696 ec = U_UNMATCHED_BRACES; 697 goto SYNTAX_ERROR; 698 } 699 fPattern = segments[0]; 700 return; 701 702 SYNTAX_ERROR: 703 syntaxError(pattern, i, parseError); 704 for (i=0; i<subformatCount; ++i) { 705 delete subformats[i].format; 706 } 707 argTypeCount = subformatCount = 0; 708 } 709 // ------------------------------------- 710 // Converts this MessageFormat instance to a pattern. 711 712 UnicodeString& 713 MessageFormat::toPattern(UnicodeString& appendTo) const { 714 // later, make this more extensible 715 int32_t lastOffset = 0; 716 int32_t i; 717 for (i=0; i<subformatCount; ++i) { 718 copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo); 719 lastOffset = subformats[i].offset; 720 appendTo += LEFT_CURLY_BRACE; 721 if (isArgNumeric) { 722 itos(subformats[i].argNum, appendTo); 723 } 724 else { 725 appendTo += *subformats[i].argName; 726 } 727 Format* fmt = subformats[i].format; 728 if (fmt == NULL) { 729 // do nothing, string format 730 } 731 else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 732 733 UErrorCode ec = U_ZERO_ERROR; 734 NumberFormat& formatAlias = *(NumberFormat*)fmt; 735 NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec); 736 NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec); 737 NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec); 738 NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec); 739 740 appendTo += COMMA; 741 appendTo += ID_NUMBER; 742 if (formatAlias != *defaultTemplate) { 743 appendTo += COMMA; 744 if (formatAlias == *currencyTemplate) { 745 appendTo += ID_CURRENCY; 746 } 747 else if (formatAlias == *percentTemplate) { 748 appendTo += ID_PERCENT; 749 } 750 else if (formatAlias == *integerTemplate) { 751 appendTo += ID_INTEGER; 752 } 753 else { 754 UnicodeString buffer; 755 appendTo += ((DecimalFormat*)fmt)->toPattern(buffer); 756 } 757 } 758 759 delete defaultTemplate; 760 delete currencyTemplate; 761 delete percentTemplate; 762 delete integerTemplate; 763 } 764 else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { 765 DateFormat& formatAlias = *(DateFormat*)fmt; 766 DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale); 767 DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale); 768 DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale); 769 DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale); 770 DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale); 771 DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale); 772 DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale); 773 DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale); 774 775 776 appendTo += COMMA; 777 if (formatAlias == *defaultDateTemplate) { 778 appendTo += ID_DATE; 779 } 780 else if (formatAlias == *shortDateTemplate) { 781 appendTo += ID_DATE; 782 appendTo += COMMA; 783 appendTo += ID_SHORT; 784 } 785 else if (formatAlias == *defaultDateTemplate) { 786 appendTo += ID_DATE; 787 appendTo += COMMA; 788 appendTo += ID_MEDIUM; 789 } 790 else if (formatAlias == *longDateTemplate) { 791 appendTo += ID_DATE; 792 appendTo += COMMA; 793 appendTo += ID_LONG; 794 } 795 else if (formatAlias == *fullDateTemplate) { 796 appendTo += ID_DATE; 797 appendTo += COMMA; 798 appendTo += ID_FULL; 799 } 800 else if (formatAlias == *defaultTimeTemplate) { 801 appendTo += ID_TIME; 802 } 803 else if (formatAlias == *shortTimeTemplate) { 804 appendTo += ID_TIME; 805 appendTo += COMMA; 806 appendTo += ID_SHORT; 807 } 808 else if (formatAlias == *defaultTimeTemplate) { 809 appendTo += ID_TIME; 810 appendTo += COMMA; 811 appendTo += ID_MEDIUM; 812 } 813 else if (formatAlias == *longTimeTemplate) { 814 appendTo += ID_TIME; 815 appendTo += COMMA; 816 appendTo += ID_LONG; 817 } 818 else if (formatAlias == *fullTimeTemplate) { 819 appendTo += ID_TIME; 820 appendTo += COMMA; 821 appendTo += ID_FULL; 822 } 823 else { 824 UnicodeString buffer; 825 appendTo += ID_DATE; 826 appendTo += COMMA; 827 appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer); 828 } 829 830 delete defaultDateTemplate; 831 delete shortDateTemplate; 832 delete longDateTemplate; 833 delete fullDateTemplate; 834 delete defaultTimeTemplate; 835 delete shortTimeTemplate; 836 delete longTimeTemplate; 837 delete fullTimeTemplate; 838 // {sfb} there should be a more efficient way to do this! 839 } 840 else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) { 841 UnicodeString buffer; 842 appendTo += COMMA; 843 appendTo += ID_CHOICE; 844 appendTo += COMMA; 845 appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer); 846 } 847 else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) { 848 UnicodeString buffer; 849 appendTo += ((PluralFormat*)fmt)->toPattern(buffer); 850 } 851 else if (fmt->getDynamicClassID() == SelectFormat::getStaticClassID()) { 852 UnicodeString buffer; 853 appendTo += ((SelectFormat*)fmt)->toPattern(buffer); 854 } 855 else { 856 //appendTo += ", unknown"; 857 } 858 appendTo += RIGHT_CURLY_BRACE; 859 } 860 copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo); 861 return appendTo; 862 } 863 864 // ------------------------------------- 865 // Adopts the new formats array and updates the array count. 866 // This MessageFormat instance owns the new formats. 867 868 void 869 MessageFormat::adoptFormats(Format** newFormats, 870 int32_t count) { 871 if (newFormats == NULL || count < 0) { 872 return; 873 } 874 875 int32_t i; 876 if (allocateSubformats(count)) { 877 for (i=0; i<subformatCount; ++i) { 878 delete subformats[i].format; 879 } 880 for (i=0; i<count; ++i) { 881 subformats[i].format = newFormats[i]; 882 } 883 subformatCount = count; 884 } else { 885 // An adopt method must always take ownership. Delete 886 // the incoming format objects and return unchanged. 887 for (i=0; i<count; ++i) { 888 delete newFormats[i]; 889 } 890 } 891 892 // TODO: What about the .offset and .argNum fields? 893 } 894 895 // ------------------------------------- 896 // Sets the new formats array and updates the array count. 897 // This MessageFormat instance maks a copy of the new formats. 898 899 void 900 MessageFormat::setFormats(const Format** newFormats, 901 int32_t count) { 902 if (newFormats == NULL || count < 0) { 903 return; 904 } 905 906 if (allocateSubformats(count)) { 907 int32_t i; 908 for (i=0; i<subformatCount; ++i) { 909 delete subformats[i].format; 910 } 911 subformatCount = 0; 912 913 for (i=0; i<count; ++i) { 914 subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL; 915 } 916 subformatCount = count; 917 } 918 919 // TODO: What about the .offset and .arg fields? 920 } 921 922 // ------------------------------------- 923 // Adopt a single format by format number. 924 // Do nothing if the format number is not less than the array count. 925 926 void 927 MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 928 if (n < 0 || n >= subformatCount) { 929 delete newFormat; 930 } else { 931 delete subformats[n].format; 932 subformats[n].format = newFormat; 933 } 934 } 935 936 // ------------------------------------- 937 // Adopt a single format by format name. 938 // Do nothing if there is no match of formatName. 939 void 940 MessageFormat::adoptFormat(const UnicodeString& formatName, 941 Format* formatToAdopt, 942 UErrorCode& status) { 943 if (isArgNumeric ) { 944 int32_t argumentNumber = stou(formatName); 945 if (argumentNumber<0) { 946 status = U_ARGUMENT_TYPE_MISMATCH; 947 return; 948 } 949 adoptFormat(argumentNumber, formatToAdopt); 950 return; 951 } 952 for (int32_t i=0; i<subformatCount; ++i) { 953 if (formatName==*subformats[i].argName) { 954 delete subformats[i].format; 955 if ( formatToAdopt== NULL) { 956 // This should never happen -- but we'll be nice if it does 957 subformats[i].format = NULL; 958 } else { 959 subformats[i].format = formatToAdopt; 960 } 961 } 962 } 963 } 964 965 // ------------------------------------- 966 // Set a single format. 967 // Do nothing if the variable is not less than the array count. 968 969 void 970 MessageFormat::setFormat(int32_t n, const Format& newFormat) { 971 if (n >= 0 && n < subformatCount) { 972 delete subformats[n].format; 973 if (&newFormat == NULL) { 974 // This should never happen -- but we'll be nice if it does 975 subformats[n].format = NULL; 976 } else { 977 subformats[n].format = newFormat.clone(); 978 } 979 } 980 } 981 982 // ------------------------------------- 983 // Get a single format by format name. 984 // Do nothing if the variable is not less than the array count. 985 Format * 986 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 987 988 if (U_FAILURE(status)) return NULL; 989 990 if (isArgNumeric ) { 991 int32_t argumentNumber = stou(formatName); 992 if (argumentNumber<0) { 993 status = U_ARGUMENT_TYPE_MISMATCH; 994 return NULL; 995 } 996 if (argumentNumber < 0 || argumentNumber >= subformatCount) { 997 return subformats[argumentNumber].format; 998 } 999 else { 1000 return NULL; 1001 } 1002 } 1003 1004 for (int32_t i=0; i<subformatCount; ++i) { 1005 if (formatName==*subformats[i].argName) 1006 { 1007 return subformats[i].format; 1008 } 1009 } 1010 return NULL; 1011 } 1012 1013 // ------------------------------------- 1014 // Set a single format by format name 1015 // Do nothing if the variable is not less than the array count. 1016 void 1017 MessageFormat::setFormat(const UnicodeString& formatName, 1018 const Format& newFormat, 1019 UErrorCode& status) { 1020 if (isArgNumeric) { 1021 status = U_ARGUMENT_TYPE_MISMATCH; 1022 return; 1023 } 1024 for (int32_t i=0; i<subformatCount; ++i) { 1025 if (formatName==*subformats[i].argName) 1026 { 1027 delete subformats[i].format; 1028 if (&newFormat == NULL) { 1029 // This should never happen -- but we'll be nice if it does 1030 subformats[i].format = NULL; 1031 } else { 1032 subformats[i].format = newFormat.clone(); 1033 } 1034 break; 1035 } 1036 } 1037 } 1038 1039 // ------------------------------------- 1040 // Gets the format array. 1041 1042 const Format** 1043 MessageFormat::getFormats(int32_t& cnt) const 1044 { 1045 // This old API returns an array (which we hold) of Format* 1046 // pointers. The array is valid up to the next call to any 1047 // method on this object. We construct and resize an array 1048 // on demand that contains aliases to the subformats[i].format 1049 // pointers. 1050 MessageFormat* t = (MessageFormat*) this; 1051 cnt = 0; 1052 if (formatAliases == NULL) { 1053 t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount; 1054 Format** a = (Format**) 1055 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 1056 if (a == NULL) { 1057 return NULL; 1058 } 1059 t->formatAliases = a; 1060 } else if (subformatCount > formatAliasesCapacity) { 1061 Format** a = (Format**) 1062 uprv_realloc(formatAliases, sizeof(Format*) * subformatCount); 1063 if (a == NULL) { 1064 return NULL; 1065 } 1066 t->formatAliases = a; 1067 t->formatAliasesCapacity = subformatCount; 1068 } 1069 for (int32_t i=0; i<subformatCount; ++i) { 1070 t->formatAliases[i] = subformats[i].format; 1071 } 1072 cnt = subformatCount; 1073 return (const Format**)formatAliases; 1074 } 1075 1076 1077 StringEnumeration* 1078 MessageFormat::getFormatNames(UErrorCode& status) { 1079 if (U_FAILURE(status)) return NULL; 1080 1081 if (isArgNumeric) { 1082 status = U_ARGUMENT_TYPE_MISMATCH; 1083 return NULL; 1084 } 1085 UVector *fFormatNames = new UVector(status); 1086 if (U_FAILURE(status)) { 1087 status = U_MEMORY_ALLOCATION_ERROR; 1088 return NULL; 1089 } 1090 for (int32_t i=0; i<subformatCount; ++i) { 1091 fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status); 1092 } 1093 1094 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 1095 return nameEnumerator; 1096 } 1097 1098 // ------------------------------------- 1099 // Formats the source Formattable array and copy into the result buffer. 1100 // Ignore the FieldPosition result for error checking. 1101 1102 UnicodeString& 1103 MessageFormat::format(const Formattable* source, 1104 int32_t cnt, 1105 UnicodeString& appendTo, 1106 FieldPosition& ignore, 1107 UErrorCode& success) const 1108 { 1109 if (U_FAILURE(success)) 1110 return appendTo; 1111 1112 return format(source, cnt, appendTo, ignore, 0, success); 1113 } 1114 1115 // ------------------------------------- 1116 // Internally creates a MessageFormat instance based on the 1117 // pattern and formats the arguments Formattable array and 1118 // copy into the appendTo buffer. 1119 1120 UnicodeString& 1121 MessageFormat::format( const UnicodeString& pattern, 1122 const Formattable* arguments, 1123 int32_t cnt, 1124 UnicodeString& appendTo, 1125 UErrorCode& success) 1126 { 1127 MessageFormat temp(pattern, success); 1128 FieldPosition ignore(0); 1129 temp.format(arguments, cnt, appendTo, ignore, success); 1130 return appendTo; 1131 } 1132 1133 // ------------------------------------- 1134 // Formats the source Formattable object and copy into the 1135 // appendTo buffer. The Formattable object must be an array 1136 // of Formattable instances, returns error otherwise. 1137 1138 UnicodeString& 1139 MessageFormat::format(const Formattable& source, 1140 UnicodeString& appendTo, 1141 FieldPosition& ignore, 1142 UErrorCode& success) const 1143 { 1144 int32_t cnt; 1145 1146 if (U_FAILURE(success)) 1147 return appendTo; 1148 if (source.getType() != Formattable::kArray) { 1149 success = U_ILLEGAL_ARGUMENT_ERROR; 1150 return appendTo; 1151 } 1152 const Formattable* tmpPtr = source.getArray(cnt); 1153 1154 return format(tmpPtr, cnt, appendTo, ignore, 0, success); 1155 } 1156 1157 1158 UnicodeString& 1159 MessageFormat::format(const UnicodeString* argumentNames, 1160 const Formattable* arguments, 1161 int32_t count, 1162 UnicodeString& appendTo, 1163 UErrorCode& success) const { 1164 FieldPosition ignore(0); 1165 return format(arguments, argumentNames, count, appendTo, ignore, 0, success); 1166 } 1167 1168 UnicodeString& 1169 MessageFormat::format(const Formattable* arguments, 1170 int32_t cnt, 1171 UnicodeString& appendTo, 1172 FieldPosition& status, 1173 int32_t recursionProtection, 1174 UErrorCode& success) const 1175 { 1176 return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1177 } 1178 1179 // ------------------------------------- 1180 // Formats the arguments Formattable array and copy into the appendTo buffer. 1181 // Ignore the FieldPosition result for error checking. 1182 1183 UnicodeString& 1184 MessageFormat::format(const Formattable* arguments, 1185 const UnicodeString *argumentNames, 1186 int32_t cnt, 1187 UnicodeString& appendTo, 1188 FieldPosition& status, 1189 int32_t recursionProtection, 1190 UErrorCode& success) const 1191 { 1192 int32_t lastOffset = 0; 1193 int32_t argumentNumber=0; 1194 if (cnt < 0 || (cnt && arguments == NULL)) { 1195 success = U_ILLEGAL_ARGUMENT_ERROR; 1196 return appendTo; 1197 } 1198 1199 if ( !isArgNumeric && argumentNames== NULL ) { 1200 success = U_ILLEGAL_ARGUMENT_ERROR; 1201 return appendTo; 1202 } 1203 1204 const Formattable *obj=NULL; 1205 for (int32_t i=0; i<subformatCount; ++i) { 1206 // Append the prefix of current format element. 1207 appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset); 1208 lastOffset = subformats[i].offset; 1209 obj = NULL; 1210 if (isArgNumeric) { 1211 argumentNumber = subformats[i].argNum; 1212 1213 // Checks the scope of the argument number. 1214 if (argumentNumber >= cnt) { 1215 appendTo += LEFT_CURLY_BRACE; 1216 itos(argumentNumber, appendTo); 1217 appendTo += RIGHT_CURLY_BRACE; 1218 continue; 1219 } 1220 obj = arguments+argumentNumber; 1221 } 1222 else { 1223 for (int32_t j=0; j<cnt; ++j) { 1224 if (argumentNames[j]== *subformats[i].argName ) { 1225 obj = arguments+j; 1226 break; 1227 } 1228 } 1229 if (obj == NULL ) { 1230 appendTo += LEFT_CURLY_BRACE; 1231 appendTo += *subformats[i].argName; 1232 appendTo += RIGHT_CURLY_BRACE; 1233 continue; 1234 1235 } 1236 } 1237 Formattable::Type type = obj->getType(); 1238 1239 // Recursively calling the format process only if the current 1240 // format argument refers to either of the following: 1241 // a ChoiceFormat object ,a PluralFormat object, a SelectFormat object. 1242 Format* fmt = subformats[i].format; 1243 if (fmt != NULL) { 1244 UnicodeString argNum; 1245 fmt->format(*obj, argNum, success); 1246 1247 // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the 1248 // MessageFormat pattern application. 1249 if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() || 1250 fmt->getDynamicClassID() == PluralFormat::getStaticClassID() || 1251 fmt->getDynamicClassID() == SelectFormat::getStaticClassID() 1252 ) && 1253 argNum.indexOf(LEFT_CURLY_BRACE) >= 0) { 1254 MessageFormat temp(argNum, fLocale, success); 1255 // TODO: Implement recursion protection 1256 if ( isArgNumeric ) { 1257 temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); 1258 } 1259 else { 1260 temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success); 1261 } 1262 if (U_FAILURE(success)) { 1263 return appendTo; 1264 } 1265 } 1266 else { 1267 appendTo += argNum; 1268 } 1269 } 1270 // If the obj data type is a number, use a NumberFormat instance. 1271 else if ((type == Formattable::kDouble) || 1272 (type == Formattable::kLong) || 1273 (type == Formattable::kInt64)) { 1274 1275 const NumberFormat* nf = getDefaultNumberFormat(success); 1276 if (nf == NULL) { 1277 return appendTo; 1278 } 1279 if (type == Formattable::kDouble) { 1280 nf->format(obj->getDouble(), appendTo); 1281 } else if (type == Formattable::kLong) { 1282 nf->format(obj->getLong(), appendTo); 1283 } else { 1284 nf->format(obj->getInt64(), appendTo); 1285 } 1286 } 1287 // If the obj data type is a Date instance, use a DateFormat instance. 1288 else if (type == Formattable::kDate) { 1289 const DateFormat* df = getDefaultDateFormat(success); 1290 if (df == NULL) { 1291 return appendTo; 1292 } 1293 df->format(obj->getDate(), appendTo); 1294 } 1295 else if (type == Formattable::kString) { 1296 appendTo += obj->getString(); 1297 } 1298 else { 1299 success = U_ILLEGAL_ARGUMENT_ERROR; 1300 return appendTo; 1301 } 1302 } 1303 // Appends the rest of the pattern characters after the real last offset. 1304 appendTo.append(fPattern, lastOffset, 0x7fffffff); 1305 return appendTo; 1306 } 1307 1308 1309 // ------------------------------------- 1310 // Parses the source pattern and returns the Formattable objects array, 1311 // the array count and the ending parse position. The caller of this method 1312 // owns the array. 1313 1314 Formattable* 1315 MessageFormat::parse(const UnicodeString& source, 1316 ParsePosition& pos, 1317 int32_t& count) const 1318 { 1319 // Allocate at least one element. Allocating an array of length 1320 // zero causes problems on some platforms (e.g. Win32). 1321 Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1]; 1322 int32_t patternOffset = 0; 1323 int32_t sourceOffset = pos.getIndex(); 1324 ParsePosition tempPos(0); 1325 count = 0; // {sfb} reset to zero 1326 int32_t len; 1327 // If resultArray could not be created, exit out. 1328 // Avoid crossing initialization of variables above. 1329 if (resultArray == NULL) { 1330 goto PARSE_ERROR; 1331 } 1332 for (int32_t i = 0; i < subformatCount; ++i) { 1333 // match up to format 1334 len = subformats[i].offset - patternOffset; 1335 if (len == 0 || 1336 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1337 sourceOffset += len; 1338 patternOffset += len; 1339 } 1340 else { 1341 goto PARSE_ERROR; 1342 } 1343 1344 // now use format 1345 Format* fmt = subformats[i].format; 1346 int32_t argNum = subformats[i].argNum; 1347 if (fmt == NULL) { // string format 1348 // if at end, use longest possible match 1349 // otherwise uses first match to intervening string 1350 // does NOT recursively try all possibilities 1351 int32_t tempLength = (i+1<subformatCount) ? 1352 subformats[i+1].offset : fPattern.length(); 1353 1354 int32_t next; 1355 if (patternOffset >= tempLength) { 1356 next = source.length(); 1357 } 1358 else { 1359 UnicodeString buffer; 1360 fPattern.extract(patternOffset,tempLength - patternOffset, buffer); 1361 next = source.indexOf(buffer, sourceOffset); 1362 } 1363 1364 if (next < 0) { 1365 goto PARSE_ERROR; 1366 } 1367 else { 1368 UnicodeString buffer; 1369 source.extract(sourceOffset,next - sourceOffset, buffer); 1370 UnicodeString strValue = buffer; 1371 UnicodeString temp(LEFT_CURLY_BRACE); 1372 // {sfb} check this later 1373 if (isArgNumeric) { 1374 itos(argNum, temp); 1375 } 1376 else { 1377 temp+=(*subformats[i].argName); 1378 } 1379 temp += RIGHT_CURLY_BRACE; 1380 if (strValue != temp) { 1381 source.extract(sourceOffset,next - sourceOffset, buffer); 1382 resultArray[argNum].setString(buffer); 1383 // {sfb} not sure about this 1384 if ((argNum + 1) > count) { 1385 count = argNum + 1; 1386 } 1387 } 1388 sourceOffset = next; 1389 } 1390 } 1391 else { 1392 tempPos.setIndex(sourceOffset); 1393 fmt->parseObject(source, resultArray[argNum], tempPos); 1394 if (tempPos.getIndex() == sourceOffset) { 1395 goto PARSE_ERROR; 1396 } 1397 1398 if ((argNum + 1) > count) { 1399 count = argNum + 1; 1400 } 1401 sourceOffset = tempPos.getIndex(); // update 1402 } 1403 } 1404 len = fPattern.length() - patternOffset; 1405 if (len == 0 || 1406 fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { 1407 pos.setIndex(sourceOffset + len); 1408 return resultArray; 1409 } 1410 // else fall through... 1411 1412 PARSE_ERROR: 1413 pos.setErrorIndex(sourceOffset); 1414 delete [] resultArray; 1415 count = 0; 1416 return NULL; // leave index as is to signal error 1417 } 1418 1419 // ------------------------------------- 1420 // Parses the source string and returns the array of 1421 // Formattable objects and the array count. The caller 1422 // owns the returned array. 1423 1424 Formattable* 1425 MessageFormat::parse(const UnicodeString& source, 1426 int32_t& cnt, 1427 UErrorCode& success) const 1428 { 1429 if (!isArgNumeric ) { 1430 success = U_ARGUMENT_TYPE_MISMATCH; 1431 return NULL; 1432 } 1433 ParsePosition status(0); 1434 // Calls the actual implementation method and starts 1435 // from zero offset of the source text. 1436 Formattable* result = parse(source, status, cnt); 1437 if (status.getIndex() == 0) { 1438 success = U_MESSAGE_PARSE_ERROR; 1439 delete[] result; 1440 return NULL; 1441 } 1442 return result; 1443 } 1444 1445 // ------------------------------------- 1446 // Parses the source text and copy into the result buffer. 1447 1448 void 1449 MessageFormat::parseObject( const UnicodeString& source, 1450 Formattable& result, 1451 ParsePosition& status) const 1452 { 1453 int32_t cnt = 0; 1454 Formattable* tmpResult = parse(source, status, cnt); 1455 if (tmpResult != NULL) 1456 result.adoptArray(tmpResult, cnt); 1457 } 1458 1459 UnicodeString 1460 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1461 UnicodeString result; 1462 if (U_SUCCESS(status)) { 1463 int32_t plen = pattern.length(); 1464 const UChar* pat = pattern.getBuffer(); 1465 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1466 UChar* buf = result.getBuffer(blen); 1467 if (buf == NULL) { 1468 status = U_MEMORY_ALLOCATION_ERROR; 1469 } else { 1470 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1471 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1472 } 1473 } 1474 if (U_FAILURE(status)) { 1475 result.setToBogus(); 1476 } 1477 return result; 1478 } 1479 1480 // ------------------------------------- 1481 1482 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1483 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1484 if (fmt == NULL) { 1485 ec = U_MEMORY_ALLOCATION_ERROR; 1486 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1487 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1488 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1489 } 1490 return fmt; 1491 } 1492 1493 /** 1494 * Reads the segments[] array (see applyPattern()) and parses the 1495 * segments[1..3] into a Format* object. Stores the format object in 1496 * the subformats[] array. Updates the argTypes[] array type 1497 * information for the corresponding argument. 1498 * 1499 * @param formatNumber index into subformats[] for this format 1500 * @param segments array of strings with the parsed pattern segments 1501 * @param parseError parse error data (output param) 1502 * @param ec error code 1503 */ 1504 void 1505 MessageFormat::makeFormat(int32_t formatNumber, 1506 UnicodeString* segments, 1507 UParseError& parseError, 1508 UErrorCode& ec) { 1509 if (U_FAILURE(ec)) { 1510 return; 1511 } 1512 1513 // Parse the argument number 1514 int32_t argumentNumber = stou(segments[1]); // always unlocalized! 1515 UnicodeString argumentName; 1516 if (argumentNumber < 0) { 1517 if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) { 1518 ec = U_INVALID_FORMAT_ERROR; 1519 return; 1520 } 1521 isArgNumeric = FALSE; 1522 argumentNumber=formatNumber; 1523 } 1524 if (!isArgNumeric) { 1525 if ( !isLegalArgName(segments[1]) ) { 1526 ec = U_INVALID_FORMAT_ERROR; 1527 return; 1528 } 1529 argumentName = segments[1]; 1530 } 1531 1532 // Parse the format, recording the argument type and creating a 1533 // new Format object (except for string arguments). 1534 Formattable::Type argType; 1535 Format *fmt = NULL; 1536 int32_t typeID, styleID; 1537 DateFormat::EStyle style; 1538 UnicodeString unquotedPattern, quotedPattern; 1539 UBool inQuote = FALSE; 1540 1541 switch (typeID = findKeyword(segments[2], TYPE_IDS)) { 1542 1543 case 0: // string 1544 argType = Formattable::kString; 1545 break; 1546 1547 case 1: // number 1548 argType = Formattable::kDouble; 1549 1550 switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) { 1551 case 0: // default 1552 fmt = NumberFormat::createInstance(fLocale, ec); 1553 break; 1554 case 1: // currency 1555 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1556 break; 1557 case 2: // percent 1558 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1559 break; 1560 case 3: // integer 1561 argType = Formattable::kLong; 1562 fmt = createIntegerFormat(fLocale, ec); 1563 break; 1564 default: // pattern 1565 fmt = NumberFormat::createInstance(fLocale, ec); 1566 if (fmt && 1567 fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 1568 ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec); 1569 } 1570 break; 1571 } 1572 break; 1573 1574 case 2: // date 1575 case 3: // time 1576 argType = Formattable::kDate; 1577 styleID = findKeyword(segments[3], DATE_STYLE_IDS); 1578 style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1579 1580 if (typeID == 2) { 1581 fmt = DateFormat::createDateInstance(style, fLocale); 1582 } else { 1583 fmt = DateFormat::createTimeInstance(style, fLocale); 1584 } 1585 1586 if (styleID < 0 && 1587 fmt != NULL && 1588 fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { 1589 ((SimpleDateFormat*)fmt)->applyPattern(segments[3]); 1590 } 1591 break; 1592 1593 case 4: // choice 1594 argType = Formattable::kDouble; 1595 1596 fmt = new ChoiceFormat(segments[3], parseError, ec); 1597 break; 1598 1599 case 5: // spellout 1600 argType = Formattable::kDouble; 1601 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec); 1602 break; 1603 case 6: // ordinal 1604 argType = Formattable::kDouble; 1605 fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec); 1606 break; 1607 case 7: // duration 1608 argType = Formattable::kDouble; 1609 fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); 1610 break; 1611 case 8: // plural 1612 case 9: // Select 1613 if(typeID == 8) 1614 argType = Formattable::kDouble; 1615 else 1616 argType = Formattable::kString; 1617 quotedPattern = segments[3]; 1618 for (int32_t i = 0; i < quotedPattern.length(); ++i) { 1619 UChar ch = quotedPattern.charAt(i); 1620 if (ch == SINGLE_QUOTE) { 1621 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) { 1622 unquotedPattern+=ch; 1623 ++i; 1624 } 1625 else { 1626 inQuote = !inQuote; 1627 } 1628 } 1629 else { 1630 unquotedPattern += ch; 1631 } 1632 } 1633 if(typeID == 8) 1634 fmt = new PluralFormat(fLocale, unquotedPattern, ec); 1635 else 1636 fmt = new SelectFormat(unquotedPattern, ec); 1637 break; 1638 default: 1639 argType = Formattable::kString; 1640 ec = U_ILLEGAL_ARGUMENT_ERROR; 1641 break; 1642 } 1643 1644 if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) { 1645 ec = U_MEMORY_ALLOCATION_ERROR; 1646 } 1647 1648 if (!allocateSubformats(formatNumber+1) || 1649 !allocateArgTypes(argumentNumber+1)) { 1650 ec = U_MEMORY_ALLOCATION_ERROR; 1651 } 1652 1653 if (U_FAILURE(ec)) { 1654 delete fmt; 1655 return; 1656 } 1657 1658 // Parse succeeded; record results in our arrays 1659 subformats[formatNumber].format = fmt; 1660 subformats[formatNumber].offset = segments[0].length(); 1661 if (isArgNumeric) { 1662 subformats[formatNumber].argName = NULL; 1663 subformats[formatNumber].argNum = argumentNumber; 1664 } 1665 else { 1666 subformats[formatNumber].argName = new UnicodeString(argumentName); 1667 subformats[formatNumber].argNum = -1; 1668 } 1669 subformatCount = formatNumber+1; 1670 1671 // Careful here: argumentNumber may in general arrive out of 1672 // sequence, e.g., "There was {2} on {0,date} (see {1,number})." 1673 argTypes[argumentNumber] = argType; 1674 if (argumentNumber+1 > argTypeCount) { 1675 argTypeCount = argumentNumber+1; 1676 } 1677 } 1678 1679 // ------------------------------------- 1680 // Finds the string, s, in the string array, list. 1681 int32_t MessageFormat::findKeyword(const UnicodeString& s, 1682 const UChar * const *list) 1683 { 1684 if (s.length() == 0) 1685 return 0; // default 1686 1687 UnicodeString buffer = s; 1688 // Trims the space characters and turns all characters 1689 // in s to lower case. 1690 buffer.trim().toLower(""); 1691 for (int32_t i = 0; list[i]; ++i) { 1692 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1693 return i; 1694 } 1695 } 1696 return -1; 1697 } 1698 1699 // ------------------------------------- 1700 // Checks the range of the source text to quote the special 1701 // characters, { and ' and copy to target buffer. 1702 1703 void 1704 MessageFormat::copyAndFixQuotes(const UnicodeString& source, 1705 int32_t start, 1706 int32_t end, 1707 UnicodeString& appendTo) 1708 { 1709 UBool gotLB = FALSE; 1710 1711 for (int32_t i = start; i < end; ++i) { 1712 UChar ch = source[i]; 1713 if (ch == LEFT_CURLY_BRACE) { 1714 appendTo += SINGLE_QUOTE; 1715 appendTo += LEFT_CURLY_BRACE; 1716 appendTo += SINGLE_QUOTE; 1717 gotLB = TRUE; 1718 } 1719 else if (ch == RIGHT_CURLY_BRACE) { 1720 if(gotLB) { 1721 appendTo += RIGHT_CURLY_BRACE; 1722 gotLB = FALSE; 1723 } 1724 else { 1725 // orig code. 1726 appendTo += SINGLE_QUOTE; 1727 appendTo += RIGHT_CURLY_BRACE; 1728 appendTo += SINGLE_QUOTE; 1729 } 1730 } 1731 else if (ch == SINGLE_QUOTE) { 1732 appendTo += SINGLE_QUOTE; 1733 appendTo += SINGLE_QUOTE; 1734 } 1735 else { 1736 appendTo += ch; 1737 } 1738 } 1739 } 1740 1741 /** 1742 * Convenience method that ought to be in NumberFormat 1743 */ 1744 NumberFormat* 1745 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1746 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1747 if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) { 1748 DecimalFormat *temp2 = (DecimalFormat*) temp; 1749 temp2->setMaximumFractionDigits(0); 1750 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1751 temp2->setParseIntegerOnly(TRUE); 1752 } 1753 1754 return temp; 1755 } 1756 1757 /** 1758 * Return the default number format. Used to format a numeric 1759 * argument when subformats[i].format is NULL. Returns NULL 1760 * on failure. 1761 * 1762 * Semantically const but may modify *this. 1763 */ 1764 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1765 if (defaultNumberFormat == NULL) { 1766 MessageFormat* t = (MessageFormat*) this; 1767 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1768 if (U_FAILURE(ec)) { 1769 delete t->defaultNumberFormat; 1770 t->defaultNumberFormat = NULL; 1771 } else if (t->defaultNumberFormat == NULL) { 1772 ec = U_MEMORY_ALLOCATION_ERROR; 1773 } 1774 } 1775 return defaultNumberFormat; 1776 } 1777 1778 /** 1779 * Return the default date format. Used to format a date 1780 * argument when subformats[i].format is NULL. Returns NULL 1781 * on failure. 1782 * 1783 * Semantically const but may modify *this. 1784 */ 1785 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1786 if (defaultDateFormat == NULL) { 1787 MessageFormat* t = (MessageFormat*) this; 1788 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1789 if (t->defaultDateFormat == NULL) { 1790 ec = U_MEMORY_ALLOCATION_ERROR; 1791 } 1792 } 1793 return defaultDateFormat; 1794 } 1795 1796 UBool 1797 MessageFormat::usesNamedArguments() const { 1798 return !isArgNumeric; 1799 } 1800 1801 UBool 1802 MessageFormat::isLegalArgName(const UnicodeString& argName) const { 1803 if(!u_hasBinaryProperty(argName.charAt(0), idStart)) { 1804 return FALSE; 1805 } 1806 for (int32_t i=1; i<argName.length(); ++i) { 1807 if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) { 1808 return FALSE; 1809 } 1810 } 1811 return TRUE; 1812 } 1813 1814 int32_t 1815 MessageFormat::getArgTypeCount() const { 1816 return argTypeCount; 1817 } 1818 1819 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1820 pos=0; 1821 fFormatNames = fNameList; 1822 } 1823 1824 const UnicodeString* 1825 FormatNameEnumeration::snext(UErrorCode& status) { 1826 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1827 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1828 } 1829 return NULL; 1830 } 1831 1832 void 1833 FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1834 pos=0; 1835 } 1836 1837 int32_t 1838 FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1839 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1840 } 1841 1842 FormatNameEnumeration::~FormatNameEnumeration() { 1843 UnicodeString *s; 1844 for (int32_t i=0; i<fFormatNames->size(); ++i) { 1845 if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) { 1846 delete s; 1847 } 1848 } 1849 delete fFormatNames; 1850 } 1851 U_NAMESPACE_END 1852 1853 #endif /* #if !UCONFIG_NO_FORMATTING */ 1854 1855 //eof 1856