1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * File CHOICFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation and got rid 14 * of nextDouble/previousDouble and replaced with 15 * boolean array. 16 * 4/10/97 aliu Clean up. Modified to work on AIX. 17 * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include 18 * wchar.h. 19 * 07/09/97 helena Made ParsePosition into a class. 20 * 08/06/97 nos removed overloaded constructor, fixed 'format(array)' 21 * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) 22 * 02/22/99 stephen Removed character literals for EBCDIC safety 23 ******************************************************************************** 24 */ 25 26 #include "unicode/utypes.h" 27 28 #if !UCONFIG_NO_FORMATTING 29 30 #include "unicode/choicfmt.h" 31 #include "unicode/numfmt.h" 32 #include "unicode/locid.h" 33 #include "cpputils.h" 34 #include "cstring.h" 35 #include "messageimpl.h" 36 #include "putilimp.h" 37 #include "uassert.h" 38 #include <stdio.h> 39 #include <float.h> 40 41 // ***************************************************************************** 42 // class ChoiceFormat 43 // ***************************************************************************** 44 45 U_NAMESPACE_BEGIN 46 47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) 48 49 // Special characters used by ChoiceFormat. There are two characters 50 // used interchangeably to indicate <=. Either is parsed, but only 51 // LESS_EQUAL is generated by toPattern(). 52 #define SINGLE_QUOTE ((UChar)0x0027) /*'*/ 53 #define LESS_THAN ((UChar)0x003C) /*<*/ 54 #define LESS_EQUAL ((UChar)0x0023) /*#*/ 55 #define LESS_EQUAL2 ((UChar)0x2264) 56 #define VERTICAL_BAR ((UChar)0x007C) /*|*/ 57 #define MINUS ((UChar)0x002D) /*-*/ 58 59 static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ 60 static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ 61 62 #ifdef INFINITY 63 #undef INFINITY 64 #endif 65 #define INFINITY ((UChar)0x221E) 66 67 //static const UChar gPositiveInfinity[] = {INFINITY, 0}; 68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; 69 #define POSITIVE_INF_STRLEN 1 70 #define NEGATIVE_INF_STRLEN 2 71 72 // ------------------------------------- 73 // Creates a ChoiceFormat instance based on the pattern. 74 75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 76 UErrorCode& status) 77 : constructorErrorCode(status), 78 msgPattern(status) 79 { 80 applyPattern(newPattern, status); 81 } 82 83 // ------------------------------------- 84 // Creates a ChoiceFormat instance with the limit array and 85 // format strings for each limit. 86 87 ChoiceFormat::ChoiceFormat(const double* limits, 88 const UnicodeString* formats, 89 int32_t cnt ) 90 : constructorErrorCode(U_ZERO_ERROR), 91 msgPattern(constructorErrorCode) 92 { 93 setChoices(limits, NULL, formats, cnt, constructorErrorCode); 94 } 95 96 // ------------------------------------- 97 98 ChoiceFormat::ChoiceFormat(const double* limits, 99 const UBool* closures, 100 const UnicodeString* formats, 101 int32_t cnt ) 102 : constructorErrorCode(U_ZERO_ERROR), 103 msgPattern(constructorErrorCode) 104 { 105 setChoices(limits, closures, formats, cnt, constructorErrorCode); 106 } 107 108 // ------------------------------------- 109 // copy constructor 110 111 ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) 112 : NumberFormat(that), 113 constructorErrorCode(that.constructorErrorCode), 114 msgPattern(that.msgPattern) 115 { 116 } 117 118 // ------------------------------------- 119 // Private constructor that creates a 120 // ChoiceFormat instance based on the 121 // pattern and populates UParseError 122 123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 124 UParseError& parseError, 125 UErrorCode& status) 126 : constructorErrorCode(status), 127 msgPattern(status) 128 { 129 applyPattern(newPattern,parseError, status); 130 } 131 // ------------------------------------- 132 133 UBool 134 ChoiceFormat::operator==(const Format& that) const 135 { 136 if (this == &that) return TRUE; 137 if (!NumberFormat::operator==(that)) return FALSE; 138 ChoiceFormat& thatAlias = (ChoiceFormat&)that; 139 return msgPattern == thatAlias.msgPattern; 140 } 141 142 // ------------------------------------- 143 // copy constructor 144 145 const ChoiceFormat& 146 ChoiceFormat::operator=(const ChoiceFormat& that) 147 { 148 if (this != &that) { 149 NumberFormat::operator=(that); 150 constructorErrorCode = that.constructorErrorCode; 151 msgPattern = that.msgPattern; 152 } 153 return *this; 154 } 155 156 // ------------------------------------- 157 158 ChoiceFormat::~ChoiceFormat() 159 { 160 } 161 162 // ------------------------------------- 163 164 /** 165 * Convert a double value to a string without the overhead of NumberFormat. 166 */ 167 UnicodeString& 168 ChoiceFormat::dtos(double value, 169 UnicodeString& string) 170 { 171 /* Buffer to contain the digits and any extra formatting stuff. */ 172 char temp[DBL_DIG + 16]; 173 char *itrPtr = temp; 174 char *expPtr; 175 176 sprintf(temp, "%.*g", DBL_DIG, value); 177 178 /* Find and convert the decimal point. 179 Using setlocale on some machines will cause sprintf to use a comma for certain locales. 180 */ 181 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { 182 itrPtr++; 183 } 184 if (*itrPtr != 0 && *itrPtr != 'e') { 185 /* We reached something that looks like a decimal point. 186 In case someone used setlocale(), which changes the decimal point. */ 187 *itrPtr = '.'; 188 itrPtr++; 189 } 190 /* Search for the exponent */ 191 while (*itrPtr && *itrPtr != 'e') { 192 itrPtr++; 193 } 194 if (*itrPtr == 'e') { 195 itrPtr++; 196 /* Verify the exponent sign */ 197 if (*itrPtr == '+' || *itrPtr == '-') { 198 itrPtr++; 199 } 200 /* Remove leading zeros. You will see this on Windows machines. */ 201 expPtr = itrPtr; 202 while (*itrPtr == '0') { 203 itrPtr++; 204 } 205 if (*itrPtr && expPtr != itrPtr) { 206 /* Shift the exponent without zeros. */ 207 while (*itrPtr) { 208 *(expPtr++) = *(itrPtr++); 209 } 210 // NULL terminate 211 *expPtr = 0; 212 } 213 } 214 215 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ 216 return string; 217 } 218 219 // ------------------------------------- 220 // calls the overloaded applyPattern method. 221 222 void 223 ChoiceFormat::applyPattern(const UnicodeString& pattern, 224 UErrorCode& status) 225 { 226 msgPattern.parseChoiceStyle(pattern, NULL, status); 227 constructorErrorCode = status; 228 } 229 230 // ------------------------------------- 231 // Applies the pattern to this ChoiceFormat instance. 232 233 void 234 ChoiceFormat::applyPattern(const UnicodeString& pattern, 235 UParseError& parseError, 236 UErrorCode& status) 237 { 238 msgPattern.parseChoiceStyle(pattern, &parseError, status); 239 constructorErrorCode = status; 240 } 241 // ------------------------------------- 242 // Returns the input pattern string. 243 244 UnicodeString& 245 ChoiceFormat::toPattern(UnicodeString& result) const 246 { 247 return result = msgPattern.getPatternString(); 248 } 249 250 // ------------------------------------- 251 // Sets the limit and format arrays. 252 void 253 ChoiceFormat::setChoices( const double* limits, 254 const UnicodeString* formats, 255 int32_t cnt ) 256 { 257 UErrorCode errorCode = U_ZERO_ERROR; 258 setChoices(limits, NULL, formats, cnt, errorCode); 259 } 260 261 // ------------------------------------- 262 // Sets the limit and format arrays. 263 void 264 ChoiceFormat::setChoices( const double* limits, 265 const UBool* closures, 266 const UnicodeString* formats, 267 int32_t cnt ) 268 { 269 UErrorCode errorCode = U_ZERO_ERROR; 270 setChoices(limits, closures, formats, cnt, errorCode); 271 } 272 273 void 274 ChoiceFormat::setChoices(const double* limits, 275 const UBool* closures, 276 const UnicodeString* formats, 277 int32_t count, 278 UErrorCode &errorCode) { 279 if (U_FAILURE(errorCode)) { 280 return; 281 } 282 if (limits == NULL || formats == NULL) { 283 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 284 return; 285 } 286 // Reconstruct the original input pattern. 287 // Modified version of the pre-ICU 4.8 toPattern() implementation. 288 UnicodeString result; 289 for (int32_t i = 0; i < count; ++i) { 290 if (i != 0) { 291 result += VERTICAL_BAR; 292 } 293 UnicodeString buf; 294 if (uprv_isPositiveInfinity(limits[i])) { 295 result += INFINITY; 296 } else if (uprv_isNegativeInfinity(limits[i])) { 297 result += MINUS; 298 result += INFINITY; 299 } else { 300 result += dtos(limits[i], buf); 301 } 302 if (closures != NULL && closures[i]) { 303 result += LESS_THAN; 304 } else { 305 result += LESS_EQUAL; 306 } 307 // Append formats[i], using quotes if there are special 308 // characters. Single quotes themselves must be escaped in 309 // either case. 310 const UnicodeString& text = formats[i]; 311 int32_t textLength = text.length(); 312 int32_t nestingLevel = 0; 313 for (int32_t j = 0; j < textLength; ++j) { 314 UChar c = text[j]; 315 if (c == SINGLE_QUOTE && nestingLevel == 0) { 316 // Double each top-level apostrophe. 317 result.append(c); 318 } else if (c == VERTICAL_BAR && nestingLevel == 0) { 319 // Surround each pipe symbol with apostrophes for quoting. 320 // If the next character is an apostrophe, then that will be doubled, 321 // and although the parser will see the apostrophe pairs beginning 322 // and ending one character earlier than our doubling, the result 323 // is as desired. 324 // | -> '|' 325 // |' -> '|''' 326 // |'' -> '|''''' etc. 327 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); 328 continue; // Skip the append(c) at the end of the loop body. 329 } else if (c == LEFT_CURLY_BRACE) { 330 ++nestingLevel; 331 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { 332 --nestingLevel; 333 } 334 result.append(c); 335 } 336 } 337 // Apply the reconstructed pattern. 338 applyPattern(result, errorCode); 339 } 340 341 // ------------------------------------- 342 // Gets the limit array. 343 344 const double* 345 ChoiceFormat::getLimits(int32_t& cnt) const 346 { 347 cnt = 0; 348 return NULL; 349 } 350 351 // ------------------------------------- 352 // Gets the closures array. 353 354 const UBool* 355 ChoiceFormat::getClosures(int32_t& cnt) const 356 { 357 cnt = 0; 358 return NULL; 359 } 360 361 // ------------------------------------- 362 // Gets the format array. 363 364 const UnicodeString* 365 ChoiceFormat::getFormats(int32_t& cnt) const 366 { 367 cnt = 0; 368 return NULL; 369 } 370 371 // ------------------------------------- 372 // Formats an int64 number, it's actually formatted as 373 // a double. The returned format string may differ 374 // from the input number because of this. 375 376 UnicodeString& 377 ChoiceFormat::format(int64_t number, 378 UnicodeString& appendTo, 379 FieldPosition& status) const 380 { 381 return format((double) number, appendTo, status); 382 } 383 384 // ------------------------------------- 385 // Formats an int32_t number, it's actually formatted as 386 // a double. 387 388 UnicodeString& 389 ChoiceFormat::format(int32_t number, 390 UnicodeString& appendTo, 391 FieldPosition& status) const 392 { 393 return format((double) number, appendTo, status); 394 } 395 396 // ------------------------------------- 397 // Formats a double number. 398 399 UnicodeString& 400 ChoiceFormat::format(double number, 401 UnicodeString& appendTo, 402 FieldPosition& /*pos*/) const 403 { 404 if (msgPattern.countParts() == 0) { 405 // No pattern was applied, or it failed. 406 return appendTo; 407 } 408 // Get the appropriate sub-message. 409 int32_t msgStart = findSubMessage(msgPattern, 0, number); 410 if (!MessageImpl::jdkAposMode(msgPattern)) { 411 int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); 412 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); 413 appendTo.append(msgPattern.getPatternString(), 414 patternStart, 415 msgPattern.getPatternIndex(msgLimit) - patternStart); 416 return appendTo; 417 } 418 // JDK compatibility mode: Remove SKIP_SYNTAX. 419 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); 420 } 421 422 int32_t 423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { 424 int32_t count = pattern.countParts(); 425 int32_t msgStart; 426 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples 427 // until ARG_LIMIT or end of choice-only pattern. 428 // Ignore the first number and selector and start the loop on the first message. 429 partIndex += 2; 430 for (;;) { 431 // Skip but remember the current sub-message. 432 msgStart = partIndex; 433 partIndex = pattern.getLimitPartIndex(partIndex); 434 if (++partIndex >= count) { 435 // Reached the end of the choice-only pattern. 436 // Return with the last sub-message. 437 break; 438 } 439 const MessagePattern::Part &part = pattern.getPart(partIndex++); 440 UMessagePatternPartType type = part.getType(); 441 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { 442 // Reached the end of the ChoiceFormat style. 443 // Return with the last sub-message. 444 break; 445 } 446 // part is an ARG_INT or ARG_DOUBLE 447 U_ASSERT(MessagePattern::Part::hasNumericValue(type)); 448 double boundary = pattern.getNumericValue(part); 449 // Fetch the ARG_SELECTOR character. 450 int32_t selectorIndex = pattern.getPatternIndex(partIndex++); 451 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); 452 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { 453 // The number is in the interval between the previous boundary and the current one. 454 // Return with the sub-message between them. 455 // The !(a>b) and !(a>=b) comparisons are equivalent to 456 // (a<=b) and (a<b) except they "catch" NaN. 457 break; 458 } 459 } 460 return msgStart; 461 } 462 463 // ------------------------------------- 464 // Formats an array of objects. Checks if the data type of the objects 465 // to get the right value for formatting. 466 467 UnicodeString& 468 ChoiceFormat::format(const Formattable* objs, 469 int32_t cnt, 470 UnicodeString& appendTo, 471 FieldPosition& pos, 472 UErrorCode& status) const 473 { 474 if(cnt < 0) { 475 status = U_ILLEGAL_ARGUMENT_ERROR; 476 return appendTo; 477 } 478 if (msgPattern.countParts() == 0) { 479 status = U_INVALID_STATE_ERROR; 480 return appendTo; 481 } 482 483 for (int32_t i = 0; i < cnt; i++) { 484 double objDouble = objs[i].getDouble(status); 485 if (U_SUCCESS(status)) { 486 format(objDouble, appendTo, pos); 487 } 488 } 489 490 return appendTo; 491 } 492 493 // ------------------------------------- 494 // Formats an array of objects. Checks if the data type of the objects 495 // to get the right value for formatting. 496 497 UnicodeString& 498 ChoiceFormat::format(const Formattable& obj, 499 UnicodeString& appendTo, 500 FieldPosition& pos, 501 UErrorCode& status) const 502 { 503 return NumberFormat::format(obj, appendTo, pos, status); 504 } 505 // ------------------------------------- 506 507 void 508 ChoiceFormat::parse(const UnicodeString& text, 509 Formattable& result, 510 ParsePosition& pos) const 511 { 512 result.setDouble(parseArgument(msgPattern, 0, text, pos)); 513 } 514 515 double 516 ChoiceFormat::parseArgument( 517 const MessagePattern &pattern, int32_t partIndex, 518 const UnicodeString &source, ParsePosition &pos) { 519 // find the best number (defined as the one with the longest parse) 520 int32_t start = pos.getIndex(); 521 int32_t furthest = start; 522 double bestNumber = uprv_getNaN(); 523 double tempNumber = 0.0; 524 int32_t count = pattern.countParts(); 525 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { 526 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); 527 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR 528 int32_t msgLimit = pattern.getLimitPartIndex(partIndex); 529 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); 530 if (len >= 0) { 531 int32_t newIndex = start + len; 532 if (newIndex > furthest) { 533 furthest = newIndex; 534 bestNumber = tempNumber; 535 if (furthest == source.length()) { 536 break; 537 } 538 } 539 } 540 partIndex = msgLimit + 1; 541 } 542 if (furthest == start) { 543 pos.setErrorIndex(start); 544 } else { 545 pos.setIndex(furthest); 546 } 547 return bestNumber; 548 } 549 550 int32_t 551 ChoiceFormat::matchStringUntilLimitPart( 552 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, 553 const UnicodeString &source, int32_t sourceOffset) { 554 int32_t matchingSourceLength = 0; 555 const UnicodeString &msgString = pattern.getPatternString(); 556 int32_t prevIndex = pattern.getPart(partIndex).getLimit(); 557 for (;;) { 558 const MessagePattern::Part &part = pattern.getPart(++partIndex); 559 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 560 int32_t index = part.getIndex(); 561 int32_t length = index - prevIndex; 562 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { 563 return -1; // mismatch 564 } 565 matchingSourceLength += length; 566 if (partIndex == limitPartIndex) { 567 return matchingSourceLength; 568 } 569 prevIndex = part.getLimit(); // SKIP_SYNTAX 570 } 571 } 572 } 573 574 // ------------------------------------- 575 // Parses the text and return the Formattable object. 576 577 void 578 ChoiceFormat::parse(const UnicodeString& text, 579 Formattable& result, 580 UErrorCode& status) const 581 { 582 NumberFormat::parse(text, result, status); 583 } 584 585 // ------------------------------------- 586 587 Format* 588 ChoiceFormat::clone() const 589 { 590 ChoiceFormat *aCopy = new ChoiceFormat(*this); 591 return aCopy; 592 } 593 594 U_NAMESPACE_END 595 596 #endif /* #if !UCONFIG_NO_FORMATTING */ 597 598 //eof 599