1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2013, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 * 9 * File CHOICFMT.CPP 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 02/19/97 aliu Converted from java. 15 * 03/20/97 helena Finished first cut of implementation and got rid 16 * of nextDouble/previousDouble and replaced with 17 * boolean array. 18 * 4/10/97 aliu Clean up. Modified to work on AIX. 19 * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include 20 * wchar.h. 21 * 07/09/97 helena Made ParsePosition into a class. 22 * 08/06/97 nos removed overloaded constructor, fixed 'format(array)' 23 * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) 24 * 02/22/99 stephen Removed character literals for EBCDIC safety 25 ******************************************************************************** 26 */ 27 28 #include "unicode/utypes.h" 29 30 #if !UCONFIG_NO_FORMATTING 31 32 #include "unicode/choicfmt.h" 33 #include "unicode/numfmt.h" 34 #include "unicode/locid.h" 35 #include "cpputils.h" 36 #include "cstring.h" 37 #include "messageimpl.h" 38 #include "putilimp.h" 39 #include "uassert.h" 40 #include <stdio.h> 41 #include <float.h> 42 43 // ***************************************************************************** 44 // class ChoiceFormat 45 // ***************************************************************************** 46 47 U_NAMESPACE_BEGIN 48 49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) 50 51 // Special characters used by ChoiceFormat. There are two characters 52 // used interchangeably to indicate <=. Either is parsed, but only 53 // LESS_EQUAL is generated by toPattern(). 54 #define SINGLE_QUOTE ((UChar)0x0027) /*'*/ 55 #define LESS_THAN ((UChar)0x003C) /*<*/ 56 #define LESS_EQUAL ((UChar)0x0023) /*#*/ 57 #define LESS_EQUAL2 ((UChar)0x2264) 58 #define VERTICAL_BAR ((UChar)0x007C) /*|*/ 59 #define MINUS ((UChar)0x002D) /*-*/ 60 61 static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ 62 static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ 63 64 #ifdef INFINITY 65 #undef INFINITY 66 #endif 67 #define INFINITY ((UChar)0x221E) 68 69 //static const UChar gPositiveInfinity[] = {INFINITY, 0}; 70 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; 71 #define POSITIVE_INF_STRLEN 1 72 #define NEGATIVE_INF_STRLEN 2 73 74 // ------------------------------------- 75 // Creates a ChoiceFormat instance based on the pattern. 76 77 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 78 UErrorCode& status) 79 : constructorErrorCode(status), 80 msgPattern(status) 81 { 82 applyPattern(newPattern, status); 83 } 84 85 // ------------------------------------- 86 // Creates a ChoiceFormat instance with the limit array and 87 // format strings for each limit. 88 89 ChoiceFormat::ChoiceFormat(const double* limits, 90 const UnicodeString* formats, 91 int32_t cnt ) 92 : constructorErrorCode(U_ZERO_ERROR), 93 msgPattern(constructorErrorCode) 94 { 95 setChoices(limits, NULL, formats, cnt, constructorErrorCode); 96 } 97 98 // ------------------------------------- 99 100 ChoiceFormat::ChoiceFormat(const double* limits, 101 const UBool* closures, 102 const UnicodeString* formats, 103 int32_t cnt ) 104 : constructorErrorCode(U_ZERO_ERROR), 105 msgPattern(constructorErrorCode) 106 { 107 setChoices(limits, closures, formats, cnt, constructorErrorCode); 108 } 109 110 // ------------------------------------- 111 // copy constructor 112 113 ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) 114 : NumberFormat(that), 115 constructorErrorCode(that.constructorErrorCode), 116 msgPattern(that.msgPattern) 117 { 118 } 119 120 // ------------------------------------- 121 // Private constructor that creates a 122 // ChoiceFormat instance based on the 123 // pattern and populates UParseError 124 125 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 126 UParseError& parseError, 127 UErrorCode& status) 128 : constructorErrorCode(status), 129 msgPattern(status) 130 { 131 applyPattern(newPattern,parseError, status); 132 } 133 // ------------------------------------- 134 135 UBool 136 ChoiceFormat::operator==(const Format& that) const 137 { 138 if (this == &that) return TRUE; 139 if (!NumberFormat::operator==(that)) return FALSE; 140 ChoiceFormat& thatAlias = (ChoiceFormat&)that; 141 return msgPattern == thatAlias.msgPattern; 142 } 143 144 // ------------------------------------- 145 // copy constructor 146 147 const ChoiceFormat& 148 ChoiceFormat::operator=(const ChoiceFormat& that) 149 { 150 if (this != &that) { 151 NumberFormat::operator=(that); 152 constructorErrorCode = that.constructorErrorCode; 153 msgPattern = that.msgPattern; 154 } 155 return *this; 156 } 157 158 // ------------------------------------- 159 160 ChoiceFormat::~ChoiceFormat() 161 { 162 } 163 164 // ------------------------------------- 165 166 /** 167 * Convert a double value to a string without the overhead of NumberFormat. 168 */ 169 UnicodeString& 170 ChoiceFormat::dtos(double value, 171 UnicodeString& string) 172 { 173 /* Buffer to contain the digits and any extra formatting stuff. */ 174 char temp[DBL_DIG + 16]; 175 char *itrPtr = temp; 176 char *expPtr; 177 178 sprintf(temp, "%.*g", DBL_DIG, value); 179 180 /* Find and convert the decimal point. 181 Using setlocale on some machines will cause sprintf to use a comma for certain locales. 182 */ 183 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { 184 itrPtr++; 185 } 186 if (*itrPtr != 0 && *itrPtr != 'e') { 187 /* We reached something that looks like a decimal point. 188 In case someone used setlocale(), which changes the decimal point. */ 189 *itrPtr = '.'; 190 itrPtr++; 191 } 192 /* Search for the exponent */ 193 while (*itrPtr && *itrPtr != 'e') { 194 itrPtr++; 195 } 196 if (*itrPtr == 'e') { 197 itrPtr++; 198 /* Verify the exponent sign */ 199 if (*itrPtr == '+' || *itrPtr == '-') { 200 itrPtr++; 201 } 202 /* Remove leading zeros. You will see this on Windows machines. */ 203 expPtr = itrPtr; 204 while (*itrPtr == '0') { 205 itrPtr++; 206 } 207 if (*itrPtr && expPtr != itrPtr) { 208 /* Shift the exponent without zeros. */ 209 while (*itrPtr) { 210 *(expPtr++) = *(itrPtr++); 211 } 212 // NULL terminate 213 *expPtr = 0; 214 } 215 } 216 217 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ 218 return string; 219 } 220 221 // ------------------------------------- 222 // calls the overloaded applyPattern method. 223 224 void 225 ChoiceFormat::applyPattern(const UnicodeString& pattern, 226 UErrorCode& status) 227 { 228 msgPattern.parseChoiceStyle(pattern, NULL, status); 229 constructorErrorCode = status; 230 } 231 232 // ------------------------------------- 233 // Applies the pattern to this ChoiceFormat instance. 234 235 void 236 ChoiceFormat::applyPattern(const UnicodeString& pattern, 237 UParseError& parseError, 238 UErrorCode& status) 239 { 240 msgPattern.parseChoiceStyle(pattern, &parseError, status); 241 constructorErrorCode = status; 242 } 243 // ------------------------------------- 244 // Returns the input pattern string. 245 246 UnicodeString& 247 ChoiceFormat::toPattern(UnicodeString& result) const 248 { 249 return result = msgPattern.getPatternString(); 250 } 251 252 // ------------------------------------- 253 // Sets the limit and format arrays. 254 void 255 ChoiceFormat::setChoices( const double* limits, 256 const UnicodeString* formats, 257 int32_t cnt ) 258 { 259 UErrorCode errorCode = U_ZERO_ERROR; 260 setChoices(limits, NULL, formats, cnt, errorCode); 261 } 262 263 // ------------------------------------- 264 // Sets the limit and format arrays. 265 void 266 ChoiceFormat::setChoices( const double* limits, 267 const UBool* closures, 268 const UnicodeString* formats, 269 int32_t cnt ) 270 { 271 UErrorCode errorCode = U_ZERO_ERROR; 272 setChoices(limits, closures, formats, cnt, errorCode); 273 } 274 275 void 276 ChoiceFormat::setChoices(const double* limits, 277 const UBool* closures, 278 const UnicodeString* formats, 279 int32_t count, 280 UErrorCode &errorCode) { 281 if (U_FAILURE(errorCode)) { 282 return; 283 } 284 if (limits == NULL || formats == NULL) { 285 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 286 return; 287 } 288 // Reconstruct the original input pattern. 289 // Modified version of the pre-ICU 4.8 toPattern() implementation. 290 UnicodeString result; 291 for (int32_t i = 0; i < count; ++i) { 292 if (i != 0) { 293 result += VERTICAL_BAR; 294 } 295 UnicodeString buf; 296 if (uprv_isPositiveInfinity(limits[i])) { 297 result += INFINITY; 298 } else if (uprv_isNegativeInfinity(limits[i])) { 299 result += MINUS; 300 result += INFINITY; 301 } else { 302 result += dtos(limits[i], buf); 303 } 304 if (closures != NULL && closures[i]) { 305 result += LESS_THAN; 306 } else { 307 result += LESS_EQUAL; 308 } 309 // Append formats[i], using quotes if there are special 310 // characters. Single quotes themselves must be escaped in 311 // either case. 312 const UnicodeString& text = formats[i]; 313 int32_t textLength = text.length(); 314 int32_t nestingLevel = 0; 315 for (int32_t j = 0; j < textLength; ++j) { 316 UChar c = text[j]; 317 if (c == SINGLE_QUOTE && nestingLevel == 0) { 318 // Double each top-level apostrophe. 319 result.append(c); 320 } else if (c == VERTICAL_BAR && nestingLevel == 0) { 321 // Surround each pipe symbol with apostrophes for quoting. 322 // If the next character is an apostrophe, then that will be doubled, 323 // and although the parser will see the apostrophe pairs beginning 324 // and ending one character earlier than our doubling, the result 325 // is as desired. 326 // | -> '|' 327 // |' -> '|''' 328 // |'' -> '|''''' etc. 329 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); 330 continue; // Skip the append(c) at the end of the loop body. 331 } else if (c == LEFT_CURLY_BRACE) { 332 ++nestingLevel; 333 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { 334 --nestingLevel; 335 } 336 result.append(c); 337 } 338 } 339 // Apply the reconstructed pattern. 340 applyPattern(result, errorCode); 341 } 342 343 // ------------------------------------- 344 // Gets the limit array. 345 346 const double* 347 ChoiceFormat::getLimits(int32_t& cnt) const 348 { 349 cnt = 0; 350 return NULL; 351 } 352 353 // ------------------------------------- 354 // Gets the closures array. 355 356 const UBool* 357 ChoiceFormat::getClosures(int32_t& cnt) const 358 { 359 cnt = 0; 360 return NULL; 361 } 362 363 // ------------------------------------- 364 // Gets the format array. 365 366 const UnicodeString* 367 ChoiceFormat::getFormats(int32_t& cnt) const 368 { 369 cnt = 0; 370 return NULL; 371 } 372 373 // ------------------------------------- 374 // Formats an int64 number, it's actually formatted as 375 // a double. The returned format string may differ 376 // from the input number because of this. 377 378 UnicodeString& 379 ChoiceFormat::format(int64_t number, 380 UnicodeString& appendTo, 381 FieldPosition& status) const 382 { 383 return format((double) number, appendTo, status); 384 } 385 386 // ------------------------------------- 387 // Formats an int32_t number, it's actually formatted as 388 // a double. 389 390 UnicodeString& 391 ChoiceFormat::format(int32_t number, 392 UnicodeString& appendTo, 393 FieldPosition& status) const 394 { 395 return format((double) number, appendTo, status); 396 } 397 398 // ------------------------------------- 399 // Formats a double number. 400 401 UnicodeString& 402 ChoiceFormat::format(double number, 403 UnicodeString& appendTo, 404 FieldPosition& /*pos*/) const 405 { 406 if (msgPattern.countParts() == 0) { 407 // No pattern was applied, or it failed. 408 return appendTo; 409 } 410 // Get the appropriate sub-message. 411 int32_t msgStart = findSubMessage(msgPattern, 0, number); 412 if (!MessageImpl::jdkAposMode(msgPattern)) { 413 int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); 414 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); 415 appendTo.append(msgPattern.getPatternString(), 416 patternStart, 417 msgPattern.getPatternIndex(msgLimit) - patternStart); 418 return appendTo; 419 } 420 // JDK compatibility mode: Remove SKIP_SYNTAX. 421 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); 422 } 423 424 int32_t 425 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { 426 int32_t count = pattern.countParts(); 427 int32_t msgStart; 428 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples 429 // until ARG_LIMIT or end of choice-only pattern. 430 // Ignore the first number and selector and start the loop on the first message. 431 partIndex += 2; 432 for (;;) { 433 // Skip but remember the current sub-message. 434 msgStart = partIndex; 435 partIndex = pattern.getLimitPartIndex(partIndex); 436 if (++partIndex >= count) { 437 // Reached the end of the choice-only pattern. 438 // Return with the last sub-message. 439 break; 440 } 441 const MessagePattern::Part &part = pattern.getPart(partIndex++); 442 UMessagePatternPartType type = part.getType(); 443 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { 444 // Reached the end of the ChoiceFormat style. 445 // Return with the last sub-message. 446 break; 447 } 448 // part is an ARG_INT or ARG_DOUBLE 449 U_ASSERT(MessagePattern::Part::hasNumericValue(type)); 450 double boundary = pattern.getNumericValue(part); 451 // Fetch the ARG_SELECTOR character. 452 int32_t selectorIndex = pattern.getPatternIndex(partIndex++); 453 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); 454 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { 455 // The number is in the interval between the previous boundary and the current one. 456 // Return with the sub-message between them. 457 // The !(a>b) and !(a>=b) comparisons are equivalent to 458 // (a<=b) and (a<b) except they "catch" NaN. 459 break; 460 } 461 } 462 return msgStart; 463 } 464 465 // ------------------------------------- 466 // Formats an array of objects. Checks if the data type of the objects 467 // to get the right value for formatting. 468 469 UnicodeString& 470 ChoiceFormat::format(const Formattable* objs, 471 int32_t cnt, 472 UnicodeString& appendTo, 473 FieldPosition& pos, 474 UErrorCode& status) const 475 { 476 if(cnt < 0) { 477 status = U_ILLEGAL_ARGUMENT_ERROR; 478 return appendTo; 479 } 480 if (msgPattern.countParts() == 0) { 481 status = U_INVALID_STATE_ERROR; 482 return appendTo; 483 } 484 485 for (int32_t i = 0; i < cnt; i++) { 486 double objDouble = objs[i].getDouble(status); 487 if (U_SUCCESS(status)) { 488 format(objDouble, appendTo, pos); 489 } 490 } 491 492 return appendTo; 493 } 494 495 // ------------------------------------- 496 497 void 498 ChoiceFormat::parse(const UnicodeString& text, 499 Formattable& result, 500 ParsePosition& pos) const 501 { 502 result.setDouble(parseArgument(msgPattern, 0, text, pos)); 503 } 504 505 double 506 ChoiceFormat::parseArgument( 507 const MessagePattern &pattern, int32_t partIndex, 508 const UnicodeString &source, ParsePosition &pos) { 509 // find the best number (defined as the one with the longest parse) 510 int32_t start = pos.getIndex(); 511 int32_t furthest = start; 512 double bestNumber = uprv_getNaN(); 513 double tempNumber = 0.0; 514 int32_t count = pattern.countParts(); 515 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { 516 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); 517 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR 518 int32_t msgLimit = pattern.getLimitPartIndex(partIndex); 519 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); 520 if (len >= 0) { 521 int32_t newIndex = start + len; 522 if (newIndex > furthest) { 523 furthest = newIndex; 524 bestNumber = tempNumber; 525 if (furthest == source.length()) { 526 break; 527 } 528 } 529 } 530 partIndex = msgLimit + 1; 531 } 532 if (furthest == start) { 533 pos.setErrorIndex(start); 534 } else { 535 pos.setIndex(furthest); 536 } 537 return bestNumber; 538 } 539 540 int32_t 541 ChoiceFormat::matchStringUntilLimitPart( 542 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, 543 const UnicodeString &source, int32_t sourceOffset) { 544 int32_t matchingSourceLength = 0; 545 const UnicodeString &msgString = pattern.getPatternString(); 546 int32_t prevIndex = pattern.getPart(partIndex).getLimit(); 547 for (;;) { 548 const MessagePattern::Part &part = pattern.getPart(++partIndex); 549 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 550 int32_t index = part.getIndex(); 551 int32_t length = index - prevIndex; 552 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { 553 return -1; // mismatch 554 } 555 matchingSourceLength += length; 556 if (partIndex == limitPartIndex) { 557 return matchingSourceLength; 558 } 559 prevIndex = part.getLimit(); // SKIP_SYNTAX 560 } 561 } 562 } 563 564 // ------------------------------------- 565 566 Format* 567 ChoiceFormat::clone() const 568 { 569 ChoiceFormat *aCopy = new ChoiceFormat(*this); 570 return aCopy; 571 } 572 573 U_NAMESPACE_END 574 575 #endif /* #if !UCONFIG_NO_FORMATTING */ 576 577 //eof 578