1 /* 2 * Copyright (C) 2015, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 * 5 * file name: affixpatternparser.cpp 6 */ 7 8 #include "unicode/utypes.h" 9 10 #if !UCONFIG_NO_FORMATTING 11 12 #include "unicode/dcfmtsym.h" 13 #include "unicode/plurrule.h" 14 #include "unicode/ucurr.h" 15 #include "affixpatternparser.h" 16 #include "charstr.h" 17 #include "precision.h" 18 #include "uassert.h" 19 #include "unistrappender.h" 20 21 static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4}; 22 23 static UChar gPercent = 0x25; 24 static UChar gPerMill = 0x2030; 25 static UChar gNegative = 0x2D; 26 static UChar gPositive = 0x2B; 27 28 #define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | (l & 0xFF))) 29 30 #define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F)) 31 32 #define UNPACK_LONG(c) (((c) >> 8) & 0x80) 33 34 #define UNPACK_LENGTH(c) ((c) & 0xFF) 35 36 U_NAMESPACE_BEGIN 37 38 static int32_t 39 nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { 40 if (buffer[idx] != 0x27 || idx + 1 == len) { 41 *token = buffer[idx]; 42 return 1; 43 } 44 *token = buffer[idx + 1]; 45 if (buffer[idx + 1] == 0xA4) { 46 int32_t i = 2; 47 for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i); 48 return i; 49 } 50 return 2; 51 } 52 53 static int32_t 54 nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { 55 *token = buffer[idx]; 56 int32_t max; 57 switch (buffer[idx]) { 58 case 0x27: 59 max = 2; 60 break; 61 case 0xA4: 62 max = 3; 63 break; 64 default: 65 max = 1; 66 break; 67 } 68 int32_t i = 1; 69 for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i); 70 return i; 71 } 72 73 CurrencyAffixInfo::CurrencyAffixInfo() 74 : fSymbol(gDefaultSymbols, 1), 75 fISO(gDefaultSymbols, 2), 76 fLong(DigitAffix(gDefaultSymbols, 3)), 77 fIsDefault(TRUE) { 78 } 79 80 void 81 CurrencyAffixInfo::set( 82 const char *locale, 83 const PluralRules *rules, 84 const UChar *currency, 85 UErrorCode &status) { 86 if (U_FAILURE(status)) { 87 return; 88 } 89 fIsDefault = FALSE; 90 if (currency == NULL) { 91 fSymbol.setTo(gDefaultSymbols, 1); 92 fISO.setTo(gDefaultSymbols, 2); 93 fLong.remove(); 94 fLong.append(gDefaultSymbols, 3); 95 fIsDefault = TRUE; 96 return; 97 } 98 int32_t len; 99 UBool unusedIsChoice; 100 const UChar *symbol = ucurr_getName( 101 currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice, 102 &len, &status); 103 if (U_FAILURE(status)) { 104 return; 105 } 106 fSymbol.setTo(symbol, len); 107 fISO.setTo(currency, u_strlen(currency)); 108 fLong.remove(); 109 StringEnumeration* keywords = rules->getKeywords(status); 110 if (U_FAILURE(status)) { 111 return; 112 } 113 const UnicodeString* pluralCount; 114 while ((pluralCount = keywords->snext(status)) != NULL) { 115 CharString pCount; 116 pCount.appendInvariantChars(*pluralCount, status); 117 const UChar *pluralName = ucurr_getPluralName( 118 currency, locale, &unusedIsChoice, pCount.data(), 119 &len, &status); 120 fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status); 121 } 122 delete keywords; 123 } 124 125 void 126 CurrencyAffixInfo::adjustPrecision( 127 const UChar *currency, const UCurrencyUsage usage, 128 FixedPrecision &precision, UErrorCode &status) { 129 if (U_FAILURE(status)) { 130 return; 131 } 132 133 int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage( 134 currency, usage, &status); 135 precision.fMin.setFracDigitCount(digitCount); 136 precision.fMax.setFracDigitCount(digitCount); 137 double increment = ucurr_getRoundingIncrementForUsage( 138 currency, usage, &status); 139 if (increment == 0.0) { 140 precision.fRoundingIncrement.clear(); 141 } else { 142 precision.fRoundingIncrement.set(increment); 143 // guard against round-off error 144 precision.fRoundingIncrement.round(6); 145 } 146 } 147 148 void 149 AffixPattern::addLiteral( 150 const UChar *literal, int32_t start, int32_t len) { 151 char32Count += u_countChar32(literal + start, len); 152 literals.append(literal, start, len); 153 int32_t tlen = tokens.length(); 154 // Takes 4 UChars to encode maximum literal length. 155 UChar *tokenChars = tokens.getBuffer(tlen + 4); 156 157 // find start of literal size. May be tlen if there is no literal. 158 // While finding start of literal size, compute literal length 159 int32_t literalLength = 0; 160 int32_t tLiteralStart = tlen; 161 while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) { 162 tLiteralStart--; 163 literalLength <<= 8; 164 literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]); 165 } 166 // Add number of chars we just added to literal 167 literalLength += len; 168 169 // Now encode the new length starting at tLiteralStart 170 tlen = tLiteralStart; 171 tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF); 172 literalLength >>= 8; 173 while (literalLength) { 174 tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF); 175 literalLength >>= 8; 176 } 177 tokens.releaseBuffer(tlen); 178 } 179 180 void 181 AffixPattern::add(ETokenType t) { 182 add(t, 1); 183 } 184 185 void 186 AffixPattern::addCurrency(uint8_t count) { 187 add(kCurrency, count); 188 } 189 190 void 191 AffixPattern::add(ETokenType t, uint8_t count) { 192 U_ASSERT(t != kLiteral); 193 char32Count += count; 194 switch (t) { 195 case kCurrency: 196 hasCurrencyToken = TRUE; 197 break; 198 case kPercent: 199 hasPercentToken = TRUE; 200 break; 201 case kPerMill: 202 hasPermillToken = TRUE; 203 break; 204 default: 205 // Do nothing 206 break; 207 } 208 tokens.append(PACK_TOKEN_AND_LENGTH(t, count)); 209 } 210 211 AffixPattern & 212 AffixPattern::append(const AffixPattern &other) { 213 AffixPatternIterator iter; 214 other.iterator(iter); 215 UnicodeString literal; 216 while (iter.nextToken()) { 217 switch (iter.getTokenType()) { 218 case kLiteral: 219 iter.getLiteral(literal); 220 addLiteral(literal.getBuffer(), 0, literal.length()); 221 break; 222 case kCurrency: 223 addCurrency(iter.getTokenLength()); 224 break; 225 default: 226 add(iter.getTokenType()); 227 break; 228 } 229 } 230 return *this; 231 } 232 233 void 234 AffixPattern::remove() { 235 tokens.remove(); 236 literals.remove(); 237 hasCurrencyToken = FALSE; 238 hasPercentToken = FALSE; 239 hasPermillToken = FALSE; 240 char32Count = 0; 241 } 242 243 // escapes literals for strings where special characters are NOT escaped 244 // except for apostrophe. 245 static void escapeApostropheInLiteral( 246 const UnicodeString &literal, UnicodeStringAppender &appender) { 247 int32_t len = literal.length(); 248 const UChar *buffer = literal.getBuffer(); 249 for (int32_t i = 0; i < len; ++i) { 250 UChar ch = buffer[i]; 251 switch (ch) { 252 case 0x27: 253 appender.append((UChar) 0x27); 254 appender.append((UChar) 0x27); 255 break; 256 default: 257 appender.append(ch); 258 break; 259 } 260 } 261 } 262 263 264 // escapes literals for user strings where special characters in literals 265 // are escaped with apostrophe. 266 static void escapeLiteral( 267 const UnicodeString &literal, UnicodeStringAppender &appender) { 268 int32_t len = literal.length(); 269 const UChar *buffer = literal.getBuffer(); 270 for (int32_t i = 0; i < len; ++i) { 271 UChar ch = buffer[i]; 272 switch (ch) { 273 case 0x27: 274 appender.append((UChar) 0x27); 275 appender.append((UChar) 0x27); 276 break; 277 case 0x25: 278 appender.append((UChar) 0x27); 279 appender.append((UChar) 0x25); 280 appender.append((UChar) 0x27); 281 break; 282 case 0x2030: 283 appender.append((UChar) 0x27); 284 appender.append((UChar) 0x2030); 285 appender.append((UChar) 0x27); 286 break; 287 case 0xA4: 288 appender.append((UChar) 0x27); 289 appender.append((UChar) 0xA4); 290 appender.append((UChar) 0x27); 291 break; 292 case 0x2D: 293 appender.append((UChar) 0x27); 294 appender.append((UChar) 0x2D); 295 appender.append((UChar) 0x27); 296 break; 297 case 0x2B: 298 appender.append((UChar) 0x27); 299 appender.append((UChar) 0x2B); 300 appender.append((UChar) 0x27); 301 break; 302 default: 303 appender.append(ch); 304 break; 305 } 306 } 307 } 308 309 UnicodeString & 310 AffixPattern::toString(UnicodeString &appendTo) const { 311 AffixPatternIterator iter; 312 iterator(iter); 313 UnicodeStringAppender appender(appendTo); 314 UnicodeString literal; 315 while (iter.nextToken()) { 316 switch (iter.getTokenType()) { 317 case kLiteral: 318 escapeApostropheInLiteral(iter.getLiteral(literal), appender); 319 break; 320 case kPercent: 321 appender.append((UChar) 0x27); 322 appender.append((UChar) 0x25); 323 break; 324 case kPerMill: 325 appender.append((UChar) 0x27); 326 appender.append((UChar) 0x2030); 327 break; 328 case kCurrency: 329 { 330 appender.append((UChar) 0x27); 331 int32_t cl = iter.getTokenLength(); 332 for (int32_t i = 0; i < cl; ++i) { 333 appender.append((UChar) 0xA4); 334 } 335 } 336 break; 337 case kNegative: 338 appender.append((UChar) 0x27); 339 appender.append((UChar) 0x2D); 340 break; 341 case kPositive: 342 appender.append((UChar) 0x27); 343 appender.append((UChar) 0x2B); 344 break; 345 default: 346 U_ASSERT(FALSE); 347 break; 348 } 349 } 350 return appendTo; 351 } 352 353 UnicodeString & 354 AffixPattern::toUserString(UnicodeString &appendTo) const { 355 AffixPatternIterator iter; 356 iterator(iter); 357 UnicodeStringAppender appender(appendTo); 358 UnicodeString literal; 359 while (iter.nextToken()) { 360 switch (iter.getTokenType()) { 361 case kLiteral: 362 escapeLiteral(iter.getLiteral(literal), appender); 363 break; 364 case kPercent: 365 appender.append((UChar) 0x25); 366 break; 367 case kPerMill: 368 appender.append((UChar) 0x2030); 369 break; 370 case kCurrency: 371 { 372 int32_t cl = iter.getTokenLength(); 373 for (int32_t i = 0; i < cl; ++i) { 374 appender.append((UChar) 0xA4); 375 } 376 } 377 break; 378 case kNegative: 379 appender.append((UChar) 0x2D); 380 break; 381 case kPositive: 382 appender.append((UChar) 0x2B); 383 break; 384 default: 385 U_ASSERT(FALSE); 386 break; 387 } 388 } 389 return appendTo; 390 } 391 392 class AffixPatternAppender : public UMemory { 393 public: 394 AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { } 395 396 inline void append(UChar x) { 397 if (fIdx == UPRV_LENGTHOF(fBuffer)) { 398 fDest->addLiteral(fBuffer, 0, fIdx); 399 fIdx = 0; 400 } 401 fBuffer[fIdx++] = x; 402 } 403 404 inline void append(UChar32 x) { 405 if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { 406 fDest->addLiteral(fBuffer, 0, fIdx); 407 fIdx = 0; 408 } 409 U16_APPEND_UNSAFE(fBuffer, fIdx, x); 410 } 411 412 inline void flush() { 413 if (fIdx) { 414 fDest->addLiteral(fBuffer, 0, fIdx); 415 } 416 fIdx = 0; 417 } 418 419 /** 420 * flush the buffer when we go out of scope. 421 */ 422 ~AffixPatternAppender() { 423 flush(); 424 } 425 private: 426 AffixPattern *fDest; 427 int32_t fIdx; 428 UChar fBuffer[32]; 429 AffixPatternAppender(const AffixPatternAppender &other); 430 AffixPatternAppender &operator=(const AffixPatternAppender &other); 431 }; 432 433 434 AffixPattern & 435 AffixPattern::parseUserAffixString( 436 const UnicodeString &affixStr, 437 AffixPattern &appendTo, 438 UErrorCode &status) { 439 if (U_FAILURE(status)) { 440 return appendTo; 441 } 442 int32_t len = affixStr.length(); 443 const UChar *buffer = affixStr.getBuffer(); 444 // 0 = not quoted; 1 = quoted. 445 int32_t state = 0; 446 AffixPatternAppender appender(appendTo); 447 for (int32_t i = 0; i < len; ) { 448 UChar token; 449 int32_t tokenSize = nextUserToken(buffer, i, len, &token); 450 i += tokenSize; 451 if (token == 0x27 && tokenSize == 1) { // quote 452 state = 1 - state; 453 continue; 454 } 455 if (state == 0) { 456 switch (token) { 457 case 0x25: 458 appender.flush(); 459 appendTo.add(kPercent, 1); 460 break; 461 case 0x27: // double quote 462 appender.append((UChar) 0x27); 463 break; 464 case 0x2030: 465 appender.flush(); 466 appendTo.add(kPerMill, 1); 467 break; 468 case 0x2D: 469 appender.flush(); 470 appendTo.add(kNegative, 1); 471 break; 472 case 0x2B: 473 appender.flush(); 474 appendTo.add(kPositive, 1); 475 break; 476 case 0xA4: 477 appender.flush(); 478 appendTo.add(kCurrency, tokenSize); 479 break; 480 default: 481 appender.append(token); 482 break; 483 } 484 } else { 485 switch (token) { 486 case 0x27: // double quote 487 appender.append((UChar) 0x27); 488 break; 489 case 0xA4: // included b/c tokenSize can be > 1 490 for (int32_t j = 0; j < tokenSize; ++j) { 491 appender.append((UChar) 0xA4); 492 } 493 break; 494 default: 495 appender.append(token); 496 break; 497 } 498 } 499 } 500 return appendTo; 501 } 502 503 AffixPattern & 504 AffixPattern::parseAffixString( 505 const UnicodeString &affixStr, 506 AffixPattern &appendTo, 507 UErrorCode &status) { 508 if (U_FAILURE(status)) { 509 return appendTo; 510 } 511 int32_t len = affixStr.length(); 512 const UChar *buffer = affixStr.getBuffer(); 513 for (int32_t i = 0; i < len; ) { 514 UChar token; 515 int32_t tokenSize = nextToken(buffer, i, len, &token); 516 if (tokenSize == 1) { 517 int32_t literalStart = i; 518 ++i; 519 while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) { 520 ++i; 521 } 522 appendTo.addLiteral(buffer, literalStart, i - literalStart); 523 524 // If we reached end of string, we are done 525 if (i == len) { 526 return appendTo; 527 } 528 } 529 i += tokenSize; 530 switch (token) { 531 case 0x25: 532 appendTo.add(kPercent, 1); 533 break; 534 case 0x2030: 535 appendTo.add(kPerMill, 1); 536 break; 537 case 0x2D: 538 appendTo.add(kNegative, 1); 539 break; 540 case 0x2B: 541 appendTo.add(kPositive, 1); 542 break; 543 case 0xA4: 544 { 545 if (tokenSize - 1 > 3) { 546 status = U_PARSE_ERROR; 547 return appendTo; 548 } 549 appendTo.add(kCurrency, tokenSize - 1); 550 } 551 break; 552 default: 553 appendTo.addLiteral(&token, 0, 1); 554 break; 555 } 556 } 557 return appendTo; 558 } 559 560 AffixPatternIterator & 561 AffixPattern::iterator(AffixPatternIterator &result) const { 562 result.nextLiteralIndex = 0; 563 result.lastLiteralLength = 0; 564 result.nextTokenIndex = 0; 565 result.tokens = &tokens; 566 result.literals = &literals; 567 return result; 568 } 569 570 UBool 571 AffixPatternIterator::nextToken() { 572 int32_t tlen = tokens->length(); 573 if (nextTokenIndex == tlen) { 574 return FALSE; 575 } 576 ++nextTokenIndex; 577 const UChar *tokenBuffer = tokens->getBuffer(); 578 if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) == 579 AffixPattern::kLiteral) { 580 while (nextTokenIndex < tlen && 581 UNPACK_LONG(tokenBuffer[nextTokenIndex])) { 582 ++nextTokenIndex; 583 } 584 lastLiteralLength = 0; 585 int32_t i = nextTokenIndex - 1; 586 for (; UNPACK_LONG(tokenBuffer[i]); --i) { 587 lastLiteralLength <<= 8; 588 lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); 589 } 590 lastLiteralLength <<= 8; 591 lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); 592 nextLiteralIndex += lastLiteralLength; 593 } 594 return TRUE; 595 } 596 597 AffixPattern::ETokenType 598 AffixPatternIterator::getTokenType() const { 599 return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1)); 600 } 601 602 UnicodeString & 603 AffixPatternIterator::getLiteral(UnicodeString &result) const { 604 const UChar *buffer = literals->getBuffer(); 605 result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength); 606 return result; 607 } 608 609 int32_t 610 AffixPatternIterator::getTokenLength() const { 611 const UChar *tokenBuffer = tokens->getBuffer(); 612 AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]); 613 return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]); 614 } 615 616 AffixPatternParser::AffixPatternParser() 617 : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) { 618 } 619 620 AffixPatternParser::AffixPatternParser( 621 const DecimalFormatSymbols &symbols) { 622 setDecimalFormatSymbols(symbols); 623 } 624 625 void 626 AffixPatternParser::setDecimalFormatSymbols( 627 const DecimalFormatSymbols &symbols) { 628 fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); 629 fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); 630 fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); 631 fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); 632 } 633 634 PluralAffix & 635 AffixPatternParser::parse( 636 const AffixPattern &affixPattern, 637 const CurrencyAffixInfo ¤cyAffixInfo, 638 PluralAffix &appendTo, 639 UErrorCode &status) const { 640 if (U_FAILURE(status)) { 641 return appendTo; 642 } 643 AffixPatternIterator iter; 644 affixPattern.iterator(iter); 645 UnicodeString literal; 646 while (iter.nextToken()) { 647 switch (iter.getTokenType()) { 648 case AffixPattern::kPercent: 649 appendTo.append(fPercent, UNUM_PERCENT_FIELD); 650 break; 651 case AffixPattern::kPerMill: 652 appendTo.append(fPermill, UNUM_PERMILL_FIELD); 653 break; 654 case AffixPattern::kNegative: 655 appendTo.append(fNegative, UNUM_SIGN_FIELD); 656 break; 657 case AffixPattern::kPositive: 658 appendTo.append(fPositive, UNUM_SIGN_FIELD); 659 break; 660 case AffixPattern::kCurrency: 661 switch (iter.getTokenLength()) { 662 case 1: 663 appendTo.append( 664 currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD); 665 break; 666 case 2: 667 appendTo.append( 668 currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD); 669 break; 670 case 3: 671 appendTo.append( 672 currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status); 673 break; 674 default: 675 U_ASSERT(FALSE); 676 break; 677 } 678 break; 679 case AffixPattern::kLiteral: 680 appendTo.append(iter.getLiteral(literal)); 681 break; 682 default: 683 U_ASSERT(FALSE); 684 break; 685 } 686 } 687 return appendTo; 688 } 689 690 691 U_NAMESPACE_END 692 #endif /* #if !UCONFIG_NO_FORMATTING */ 693