1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 package com.ibm.icu.impl; 4 5 import java.io.IOException; 6 import java.text.CharacterIterator; 7 import java.util.Locale; 8 9 import com.ibm.icu.lang.UCharacter; 10 import com.ibm.icu.lang.UCharacterCategory; 11 import com.ibm.icu.text.BreakIterator; 12 import com.ibm.icu.text.Edits; 13 import com.ibm.icu.util.ICUUncheckedIOException; 14 import com.ibm.icu.util.ULocale; 15 16 public final class CaseMapImpl { 17 /** 18 * Implementation of UCaseProps.ContextIterator, iterates over a String. 19 * See ustrcase.c/utf16_caseContextIterator(). 20 */ 21 public static final class StringContextIterator implements UCaseProps.ContextIterator { 22 /** 23 * Constructor. 24 * @param src String to iterate over. 25 */ 26 public StringContextIterator(CharSequence src) { 27 this.s=src; 28 limit=src.length(); 29 cpStart=cpLimit=index=0; 30 dir=0; 31 } 32 33 /** 34 * Set the iteration limit for nextCaseMapCP() to an index within the string. 35 * If the limit parameter is negative or past the string, then the 36 * string length is restored as the iteration limit. 37 * 38 * <p>This limit does not affect the next() function which always 39 * iterates to the very end of the string. 40 * 41 * @param lim The iteration limit. 42 */ 43 public void setLimit(int lim) { 44 if(0<=lim && lim<=s.length()) { 45 limit=lim; 46 } else { 47 limit=s.length(); 48 } 49 } 50 51 /** 52 * Move to the iteration limit without fetching code points up to there. 53 */ 54 public void moveToLimit() { 55 cpStart=cpLimit=limit; 56 } 57 58 /** 59 * Iterate forward through the string to fetch the next code point 60 * to be case-mapped, and set the context indexes for it. 61 * 62 * <p>When the iteration limit is reached (and -1 is returned), 63 * getCPStart() will be at the iteration limit. 64 * 65 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 66 * 67 * @return The next code point to be case-mapped, or <0 when the iteration is done. 68 */ 69 public int nextCaseMapCP() { 70 cpStart=cpLimit; 71 if(cpLimit<limit) { 72 int c=Character.codePointAt(s, cpLimit); 73 cpLimit+=Character.charCount(c); 74 return c; 75 } else { 76 return -1; 77 } 78 } 79 80 /** 81 * Returns the start of the code point that was last returned 82 * by nextCaseMapCP(). 83 */ 84 public int getCPStart() { 85 return cpStart; 86 } 87 88 /** 89 * Returns the limit of the code point that was last returned 90 * by nextCaseMapCP(). 91 */ 92 public int getCPLimit() { 93 return cpLimit; 94 } 95 96 public int getCPLength() { 97 return cpLimit-cpStart; 98 } 99 100 // implement UCaseProps.ContextIterator 101 // The following code is not used anywhere in this private class 102 @Override 103 public void reset(int direction) { 104 if(direction>0) { 105 /* reset for forward iteration */ 106 dir=1; 107 index=cpLimit; 108 } else if(direction<0) { 109 /* reset for backward iteration */ 110 dir=-1; 111 index=cpStart; 112 } else { 113 // not a valid direction 114 dir=0; 115 index=0; 116 } 117 } 118 119 @Override 120 public int next() { 121 int c; 122 123 if(dir>0 && index<s.length()) { 124 c=Character.codePointAt(s, index); 125 index+=Character.charCount(c); 126 return c; 127 } else if(dir<0 && index>0) { 128 c=Character.codePointBefore(s, index); 129 index-=Character.charCount(c); 130 return c; 131 } 132 return -1; 133 } 134 135 // variables 136 protected CharSequence s; 137 protected int index, limit, cpStart, cpLimit; 138 protected int dir; // 0=initial state >0=forward <0=backward 139 } 140 141 public static final int TITLECASE_WHOLE_STRING = 0x20; 142 public static final int TITLECASE_SENTENCES = 0x40; 143 144 /** 145 * Bit mask for the titlecasing iterator options bit field. 146 * Currently only 3 out of 8 values are used: 147 * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES. 148 * See stringoptions.h. 149 * @internal 150 */ 151 private static final int TITLECASE_ITERATOR_MASK = 0xe0; 152 153 public static final int TITLECASE_ADJUST_TO_CASED = 0x400; 154 155 /** 156 * Bit mask for the titlecasing index adjustment options bit set. 157 * Currently two bits are defined: 158 * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED. 159 * See stringoptions.h. 160 * @internal 161 */ 162 private static final int TITLECASE_ADJUSTMENT_MASK = 0x600; 163 164 public static int addTitleAdjustmentOption(int options, int newOption) { 165 int adjOptions = options & TITLECASE_ADJUSTMENT_MASK; 166 if (adjOptions !=0 && adjOptions != newOption) { 167 throw new IllegalArgumentException("multiple titlecasing index adjustment options"); 168 } 169 return options | newOption; 170 } 171 172 private static final int LNS = 173 (1 << UCharacterCategory.UPPERCASE_LETTER) | 174 (1 << UCharacterCategory.LOWERCASE_LETTER) | 175 (1 << UCharacterCategory.TITLECASE_LETTER) | 176 // Not MODIFIER_LETTER: We count only cased modifier letters. 177 (1 << UCharacterCategory.OTHER_LETTER) | 178 179 (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) | 180 (1 << UCharacterCategory.LETTER_NUMBER) | 181 (1 << UCharacterCategory.OTHER_NUMBER) | 182 183 (1 << UCharacterCategory.MATH_SYMBOL) | 184 (1 << UCharacterCategory.CURRENCY_SYMBOL) | 185 (1 << UCharacterCategory.MODIFIER_SYMBOL) | 186 (1 << UCharacterCategory.OTHER_SYMBOL) | 187 188 (1 << UCharacterCategory.PRIVATE_USE); 189 190 private static boolean isLNS(int c) { 191 // Letter, number, symbol, 192 // or a private use code point because those are typically used as letters or numbers. 193 // Consider modifier letters only if they are cased. 194 int gc = UCharacterProperty.INSTANCE.getType(c); 195 return ((1 << gc) & LNS) != 0 || 196 (gc == UCharacterCategory.MODIFIER_LETTER && 197 UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE); 198 } 199 200 public static int addTitleIteratorOption(int options, int newOption) { 201 int iterOptions = options & TITLECASE_ITERATOR_MASK; 202 if (iterOptions !=0 && iterOptions != newOption) { 203 throw new IllegalArgumentException("multiple titlecasing iterator options"); 204 } 205 return options | newOption; 206 } 207 208 public static BreakIterator getTitleBreakIterator( 209 Locale locale, int options, BreakIterator iter) { 210 options &= TITLECASE_ITERATOR_MASK; 211 if (options != 0 && iter != null) { 212 throw new IllegalArgumentException( 213 "titlecasing iterator option together with an explicit iterator"); 214 } 215 if (iter == null) { 216 switch (options) { 217 case 0: 218 iter = BreakIterator.getWordInstance(locale); 219 break; 220 case TITLECASE_WHOLE_STRING: 221 iter = new WholeStringBreakIterator(); 222 break; 223 case TITLECASE_SENTENCES: 224 iter = BreakIterator.getSentenceInstance(locale); 225 break; 226 default: 227 throw new IllegalArgumentException("unknown titlecasing iterator option"); 228 } 229 } 230 return iter; 231 } 232 233 public static BreakIterator getTitleBreakIterator( 234 ULocale locale, int options, BreakIterator iter) { 235 options &= TITLECASE_ITERATOR_MASK; 236 if (options != 0 && iter != null) { 237 throw new IllegalArgumentException( 238 "titlecasing iterator option together with an explicit iterator"); 239 } 240 if (iter == null) { 241 switch (options) { 242 case 0: 243 iter = BreakIterator.getWordInstance(locale); 244 break; 245 case TITLECASE_WHOLE_STRING: 246 iter = new WholeStringBreakIterator(); 247 break; 248 case TITLECASE_SENTENCES: 249 iter = BreakIterator.getSentenceInstance(locale); 250 break; 251 default: 252 throw new IllegalArgumentException("unknown titlecasing iterator option"); 253 } 254 } 255 return iter; 256 } 257 258 /** 259 * Omit unchanged text when case-mapping with Edits. 260 */ 261 public static final int OMIT_UNCHANGED_TEXT = 0x4000; 262 263 private static final class WholeStringBreakIterator extends BreakIterator { 264 private int length; 265 266 private static void notImplemented() { 267 throw new UnsupportedOperationException("should not occur"); 268 } 269 270 @Override 271 public int first() { 272 return 0; 273 } 274 275 @Override 276 public int last() { 277 notImplemented(); 278 return 0; 279 } 280 281 @Override 282 public int next(int n) { 283 notImplemented(); 284 return 0; 285 } 286 287 @Override 288 public int next() { 289 return length; 290 } 291 292 @Override 293 public int previous() { 294 notImplemented(); 295 return 0; 296 } 297 298 @Override 299 public int following(int offset) { 300 notImplemented(); 301 return 0; 302 } 303 304 @Override 305 public int current() { 306 notImplemented(); 307 return 0; 308 } 309 310 @Override 311 public CharacterIterator getText() { 312 notImplemented(); 313 return null; 314 } 315 316 @Override 317 public void setText(CharacterIterator newText) { 318 length = newText.getEndIndex(); 319 } 320 321 @Override 322 public void setText(CharSequence newText) { 323 length = newText.length(); 324 } 325 326 @Override 327 public void setText(String newText) { 328 length = newText.length(); 329 } 330 } 331 332 private static int appendCodePoint(Appendable a, int c) throws IOException { 333 if (c <= Character.MAX_VALUE) { 334 a.append((char)c); 335 return 1; 336 } else { 337 a.append((char)(0xd7c0 + (c >> 10))); 338 a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); 339 return 2; 340 } 341 } 342 343 /** 344 * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. 345 * @throws IOException 346 */ 347 private static void appendResult(int result, Appendable dest, 348 int cpLength, int options, Edits edits) throws IOException { 349 // Decode the result. 350 if (result < 0) { 351 // (not) original code point 352 if (edits != null) { 353 edits.addUnchanged(cpLength); 354 } 355 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 356 return; 357 } 358 appendCodePoint(dest, ~result); 359 } else if (result <= UCaseProps.MAX_STRING_LENGTH) { 360 // The mapping has already been appended to result. 361 if (edits != null) { 362 edits.addReplace(cpLength, result); 363 } 364 } else { 365 // Append the single-code point mapping. 366 int length = appendCodePoint(dest, result); 367 if (edits != null) { 368 edits.addReplace(cpLength, length); 369 } 370 } 371 } 372 373 private static final void appendUnchanged(CharSequence src, int start, int length, 374 Appendable dest, int options, Edits edits) throws IOException { 375 if (length > 0) { 376 if (edits != null) { 377 edits.addUnchanged(length); 378 } 379 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 380 return; 381 } 382 dest.append(src, start, start + length); 383 } 384 } 385 386 private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { 387 if (!edits.hasChanges()) { 388 return src.toString(); 389 } 390 StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); 391 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 392 if (ei.hasChange()) { 393 int i = ei.replacementIndex(); 394 result.append(replacementChars, i, i + ei.newLength()); 395 } else { 396 int i = ei.sourceIndex(); 397 result.append(src, i, i + ei.oldLength()); 398 } 399 } 400 return result.toString(); 401 } 402 403 private static void internalToLower(int caseLocale, int options, StringContextIterator iter, 404 Appendable dest, Edits edits) throws IOException { 405 int c; 406 while ((c = iter.nextCaseMapCP()) >= 0) { 407 c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); 408 appendResult(c, dest, iter.getCPLength(), options, edits); 409 } 410 } 411 412 public static String toLower(int caseLocale, int options, CharSequence src) { 413 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 414 if (src.length() == 0) { 415 return src.toString(); 416 } 417 // Collect and apply only changes. 418 // Good if no or few changes. Bad (slow) if many changes. 419 Edits edits = new Edits(); 420 StringBuilder replacementChars = toLower( 421 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 422 return applyEdits(src, replacementChars, edits); 423 } else { 424 return toLower(caseLocale, options, src, 425 new StringBuilder(src.length()), null).toString(); 426 } 427 } 428 429 public static <A extends Appendable> A toLower(int caseLocale, int options, 430 CharSequence src, A dest, Edits edits) { 431 try { 432 if (edits != null) { 433 edits.reset(); 434 } 435 StringContextIterator iter = new StringContextIterator(src); 436 internalToLower(caseLocale, options, iter, dest, edits); 437 return dest; 438 } catch (IOException e) { 439 throw new ICUUncheckedIOException(e); 440 } 441 } 442 443 public static String toUpper(int caseLocale, int options, CharSequence src) { 444 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 445 if (src.length() == 0) { 446 return src.toString(); 447 } 448 // Collect and apply only changes. 449 // Good if no or few changes. Bad (slow) if many changes. 450 Edits edits = new Edits(); 451 StringBuilder replacementChars = toUpper( 452 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 453 return applyEdits(src, replacementChars, edits); 454 } else { 455 return toUpper(caseLocale, options, src, 456 new StringBuilder(src.length()), null).toString(); 457 } 458 } 459 460 public static <A extends Appendable> A toUpper(int caseLocale, int options, 461 CharSequence src, A dest, Edits edits) { 462 try { 463 if (edits != null) { 464 edits.reset(); 465 } 466 if (caseLocale == UCaseProps.LOC_GREEK) { 467 return GreekUpper.toUpper(options, src, dest, edits); 468 } 469 StringContextIterator iter = new StringContextIterator(src); 470 int c; 471 while ((c = iter.nextCaseMapCP()) >= 0) { 472 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale); 473 appendResult(c, dest, iter.getCPLength(), options, edits); 474 } 475 return dest; 476 } catch (IOException e) { 477 throw new ICUUncheckedIOException(e); 478 } 479 } 480 481 public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) { 482 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 483 if (src.length() == 0) { 484 return src.toString(); 485 } 486 // Collect and apply only changes. 487 // Good if no or few changes. Bad (slow) if many changes. 488 Edits edits = new Edits(); 489 StringBuilder replacementChars = toTitle( 490 caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src, 491 new StringBuilder(), edits); 492 return applyEdits(src, replacementChars, edits); 493 } else { 494 return toTitle(caseLocale, options, iter, src, 495 new StringBuilder(src.length()), null).toString(); 496 } 497 } 498 499 public static <A extends Appendable> A toTitle( 500 int caseLocale, int options, BreakIterator titleIter, 501 CharSequence src, A dest, Edits edits) { 502 try { 503 if (edits != null) { 504 edits.reset(); 505 } 506 507 /* set up local variables */ 508 StringContextIterator iter = new StringContextIterator(src); 509 int srcLength = src.length(); 510 int prev=0; 511 boolean isFirstIndex=true; 512 513 /* titlecasing loop */ 514 while(prev<srcLength) { 515 /* find next index where to titlecase */ 516 int index; 517 if(isFirstIndex) { 518 isFirstIndex=false; 519 index=titleIter.first(); 520 } else { 521 index=titleIter.next(); 522 } 523 if(index==BreakIterator.DONE || index>srcLength) { 524 index=srcLength; 525 } 526 527 /* 528 * Segment [prev..index[ into 3 parts: 529 * a) skipped characters (copy as-is) [prev..titleStart[ 530 * b) first letter (titlecase) [titleStart..titleLimit[ 531 * c) subsequent characters (lowercase) [titleLimit..index[ 532 */ 533 if(prev<index) { 534 // Find and copy skipped characters [prev..titleStart[ 535 int titleStart=prev; 536 iter.setLimit(index); 537 int c=iter.nextCaseMapCP(); 538 if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) { 539 // Adjust the titlecasing index to the next cased character, 540 // or to the next letter/number/symbol/private use. 541 // Stop with titleStart<titleLimit<=index 542 // if there is a character to be titlecased, 543 // or else stop with titleStart==titleLimit==index. 544 boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0; 545 while ((toCased ? 546 UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) : 547 !CaseMapImpl.isLNS(c)) && 548 (c=iter.nextCaseMapCP())>=0) {} 549 // If c<0 then we have only uncased characters in [prev..index[ 550 // and stopped with titleStart==titleLimit==index. 551 titleStart=iter.getCPStart(); 552 if (prev < titleStart) { 553 appendUnchanged(src, prev, titleStart-prev, dest, options, edits); 554 } 555 } 556 557 if(titleStart<index) { 558 int titleLimit=iter.getCPLimit(); 559 // titlecase c which is from [titleStart..titleLimit[ 560 c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale); 561 appendResult(c, dest, iter.getCPLength(), options, edits); 562 563 // Special case Dutch IJ titlecasing 564 if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) { 565 char c1 = src.charAt(titleStart); 566 if ((c1 == 'i' || c1 == 'I')) { 567 char c2 = src.charAt(titleStart+1); 568 if (c2 == 'j') { 569 dest.append('J'); 570 if (edits != null) { 571 edits.addReplace(1, 1); 572 } 573 c = iter.nextCaseMapCP(); 574 titleLimit++; 575 assert c == c2; 576 assert titleLimit == iter.getCPLimit(); 577 } else if (c2 == 'J') { 578 // Keep the capital J from getting lowercased. 579 appendUnchanged(src, titleStart + 1, 1, dest, options, edits); 580 c = iter.nextCaseMapCP(); 581 titleLimit++; 582 assert c == c2; 583 assert titleLimit == iter.getCPLimit(); 584 } 585 } 586 } 587 588 // lowercase [titleLimit..index[ 589 if(titleLimit<index) { 590 if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) { 591 // Normal operation: Lowercase the rest of the word. 592 internalToLower(caseLocale, options, iter, dest, edits); 593 } else { 594 // Optionally just copy the rest of the word unchanged. 595 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits); 596 iter.moveToLimit(); 597 } 598 } 599 } 600 } 601 602 prev=index; 603 } 604 return dest; 605 } catch (IOException e) { 606 throw new ICUUncheckedIOException(e); 607 } 608 } 609 610 public static String fold(int options, CharSequence src) { 611 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 612 if (src.length() == 0) { 613 return src.toString(); 614 } 615 // Collect and apply only changes. 616 // Good if no or few changes. Bad (slow) if many changes. 617 Edits edits = new Edits(); 618 StringBuilder replacementChars = fold( 619 options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 620 return applyEdits(src, replacementChars, edits); 621 } else { 622 return fold(options, src, new StringBuilder(src.length()), null).toString(); 623 } 624 } 625 626 public static <A extends Appendable> A fold(int options, 627 CharSequence src, A dest, Edits edits) { 628 try { 629 if (edits != null) { 630 edits.reset(); 631 } 632 int length = src.length(); 633 for (int i = 0; i < length;) { 634 int c = Character.codePointAt(src, i); 635 int cpLength = Character.charCount(c); 636 i += cpLength; 637 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options); 638 appendResult(c, dest, cpLength, options, edits); 639 } 640 return dest; 641 } catch (IOException e) { 642 throw new ICUUncheckedIOException(e); 643 } 644 } 645 646 private static final class GreekUpper { 647 // Data bits. 648 private static final int UPPER_MASK = 0x3ff; 649 private static final int HAS_VOWEL = 0x1000; 650 private static final int HAS_YPOGEGRAMMENI = 0x2000; 651 private static final int HAS_ACCENT = 0x4000; 652 private static final int HAS_DIALYTIKA = 0x8000; 653 // Further bits during data building and processing, not stored in the data map. 654 private static final int HAS_COMBINING_DIALYTIKA = 0x10000; 655 private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000; 656 657 private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 658 private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 659 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 660 private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 661 662 // State bits. 663 private static final int AFTER_CASED = 1; 664 private static final int AFTER_VOWEL_WITH_ACCENT = 2; 665 666 // Data generated by prototype code, see 667 // http://site.icu-project.org/design/case/greek-upper 668 // TODO: Move this data into ucase.icu. 669 private static final char[] data0370 = { 670 // U+0370..03FF 671 0x0370, // 672 0x0370, // 673 0x0372, // 674 0x0372, // 675 0, 676 0, 677 0x0376, // 678 0x0376, // 679 0, 680 0, 681 0x037A, // 682 0x03FD, // 683 0x03FE, // 684 0x03FF, // 685 0, 686 0x037F, // 687 0, 688 0, 689 0, 690 0, 691 0, 692 0, 693 0x0391 | HAS_VOWEL | HAS_ACCENT, // 694 0, 695 0x0395 | HAS_VOWEL | HAS_ACCENT, // 696 0x0397 | HAS_VOWEL | HAS_ACCENT, // 697 0x0399 | HAS_VOWEL | HAS_ACCENT, // 698 0, 699 0x039F | HAS_VOWEL | HAS_ACCENT, // 700 0, 701 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 702 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 703 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 704 0x0391 | HAS_VOWEL, // 705 0x0392, // 706 0x0393, // 707 0x0394, // 708 0x0395 | HAS_VOWEL, // 709 0x0396, // 710 0x0397 | HAS_VOWEL, // 711 0x0398, // 712 0x0399 | HAS_VOWEL, // 713 0x039A, // 714 0x039B, // 715 0x039C, // 716 0x039D, // 717 0x039E, // 718 0x039F | HAS_VOWEL, // 719 0x03A0, // 720 0x03A1, // 721 0, 722 0x03A3, // 723 0x03A4, // 724 0x03A5 | HAS_VOWEL, // 725 0x03A6, // 726 0x03A7, // 727 0x03A8, // 728 0x03A9 | HAS_VOWEL, // 729 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // 730 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // 731 0x0391 | HAS_VOWEL | HAS_ACCENT, // 732 0x0395 | HAS_VOWEL | HAS_ACCENT, // 733 0x0397 | HAS_VOWEL | HAS_ACCENT, // 734 0x0399 | HAS_VOWEL | HAS_ACCENT, // 735 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 736 0x0391 | HAS_VOWEL, // 737 0x0392, // 738 0x0393, // 739 0x0394, // 740 0x0395 | HAS_VOWEL, // 741 0x0396, // 742 0x0397 | HAS_VOWEL, // 743 0x0398, // 744 0x0399 | HAS_VOWEL, // 745 0x039A, // 746 0x039B, // 747 0x039C, // 748 0x039D, // 749 0x039E, // 750 0x039F | HAS_VOWEL, // 751 0x03A0, // 752 0x03A1, // 753 0x03A3, // 754 0x03A3, // 755 0x03A4, // 756 0x03A5 | HAS_VOWEL, // 757 0x03A6, // 758 0x03A7, // 759 0x03A8, // 760 0x03A9 | HAS_VOWEL, // 761 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // 762 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // 763 0x039F | HAS_VOWEL | HAS_ACCENT, // 764 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 765 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 766 0x03CF, // 767 0x0392, // 768 0x0398, // 769 0x03D2, // 770 0x03D2 | HAS_ACCENT, // 771 0x03D2 | HAS_DIALYTIKA, // 772 0x03A6, // 773 0x03A0, // 774 0x03CF, // 775 0x03D8, // 776 0x03D8, // 777 0x03DA, // 778 0x03DA, // 779 0x03DC, // 780 0x03DC, // 781 0x03DE, // 782 0x03DE, // 783 0x03E0, // 784 0x03E0, // 785 0, 786 0, 787 0, 788 0, 789 0, 790 0, 791 0, 792 0, 793 0, 794 0, 795 0, 796 0, 797 0, 798 0, 799 0x039A, // 800 0x03A1, // 801 0x03F9, // 802 0x037F, // 803 0x03F4, // 804 0x0395 | HAS_VOWEL, // 805 0, 806 0x03F7, // 807 0x03F7, // 808 0x03F9, // 809 0x03FA, // 810 0x03FA, // 811 0x03FC, // 812 0x03FD, // 813 0x03FE, // 814 0x03FF, // 815 }; 816 817 private static final char[] data1F00 = { 818 // U+1F00..1FFF 819 0x0391 | HAS_VOWEL, // 820 0x0391 | HAS_VOWEL, // 821 0x0391 | HAS_VOWEL | HAS_ACCENT, // 822 0x0391 | HAS_VOWEL | HAS_ACCENT, // 823 0x0391 | HAS_VOWEL | HAS_ACCENT, // 824 0x0391 | HAS_VOWEL | HAS_ACCENT, // 825 0x0391 | HAS_VOWEL | HAS_ACCENT, // 826 0x0391 | HAS_VOWEL | HAS_ACCENT, // 827 0x0391 | HAS_VOWEL, // 828 0x0391 | HAS_VOWEL, // 829 0x0391 | HAS_VOWEL | HAS_ACCENT, // 830 0x0391 | HAS_VOWEL | HAS_ACCENT, // 831 0x0391 | HAS_VOWEL | HAS_ACCENT, // 832 0x0391 | HAS_VOWEL | HAS_ACCENT, // 833 0x0391 | HAS_VOWEL | HAS_ACCENT, // 834 0x0391 | HAS_VOWEL | HAS_ACCENT, // 835 0x0395 | HAS_VOWEL, // 836 0x0395 | HAS_VOWEL, // 837 0x0395 | HAS_VOWEL | HAS_ACCENT, // 838 0x0395 | HAS_VOWEL | HAS_ACCENT, // 839 0x0395 | HAS_VOWEL | HAS_ACCENT, // 840 0x0395 | HAS_VOWEL | HAS_ACCENT, // 841 0, 842 0, 843 0x0395 | HAS_VOWEL, // 844 0x0395 | HAS_VOWEL, // 845 0x0395 | HAS_VOWEL | HAS_ACCENT, // 846 0x0395 | HAS_VOWEL | HAS_ACCENT, // 847 0x0395 | HAS_VOWEL | HAS_ACCENT, // 848 0x0395 | HAS_VOWEL | HAS_ACCENT, // 849 0, 850 0, 851 0x0397 | HAS_VOWEL, // 852 0x0397 | HAS_VOWEL, // 853 0x0397 | HAS_VOWEL | HAS_ACCENT, // 854 0x0397 | HAS_VOWEL | HAS_ACCENT, // 855 0x0397 | HAS_VOWEL | HAS_ACCENT, // 856 0x0397 | HAS_VOWEL | HAS_ACCENT, // 857 0x0397 | HAS_VOWEL | HAS_ACCENT, // 858 0x0397 | HAS_VOWEL | HAS_ACCENT, // 859 0x0397 | HAS_VOWEL, // 860 0x0397 | HAS_VOWEL, // 861 0x0397 | HAS_VOWEL | HAS_ACCENT, // 862 0x0397 | HAS_VOWEL | HAS_ACCENT, // 863 0x0397 | HAS_VOWEL | HAS_ACCENT, // 864 0x0397 | HAS_VOWEL | HAS_ACCENT, // 865 0x0397 | HAS_VOWEL | HAS_ACCENT, // 866 0x0397 | HAS_VOWEL | HAS_ACCENT, // 867 0x0399 | HAS_VOWEL, // 868 0x0399 | HAS_VOWEL, // 869 0x0399 | HAS_VOWEL | HAS_ACCENT, // 870 0x0399 | HAS_VOWEL | HAS_ACCENT, // 871 0x0399 | HAS_VOWEL | HAS_ACCENT, // 872 0x0399 | HAS_VOWEL | HAS_ACCENT, // 873 0x0399 | HAS_VOWEL | HAS_ACCENT, // 874 0x0399 | HAS_VOWEL | HAS_ACCENT, // 875 0x0399 | HAS_VOWEL, // 876 0x0399 | HAS_VOWEL, // 877 0x0399 | HAS_VOWEL | HAS_ACCENT, // 878 0x0399 | HAS_VOWEL | HAS_ACCENT, // 879 0x0399 | HAS_VOWEL | HAS_ACCENT, // 880 0x0399 | HAS_VOWEL | HAS_ACCENT, // 881 0x0399 | HAS_VOWEL | HAS_ACCENT, // 882 0x0399 | HAS_VOWEL | HAS_ACCENT, // 883 0x039F | HAS_VOWEL, // 884 0x039F | HAS_VOWEL, // 885 0x039F | HAS_VOWEL | HAS_ACCENT, // 886 0x039F | HAS_VOWEL | HAS_ACCENT, // 887 0x039F | HAS_VOWEL | HAS_ACCENT, // 888 0x039F | HAS_VOWEL | HAS_ACCENT, // 889 0, 890 0, 891 0x039F | HAS_VOWEL, // 892 0x039F | HAS_VOWEL, // 893 0x039F | HAS_VOWEL | HAS_ACCENT, // 894 0x039F | HAS_VOWEL | HAS_ACCENT, // 895 0x039F | HAS_VOWEL | HAS_ACCENT, // 896 0x039F | HAS_VOWEL | HAS_ACCENT, // 897 0, 898 0, 899 0x03A5 | HAS_VOWEL, // 900 0x03A5 | HAS_VOWEL, // 901 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 902 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 903 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 904 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 905 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 906 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 907 0, 908 0x03A5 | HAS_VOWEL, // 909 0, 910 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 911 0, 912 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 913 0, 914 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 915 0x03A9 | HAS_VOWEL, // 916 0x03A9 | HAS_VOWEL, // 917 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 918 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 919 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 920 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 921 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 922 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 923 0x03A9 | HAS_VOWEL, // 924 0x03A9 | HAS_VOWEL, // 925 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 926 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 927 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 928 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 929 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 930 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 931 0x0391 | HAS_VOWEL | HAS_ACCENT, // 932 0x0391 | HAS_VOWEL | HAS_ACCENT, // 933 0x0395 | HAS_VOWEL | HAS_ACCENT, // 934 0x0395 | HAS_VOWEL | HAS_ACCENT, // 935 0x0397 | HAS_VOWEL | HAS_ACCENT, // 936 0x0397 | HAS_VOWEL | HAS_ACCENT, // 937 0x0399 | HAS_VOWEL | HAS_ACCENT, // 938 0x0399 | HAS_VOWEL | HAS_ACCENT, // 939 0x039F | HAS_VOWEL | HAS_ACCENT, // 940 0x039F | HAS_VOWEL | HAS_ACCENT, // 941 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 942 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 943 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 944 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 945 0, 946 0, 947 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 948 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 949 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 950 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 951 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 952 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 953 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 954 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 955 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 956 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 957 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 958 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 959 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 960 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 961 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 962 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 963 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 964 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 965 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 966 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 967 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 968 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 969 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 970 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 971 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 972 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 973 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 974 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 975 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 976 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 977 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 978 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 979 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 980 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 981 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 982 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 983 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 984 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 985 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 986 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 987 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 988 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 989 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 990 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 991 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 992 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 993 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 994 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 995 0x0391 | HAS_VOWEL, // 996 0x0391 | HAS_VOWEL, // 997 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 998 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 999 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1000 0, 1001 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1002 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1003 0x0391 | HAS_VOWEL, // 1004 0x0391 | HAS_VOWEL, // 1005 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1006 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1007 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1008 0, 1009 0x0399 | HAS_VOWEL, // 1010 0, 1011 0, 1012 0, 1013 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1014 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1015 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1016 0, 1017 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1018 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1019 0x0395 | HAS_VOWEL | HAS_ACCENT, // 1020 0x0395 | HAS_VOWEL | HAS_ACCENT, // 1021 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1022 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1023 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1024 0, 1025 0, 1026 0, 1027 0x0399 | HAS_VOWEL, // 1028 0x0399 | HAS_VOWEL, // 1029 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1030 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1031 0, 1032 0, 1033 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1034 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1035 0x0399 | HAS_VOWEL, // 1036 0x0399 | HAS_VOWEL, // 1037 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1038 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1039 0, 1040 0, 1041 0, 1042 0, 1043 0x03A5 | HAS_VOWEL, // 1044 0x03A5 | HAS_VOWEL, // 1045 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1046 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1047 0x03A1, // 1048 0x03A1, // 1049 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1050 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1051 0x03A5 | HAS_VOWEL, // 1052 0x03A5 | HAS_VOWEL, // 1053 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1054 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1055 0x03A1, // 1056 0, 1057 0, 1058 0, 1059 0, 1060 0, 1061 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1062 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1063 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1064 0, 1065 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1066 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1067 0x039F | HAS_VOWEL | HAS_ACCENT, // 1068 0x039F | HAS_VOWEL | HAS_ACCENT, // 1069 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1070 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1071 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1072 0, 1073 0, 1074 0, 1075 }; 1076 1077 // U+2126 Ohm sign 1078 private static final char data2126 = 0x03A9 | HAS_VOWEL; // 1079 1080 private static final int getLetterData(int c) { 1081 if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { 1082 return 0; 1083 } else if (c <= 0x3ff) { 1084 return data0370[c - 0x370]; 1085 } else if (c <= 0x1fff) { 1086 return data1F00[c - 0x1f00]; 1087 } else if (c == 0x2126) { 1088 return data2126; 1089 } else { 1090 return 0; 1091 } 1092 } 1093 1094 /** 1095 * Returns a non-zero value for each of the Greek combining diacritics 1096 * listed in The Unicode Standard, version 8, chapter 7.2 Greek, 1097 * plus some perispomeni look-alikes. 1098 */ 1099 private static final int getDiacriticData(int c) { 1100 switch (c) { 1101 case '\u0300': // varia 1102 case '\u0301': // tonos = oxia 1103 case '\u0342': // perispomeni 1104 case '\u0302': // circumflex can look like perispomeni 1105 case '\u0303': // tilde can look like perispomeni 1106 case '\u0311': // inverted breve can look like perispomeni 1107 return HAS_ACCENT; 1108 case '\u0308': // dialytika = diaeresis 1109 return HAS_COMBINING_DIALYTIKA; 1110 case '\u0344': // dialytika tonos 1111 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; 1112 case '\u0345': // ypogegrammeni = iota subscript 1113 return HAS_YPOGEGRAMMENI; 1114 case '\u0304': // macron 1115 case '\u0306': // breve 1116 case '\u0313': // comma above 1117 case '\u0314': // reversed comma above 1118 case '\u0343': // koronis 1119 return HAS_OTHER_GREEK_DIACRITIC; 1120 default: 1121 return 0; 1122 } 1123 } 1124 1125 private static boolean isFollowedByCasedLetter(CharSequence s, int i) { 1126 while (i < s.length()) { 1127 int c = Character.codePointAt(s, i); 1128 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1129 if ((type & UCaseProps.IGNORABLE) != 0) { 1130 // Case-ignorable, continue with the loop. 1131 i += Character.charCount(c); 1132 } else if (type != UCaseProps.NONE) { 1133 return true; // Followed by cased letter. 1134 } else { 1135 return false; // Uncased and not case-ignorable. 1136 } 1137 } 1138 return false; // Not followed by cased letter. 1139 } 1140 1141 /** 1142 * Greek string uppercasing with a state machine. 1143 * Probably simpler than a stateless function that has to figure out complex context-before 1144 * for each character. 1145 * TODO: Try to re-consolidate one way or another with the non-Greek function. 1146 * 1147 * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). 1148 * @throws IOException 1149 */ 1150 private static <A extends Appendable> A toUpper(int options, 1151 CharSequence src, A dest, Edits edits) throws IOException { 1152 int state = 0; 1153 for (int i = 0; i < src.length();) { 1154 int c = Character.codePointAt(src, i); 1155 int nextIndex = i + Character.charCount(c); 1156 int nextState = 0; 1157 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1158 if ((type & UCaseProps.IGNORABLE) != 0) { 1159 // c is case-ignorable 1160 nextState |= (state & AFTER_CASED); 1161 } else if (type != UCaseProps.NONE) { 1162 // c is cased 1163 nextState |= AFTER_CASED; 1164 } 1165 int data = getLetterData(c); 1166 if (data > 0) { 1167 int upper = data & UPPER_MASK; 1168 // Add a dialytika to this iota or ypsilon vowel 1169 // if we removed a tonos from the previous vowel, 1170 // and that previous vowel did not also have (or gain) a dialytika. 1171 // Adding one only to the final vowel in a longer sequence 1172 // (which does not occur in normal writing) would require lookahead. 1173 // Set the same flag as for preserving an existing dialytika. 1174 if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && 1175 (upper == '' || upper == '')) { 1176 data |= HAS_DIALYTIKA; 1177 } 1178 int numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. 1179 if ((data & HAS_YPOGEGRAMMENI) != 0) { 1180 numYpogegrammeni = 1; 1181 } 1182 // Skip combining diacritics after this Greek letter. 1183 while (nextIndex < src.length()) { 1184 int diacriticData = getDiacriticData(src.charAt(nextIndex)); 1185 if (diacriticData != 0) { 1186 data |= diacriticData; 1187 if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { 1188 ++numYpogegrammeni; 1189 } 1190 ++nextIndex; 1191 } else { 1192 break; // not a Greek diacritic 1193 } 1194 } 1195 if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { 1196 nextState |= AFTER_VOWEL_WITH_ACCENT; 1197 } 1198 // Map according to Greek rules. 1199 boolean addTonos = false; 1200 if (upper == '' && 1201 (data & HAS_ACCENT) != 0 && 1202 numYpogegrammeni == 0 && 1203 (state & AFTER_CASED) == 0 && 1204 !isFollowedByCasedLetter(src, nextIndex)) { 1205 // Keep disjunctive "or" with (only) a tonos. 1206 // We use the same "word boundary" conditions as for the Final_Sigma test. 1207 if (i == nextIndex) { 1208 upper = ''; // Preserve the precomposed form. 1209 } else { 1210 addTonos = true; 1211 } 1212 } else if ((data & HAS_DIALYTIKA) != 0) { 1213 // Preserve a vowel with dialytika in precomposed form if it exists. 1214 if (upper == '') { 1215 upper = ''; 1216 data &= ~HAS_EITHER_DIALYTIKA; 1217 } else if (upper == '') { 1218 upper = ''; 1219 data &= ~HAS_EITHER_DIALYTIKA; 1220 } 1221 } 1222 1223 boolean change; 1224 if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) { 1225 change = true; // common, simple usage 1226 } else { 1227 // Find out first whether we are changing the text. 1228 change = src.charAt(i) != upper || numYpogegrammeni > 0; 1229 int i2 = i + 1; 1230 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1231 change |= i2 >= nextIndex || src.charAt(i2) != 0x308; 1232 ++i2; 1233 } 1234 if (addTonos) { 1235 change |= i2 >= nextIndex || src.charAt(i2) != 0x301; 1236 ++i2; 1237 } 1238 int oldLength = nextIndex - i; 1239 int newLength = (i2 - i) + numYpogegrammeni; 1240 change |= oldLength != newLength; 1241 if (change) { 1242 if (edits != null) { 1243 edits.addReplace(oldLength, newLength); 1244 } 1245 } else { 1246 if (edits != null) { 1247 edits.addUnchanged(oldLength); 1248 } 1249 // Write unchanged text? 1250 change = (options & OMIT_UNCHANGED_TEXT) == 0; 1251 } 1252 } 1253 1254 if (change) { 1255 dest.append((char)upper); 1256 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1257 dest.append('\u0308'); // restore or add a dialytika 1258 } 1259 if (addTonos) { 1260 dest.append('\u0301'); 1261 } 1262 while (numYpogegrammeni > 0) { 1263 dest.append(''); 1264 --numYpogegrammeni; 1265 } 1266 } 1267 } else { 1268 c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK); 1269 appendResult(c, dest, nextIndex - i, options, edits); 1270 } 1271 i = nextIndex; 1272 state = nextState; 1273 } 1274 return dest; 1275 } 1276 } 1277 } 1278