1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 package android.icu.impl; 5 6 import java.io.IOException; 7 import java.text.CharacterIterator; 8 import java.util.Locale; 9 10 import android.icu.lang.UCharacter; 11 import android.icu.lang.UCharacterCategory; 12 import android.icu.text.BreakIterator; 13 import android.icu.text.Edits; 14 import android.icu.util.ICUUncheckedIOException; 15 import android.icu.util.ULocale; 16 17 /** 18 * @hide Only a subset of ICU is exposed in Android 19 */ 20 public final class CaseMapImpl { 21 /** 22 * Implementation of UCaseProps.ContextIterator, iterates over a String. 23 * See ustrcase.c/utf16_caseContextIterator(). 24 */ 25 public static final class StringContextIterator implements UCaseProps.ContextIterator { 26 /** 27 * Constructor. 28 * @param src String to iterate over. 29 */ 30 public StringContextIterator(CharSequence src) { 31 this.s=src; 32 limit=src.length(); 33 cpStart=cpLimit=index=0; 34 dir=0; 35 } 36 37 /** 38 * Set the iteration limit for nextCaseMapCP() to an index within the string. 39 * If the limit parameter is negative or past the string, then the 40 * string length is restored as the iteration limit. 41 * 42 * <p>This limit does not affect the next() function which always 43 * iterates to the very end of the string. 44 * 45 * @param lim The iteration limit. 46 */ 47 public void setLimit(int lim) { 48 if(0<=lim && lim<=s.length()) { 49 limit=lim; 50 } else { 51 limit=s.length(); 52 } 53 } 54 55 /** 56 * Move to the iteration limit without fetching code points up to there. 57 */ 58 public void moveToLimit() { 59 cpStart=cpLimit=limit; 60 } 61 62 /** 63 * Iterate forward through the string to fetch the next code point 64 * to be case-mapped, and set the context indexes for it. 65 * 66 * <p>When the iteration limit is reached (and -1 is returned), 67 * getCPStart() will be at the iteration limit. 68 * 69 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 70 * 71 * @return The next code point to be case-mapped, or <0 when the iteration is done. 72 */ 73 public int nextCaseMapCP() { 74 cpStart=cpLimit; 75 if(cpLimit<limit) { 76 int c=Character.codePointAt(s, cpLimit); 77 cpLimit+=Character.charCount(c); 78 return c; 79 } else { 80 return -1; 81 } 82 } 83 84 /** 85 * Returns the start of the code point that was last returned 86 * by nextCaseMapCP(). 87 */ 88 public int getCPStart() { 89 return cpStart; 90 } 91 92 /** 93 * Returns the limit of the code point that was last returned 94 * by nextCaseMapCP(). 95 */ 96 public int getCPLimit() { 97 return cpLimit; 98 } 99 100 public int getCPLength() { 101 return cpLimit-cpStart; 102 } 103 104 // implement UCaseProps.ContextIterator 105 // The following code is not used anywhere in this private class 106 @Override 107 public void reset(int direction) { 108 if(direction>0) { 109 /* reset for forward iteration */ 110 dir=1; 111 index=cpLimit; 112 } else if(direction<0) { 113 /* reset for backward iteration */ 114 dir=-1; 115 index=cpStart; 116 } else { 117 // not a valid direction 118 dir=0; 119 index=0; 120 } 121 } 122 123 @Override 124 public int next() { 125 int c; 126 127 if(dir>0 && index<s.length()) { 128 c=Character.codePointAt(s, index); 129 index+=Character.charCount(c); 130 return c; 131 } else if(dir<0 && index>0) { 132 c=Character.codePointBefore(s, index); 133 index-=Character.charCount(c); 134 return c; 135 } 136 return -1; 137 } 138 139 // variables 140 protected CharSequence s; 141 protected int index, limit, cpStart, cpLimit; 142 protected int dir; // 0=initial state >0=forward <0=backward 143 } 144 145 public static final int TITLECASE_WHOLE_STRING = 0x20; 146 public static final int TITLECASE_SENTENCES = 0x40; 147 148 /** 149 * Bit mask for the titlecasing iterator options bit field. 150 * Currently only 3 out of 8 values are used: 151 * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES. 152 * See stringoptions.h. 153 * @hide draft / provisional / internal are hidden on Android 154 */ 155 private static final int TITLECASE_ITERATOR_MASK = 0xe0; 156 157 public static final int TITLECASE_ADJUST_TO_CASED = 0x400; 158 159 /** 160 * Bit mask for the titlecasing index adjustment options bit set. 161 * Currently two bits are defined: 162 * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED. 163 * See stringoptions.h. 164 * @hide draft / provisional / internal are hidden on Android 165 */ 166 private static final int TITLECASE_ADJUSTMENT_MASK = 0x600; 167 168 public static int addTitleAdjustmentOption(int options, int newOption) { 169 int adjOptions = options & TITLECASE_ADJUSTMENT_MASK; 170 if (adjOptions !=0 && adjOptions != newOption) { 171 throw new IllegalArgumentException("multiple titlecasing index adjustment options"); 172 } 173 return options | newOption; 174 } 175 176 private static final int LNS = 177 (1 << UCharacterCategory.UPPERCASE_LETTER) | 178 (1 << UCharacterCategory.LOWERCASE_LETTER) | 179 (1 << UCharacterCategory.TITLECASE_LETTER) | 180 // Not MODIFIER_LETTER: We count only cased modifier letters. 181 (1 << UCharacterCategory.OTHER_LETTER) | 182 183 (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) | 184 (1 << UCharacterCategory.LETTER_NUMBER) | 185 (1 << UCharacterCategory.OTHER_NUMBER) | 186 187 (1 << UCharacterCategory.MATH_SYMBOL) | 188 (1 << UCharacterCategory.CURRENCY_SYMBOL) | 189 (1 << UCharacterCategory.MODIFIER_SYMBOL) | 190 (1 << UCharacterCategory.OTHER_SYMBOL) | 191 192 (1 << UCharacterCategory.PRIVATE_USE); 193 194 private static boolean isLNS(int c) { 195 // Letter, number, symbol, 196 // or a private use code point because those are typically used as letters or numbers. 197 // Consider modifier letters only if they are cased. 198 int gc = UCharacterProperty.INSTANCE.getType(c); 199 return ((1 << gc) & LNS) != 0 || 200 (gc == UCharacterCategory.MODIFIER_LETTER && 201 UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE); 202 } 203 204 public static int addTitleIteratorOption(int options, int newOption) { 205 int iterOptions = options & TITLECASE_ITERATOR_MASK; 206 if (iterOptions !=0 && iterOptions != newOption) { 207 throw new IllegalArgumentException("multiple titlecasing iterator options"); 208 } 209 return options | newOption; 210 } 211 212 public static BreakIterator getTitleBreakIterator( 213 Locale locale, int options, BreakIterator iter) { 214 options &= TITLECASE_ITERATOR_MASK; 215 if (options != 0 && iter != null) { 216 throw new IllegalArgumentException( 217 "titlecasing iterator option together with an explicit iterator"); 218 } 219 if (iter == null) { 220 switch (options) { 221 case 0: 222 iter = BreakIterator.getWordInstance(locale); 223 break; 224 case TITLECASE_WHOLE_STRING: 225 iter = new WholeStringBreakIterator(); 226 break; 227 case TITLECASE_SENTENCES: 228 iter = BreakIterator.getSentenceInstance(locale); 229 break; 230 default: 231 throw new IllegalArgumentException("unknown titlecasing iterator option"); 232 } 233 } 234 return iter; 235 } 236 237 public static BreakIterator getTitleBreakIterator( 238 ULocale locale, int options, BreakIterator iter) { 239 options &= TITLECASE_ITERATOR_MASK; 240 if (options != 0 && iter != null) { 241 throw new IllegalArgumentException( 242 "titlecasing iterator option together with an explicit iterator"); 243 } 244 if (iter == null) { 245 switch (options) { 246 case 0: 247 iter = BreakIterator.getWordInstance(locale); 248 break; 249 case TITLECASE_WHOLE_STRING: 250 iter = new WholeStringBreakIterator(); 251 break; 252 case TITLECASE_SENTENCES: 253 iter = BreakIterator.getSentenceInstance(locale); 254 break; 255 default: 256 throw new IllegalArgumentException("unknown titlecasing iterator option"); 257 } 258 } 259 return iter; 260 } 261 262 /** 263 * Omit unchanged text when case-mapping with Edits. 264 */ 265 public static final int OMIT_UNCHANGED_TEXT = 0x4000; 266 267 private static final class WholeStringBreakIterator extends BreakIterator { 268 private int length; 269 270 private static void notImplemented() { 271 throw new UnsupportedOperationException("should not occur"); 272 } 273 274 @Override 275 public int first() { 276 return 0; 277 } 278 279 @Override 280 public int last() { 281 notImplemented(); 282 return 0; 283 } 284 285 @Override 286 public int next(int n) { 287 notImplemented(); 288 return 0; 289 } 290 291 @Override 292 public int next() { 293 return length; 294 } 295 296 @Override 297 public int previous() { 298 notImplemented(); 299 return 0; 300 } 301 302 @Override 303 public int following(int offset) { 304 notImplemented(); 305 return 0; 306 } 307 308 @Override 309 public int current() { 310 notImplemented(); 311 return 0; 312 } 313 314 @Override 315 public CharacterIterator getText() { 316 notImplemented(); 317 return null; 318 } 319 320 @Override 321 public void setText(CharacterIterator newText) { 322 length = newText.getEndIndex(); 323 } 324 325 @Override 326 public void setText(CharSequence newText) { 327 length = newText.length(); 328 } 329 330 @Override 331 public void setText(String newText) { 332 length = newText.length(); 333 } 334 } 335 336 private static int appendCodePoint(Appendable a, int c) throws IOException { 337 if (c <= Character.MAX_VALUE) { 338 a.append((char)c); 339 return 1; 340 } else { 341 a.append((char)(0xd7c0 + (c >> 10))); 342 a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); 343 return 2; 344 } 345 } 346 347 /** 348 * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. 349 * @throws IOException 350 */ 351 private static void appendResult(int result, Appendable dest, 352 int cpLength, int options, Edits edits) throws IOException { 353 // Decode the result. 354 if (result < 0) { 355 // (not) original code point 356 if (edits != null) { 357 edits.addUnchanged(cpLength); 358 } 359 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 360 return; 361 } 362 appendCodePoint(dest, ~result); 363 } else if (result <= UCaseProps.MAX_STRING_LENGTH) { 364 // The mapping has already been appended to result. 365 if (edits != null) { 366 edits.addReplace(cpLength, result); 367 } 368 } else { 369 // Append the single-code point mapping. 370 int length = appendCodePoint(dest, result); 371 if (edits != null) { 372 edits.addReplace(cpLength, length); 373 } 374 } 375 } 376 377 private static final void appendUnchanged(CharSequence src, int start, int length, 378 Appendable dest, int options, Edits edits) throws IOException { 379 if (length > 0) { 380 if (edits != null) { 381 edits.addUnchanged(length); 382 } 383 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 384 return; 385 } 386 dest.append(src, start, start + length); 387 } 388 } 389 390 private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { 391 if (!edits.hasChanges()) { 392 return src.toString(); 393 } 394 StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); 395 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 396 if (ei.hasChange()) { 397 int i = ei.replacementIndex(); 398 result.append(replacementChars, i, i + ei.newLength()); 399 } else { 400 int i = ei.sourceIndex(); 401 result.append(src, i, i + ei.oldLength()); 402 } 403 } 404 return result.toString(); 405 } 406 407 private static void internalToLower(int caseLocale, int options, StringContextIterator iter, 408 Appendable dest, Edits edits) throws IOException { 409 int c; 410 while ((c = iter.nextCaseMapCP()) >= 0) { 411 c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); 412 appendResult(c, dest, iter.getCPLength(), options, edits); 413 } 414 } 415 416 public static String toLower(int caseLocale, int options, CharSequence src) { 417 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 418 if (src.length() == 0) { 419 return src.toString(); 420 } 421 // Collect and apply only changes. 422 // Good if no or few changes. Bad (slow) if many changes. 423 Edits edits = new Edits(); 424 StringBuilder replacementChars = toLower( 425 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 426 return applyEdits(src, replacementChars, edits); 427 } else { 428 return toLower(caseLocale, options, src, 429 new StringBuilder(src.length()), null).toString(); 430 } 431 } 432 433 public static <A extends Appendable> A toLower(int caseLocale, int options, 434 CharSequence src, A dest, Edits edits) { 435 try { 436 if (edits != null) { 437 edits.reset(); 438 } 439 StringContextIterator iter = new StringContextIterator(src); 440 internalToLower(caseLocale, options, iter, dest, edits); 441 return dest; 442 } catch (IOException e) { 443 throw new ICUUncheckedIOException(e); 444 } 445 } 446 447 public static String toUpper(int caseLocale, int options, CharSequence src) { 448 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 449 if (src.length() == 0) { 450 return src.toString(); 451 } 452 // Collect and apply only changes. 453 // Good if no or few changes. Bad (slow) if many changes. 454 Edits edits = new Edits(); 455 StringBuilder replacementChars = toUpper( 456 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 457 return applyEdits(src, replacementChars, edits); 458 } else { 459 return toUpper(caseLocale, options, src, 460 new StringBuilder(src.length()), null).toString(); 461 } 462 } 463 464 public static <A extends Appendable> A toUpper(int caseLocale, int options, 465 CharSequence src, A dest, Edits edits) { 466 try { 467 if (edits != null) { 468 edits.reset(); 469 } 470 if (caseLocale == UCaseProps.LOC_GREEK) { 471 return GreekUpper.toUpper(options, src, dest, edits); 472 } 473 StringContextIterator iter = new StringContextIterator(src); 474 int c; 475 while ((c = iter.nextCaseMapCP()) >= 0) { 476 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale); 477 appendResult(c, dest, iter.getCPLength(), options, edits); 478 } 479 return dest; 480 } catch (IOException e) { 481 throw new ICUUncheckedIOException(e); 482 } 483 } 484 485 public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) { 486 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 487 if (src.length() == 0) { 488 return src.toString(); 489 } 490 // Collect and apply only changes. 491 // Good if no or few changes. Bad (slow) if many changes. 492 Edits edits = new Edits(); 493 StringBuilder replacementChars = toTitle( 494 caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src, 495 new StringBuilder(), edits); 496 return applyEdits(src, replacementChars, edits); 497 } else { 498 return toTitle(caseLocale, options, iter, src, 499 new StringBuilder(src.length()), null).toString(); 500 } 501 } 502 503 public static <A extends Appendable> A toTitle( 504 int caseLocale, int options, BreakIterator titleIter, 505 CharSequence src, A dest, Edits edits) { 506 try { 507 if (edits != null) { 508 edits.reset(); 509 } 510 511 /* set up local variables */ 512 StringContextIterator iter = new StringContextIterator(src); 513 int srcLength = src.length(); 514 int prev=0; 515 boolean isFirstIndex=true; 516 517 /* titlecasing loop */ 518 while(prev<srcLength) { 519 /* find next index where to titlecase */ 520 int index; 521 if(isFirstIndex) { 522 isFirstIndex=false; 523 index=titleIter.first(); 524 } else { 525 index=titleIter.next(); 526 } 527 if(index==BreakIterator.DONE || index>srcLength) { 528 index=srcLength; 529 } 530 531 /* 532 * Segment [prev..index[ into 3 parts: 533 * a) skipped characters (copy as-is) [prev..titleStart[ 534 * b) first letter (titlecase) [titleStart..titleLimit[ 535 * c) subsequent characters (lowercase) [titleLimit..index[ 536 */ 537 if(prev<index) { 538 // Find and copy skipped characters [prev..titleStart[ 539 int titleStart=prev; 540 iter.setLimit(index); 541 int c=iter.nextCaseMapCP(); 542 if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) { 543 // Adjust the titlecasing index to the next cased character, 544 // or to the next letter/number/symbol/private use. 545 // Stop with titleStart<titleLimit<=index 546 // if there is a character to be titlecased, 547 // or else stop with titleStart==titleLimit==index. 548 boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0; 549 while ((toCased ? 550 UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) : 551 !CaseMapImpl.isLNS(c)) && 552 (c=iter.nextCaseMapCP())>=0) {} 553 // If c<0 then we have only uncased characters in [prev..index[ 554 // and stopped with titleStart==titleLimit==index. 555 titleStart=iter.getCPStart(); 556 if (prev < titleStart) { 557 appendUnchanged(src, prev, titleStart-prev, dest, options, edits); 558 } 559 } 560 561 if(titleStart<index) { 562 int titleLimit=iter.getCPLimit(); 563 // titlecase c which is from [titleStart..titleLimit[ 564 c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale); 565 appendResult(c, dest, iter.getCPLength(), options, edits); 566 567 // Special case Dutch IJ titlecasing 568 if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) { 569 char c1 = src.charAt(titleStart); 570 if ((c1 == 'i' || c1 == 'I')) { 571 char c2 = src.charAt(titleStart+1); 572 if (c2 == 'j') { 573 dest.append('J'); 574 if (edits != null) { 575 edits.addReplace(1, 1); 576 } 577 c = iter.nextCaseMapCP(); 578 titleLimit++; 579 assert c == c2; 580 assert titleLimit == iter.getCPLimit(); 581 } else if (c2 == 'J') { 582 // Keep the capital J from getting lowercased. 583 appendUnchanged(src, titleStart + 1, 1, dest, options, edits); 584 c = iter.nextCaseMapCP(); 585 titleLimit++; 586 assert c == c2; 587 assert titleLimit == iter.getCPLimit(); 588 } 589 } 590 } 591 592 // lowercase [titleLimit..index[ 593 if(titleLimit<index) { 594 if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) { 595 // Normal operation: Lowercase the rest of the word. 596 internalToLower(caseLocale, options, iter, dest, edits); 597 } else { 598 // Optionally just copy the rest of the word unchanged. 599 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits); 600 iter.moveToLimit(); 601 } 602 } 603 } 604 } 605 606 prev=index; 607 } 608 return dest; 609 } catch (IOException e) { 610 throw new ICUUncheckedIOException(e); 611 } 612 } 613 614 public static String fold(int options, CharSequence src) { 615 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 616 if (src.length() == 0) { 617 return src.toString(); 618 } 619 // Collect and apply only changes. 620 // Good if no or few changes. Bad (slow) if many changes. 621 Edits edits = new Edits(); 622 StringBuilder replacementChars = fold( 623 options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 624 return applyEdits(src, replacementChars, edits); 625 } else { 626 return fold(options, src, new StringBuilder(src.length()), null).toString(); 627 } 628 } 629 630 public static <A extends Appendable> A fold(int options, 631 CharSequence src, A dest, Edits edits) { 632 try { 633 if (edits != null) { 634 edits.reset(); 635 } 636 int length = src.length(); 637 for (int i = 0; i < length;) { 638 int c = Character.codePointAt(src, i); 639 int cpLength = Character.charCount(c); 640 i += cpLength; 641 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options); 642 appendResult(c, dest, cpLength, options, edits); 643 } 644 return dest; 645 } catch (IOException e) { 646 throw new ICUUncheckedIOException(e); 647 } 648 } 649 650 private static final class GreekUpper { 651 // Data bits. 652 private static final int UPPER_MASK = 0x3ff; 653 private static final int HAS_VOWEL = 0x1000; 654 private static final int HAS_YPOGEGRAMMENI = 0x2000; 655 private static final int HAS_ACCENT = 0x4000; 656 private static final int HAS_DIALYTIKA = 0x8000; 657 // Further bits during data building and processing, not stored in the data map. 658 private static final int HAS_COMBINING_DIALYTIKA = 0x10000; 659 private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000; 660 661 private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 662 private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 663 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 664 private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 665 666 // State bits. 667 private static final int AFTER_CASED = 1; 668 private static final int AFTER_VOWEL_WITH_ACCENT = 2; 669 670 // Data generated by prototype code, see 671 // http://site.icu-project.org/design/case/greek-upper 672 // TODO: Move this data into ucase.icu. 673 private static final char[] data0370 = { 674 // U+0370..03FF 675 0x0370, // 676 0x0370, // 677 0x0372, // 678 0x0372, // 679 0, 680 0, 681 0x0376, // 682 0x0376, // 683 0, 684 0, 685 0x037A, // 686 0x03FD, // 687 0x03FE, // 688 0x03FF, // 689 0, 690 0x037F, // 691 0, 692 0, 693 0, 694 0, 695 0, 696 0, 697 0x0391 | HAS_VOWEL | HAS_ACCENT, // 698 0, 699 0x0395 | HAS_VOWEL | HAS_ACCENT, // 700 0x0397 | HAS_VOWEL | HAS_ACCENT, // 701 0x0399 | HAS_VOWEL | HAS_ACCENT, // 702 0, 703 0x039F | HAS_VOWEL | HAS_ACCENT, // 704 0, 705 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 706 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 707 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 708 0x0391 | HAS_VOWEL, // 709 0x0392, // 710 0x0393, // 711 0x0394, // 712 0x0395 | HAS_VOWEL, // 713 0x0396, // 714 0x0397 | HAS_VOWEL, // 715 0x0398, // 716 0x0399 | HAS_VOWEL, // 717 0x039A, // 718 0x039B, // 719 0x039C, // 720 0x039D, // 721 0x039E, // 722 0x039F | HAS_VOWEL, // 723 0x03A0, // 724 0x03A1, // 725 0, 726 0x03A3, // 727 0x03A4, // 728 0x03A5 | HAS_VOWEL, // 729 0x03A6, // 730 0x03A7, // 731 0x03A8, // 732 0x03A9 | HAS_VOWEL, // 733 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // 734 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // 735 0x0391 | HAS_VOWEL | HAS_ACCENT, // 736 0x0395 | HAS_VOWEL | HAS_ACCENT, // 737 0x0397 | HAS_VOWEL | HAS_ACCENT, // 738 0x0399 | HAS_VOWEL | HAS_ACCENT, // 739 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 740 0x0391 | HAS_VOWEL, // 741 0x0392, // 742 0x0393, // 743 0x0394, // 744 0x0395 | HAS_VOWEL, // 745 0x0396, // 746 0x0397 | HAS_VOWEL, // 747 0x0398, // 748 0x0399 | HAS_VOWEL, // 749 0x039A, // 750 0x039B, // 751 0x039C, // 752 0x039D, // 753 0x039E, // 754 0x039F | HAS_VOWEL, // 755 0x03A0, // 756 0x03A1, // 757 0x03A3, // 758 0x03A3, // 759 0x03A4, // 760 0x03A5 | HAS_VOWEL, // 761 0x03A6, // 762 0x03A7, // 763 0x03A8, // 764 0x03A9 | HAS_VOWEL, // 765 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // 766 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // 767 0x039F | HAS_VOWEL | HAS_ACCENT, // 768 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 769 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 770 0x03CF, // 771 0x0392, // 772 0x0398, // 773 0x03D2, // 774 0x03D2 | HAS_ACCENT, // 775 0x03D2 | HAS_DIALYTIKA, // 776 0x03A6, // 777 0x03A0, // 778 0x03CF, // 779 0x03D8, // 780 0x03D8, // 781 0x03DA, // 782 0x03DA, // 783 0x03DC, // 784 0x03DC, // 785 0x03DE, // 786 0x03DE, // 787 0x03E0, // 788 0x03E0, // 789 0, 790 0, 791 0, 792 0, 793 0, 794 0, 795 0, 796 0, 797 0, 798 0, 799 0, 800 0, 801 0, 802 0, 803 0x039A, // 804 0x03A1, // 805 0x03F9, // 806 0x037F, // 807 0x03F4, // 808 0x0395 | HAS_VOWEL, // 809 0, 810 0x03F7, // 811 0x03F7, // 812 0x03F9, // 813 0x03FA, // 814 0x03FA, // 815 0x03FC, // 816 0x03FD, // 817 0x03FE, // 818 0x03FF, // 819 }; 820 821 private static final char[] data1F00 = { 822 // U+1F00..1FFF 823 0x0391 | HAS_VOWEL, // 824 0x0391 | HAS_VOWEL, // 825 0x0391 | HAS_VOWEL | HAS_ACCENT, // 826 0x0391 | HAS_VOWEL | HAS_ACCENT, // 827 0x0391 | HAS_VOWEL | HAS_ACCENT, // 828 0x0391 | HAS_VOWEL | HAS_ACCENT, // 829 0x0391 | HAS_VOWEL | HAS_ACCENT, // 830 0x0391 | HAS_VOWEL | HAS_ACCENT, // 831 0x0391 | HAS_VOWEL, // 832 0x0391 | HAS_VOWEL, // 833 0x0391 | HAS_VOWEL | HAS_ACCENT, // 834 0x0391 | HAS_VOWEL | HAS_ACCENT, // 835 0x0391 | HAS_VOWEL | HAS_ACCENT, // 836 0x0391 | HAS_VOWEL | HAS_ACCENT, // 837 0x0391 | HAS_VOWEL | HAS_ACCENT, // 838 0x0391 | HAS_VOWEL | HAS_ACCENT, // 839 0x0395 | HAS_VOWEL, // 840 0x0395 | HAS_VOWEL, // 841 0x0395 | HAS_VOWEL | HAS_ACCENT, // 842 0x0395 | HAS_VOWEL | HAS_ACCENT, // 843 0x0395 | HAS_VOWEL | HAS_ACCENT, // 844 0x0395 | HAS_VOWEL | HAS_ACCENT, // 845 0, 846 0, 847 0x0395 | HAS_VOWEL, // 848 0x0395 | HAS_VOWEL, // 849 0x0395 | HAS_VOWEL | HAS_ACCENT, // 850 0x0395 | HAS_VOWEL | HAS_ACCENT, // 851 0x0395 | HAS_VOWEL | HAS_ACCENT, // 852 0x0395 | HAS_VOWEL | HAS_ACCENT, // 853 0, 854 0, 855 0x0397 | HAS_VOWEL, // 856 0x0397 | HAS_VOWEL, // 857 0x0397 | HAS_VOWEL | HAS_ACCENT, // 858 0x0397 | HAS_VOWEL | HAS_ACCENT, // 859 0x0397 | HAS_VOWEL | HAS_ACCENT, // 860 0x0397 | HAS_VOWEL | HAS_ACCENT, // 861 0x0397 | HAS_VOWEL | HAS_ACCENT, // 862 0x0397 | HAS_VOWEL | HAS_ACCENT, // 863 0x0397 | HAS_VOWEL, // 864 0x0397 | HAS_VOWEL, // 865 0x0397 | HAS_VOWEL | HAS_ACCENT, // 866 0x0397 | HAS_VOWEL | HAS_ACCENT, // 867 0x0397 | HAS_VOWEL | HAS_ACCENT, // 868 0x0397 | HAS_VOWEL | HAS_ACCENT, // 869 0x0397 | HAS_VOWEL | HAS_ACCENT, // 870 0x0397 | HAS_VOWEL | HAS_ACCENT, // 871 0x0399 | HAS_VOWEL, // 872 0x0399 | HAS_VOWEL, // 873 0x0399 | HAS_VOWEL | HAS_ACCENT, // 874 0x0399 | HAS_VOWEL | HAS_ACCENT, // 875 0x0399 | HAS_VOWEL | HAS_ACCENT, // 876 0x0399 | HAS_VOWEL | HAS_ACCENT, // 877 0x0399 | HAS_VOWEL | HAS_ACCENT, // 878 0x0399 | HAS_VOWEL | HAS_ACCENT, // 879 0x0399 | HAS_VOWEL, // 880 0x0399 | HAS_VOWEL, // 881 0x0399 | HAS_VOWEL | HAS_ACCENT, // 882 0x0399 | HAS_VOWEL | HAS_ACCENT, // 883 0x0399 | HAS_VOWEL | HAS_ACCENT, // 884 0x0399 | HAS_VOWEL | HAS_ACCENT, // 885 0x0399 | HAS_VOWEL | HAS_ACCENT, // 886 0x0399 | HAS_VOWEL | HAS_ACCENT, // 887 0x039F | HAS_VOWEL, // 888 0x039F | HAS_VOWEL, // 889 0x039F | HAS_VOWEL | HAS_ACCENT, // 890 0x039F | HAS_VOWEL | HAS_ACCENT, // 891 0x039F | HAS_VOWEL | HAS_ACCENT, // 892 0x039F | HAS_VOWEL | HAS_ACCENT, // 893 0, 894 0, 895 0x039F | HAS_VOWEL, // 896 0x039F | HAS_VOWEL, // 897 0x039F | HAS_VOWEL | HAS_ACCENT, // 898 0x039F | HAS_VOWEL | HAS_ACCENT, // 899 0x039F | HAS_VOWEL | HAS_ACCENT, // 900 0x039F | HAS_VOWEL | HAS_ACCENT, // 901 0, 902 0, 903 0x03A5 | HAS_VOWEL, // 904 0x03A5 | HAS_VOWEL, // 905 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 906 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 907 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 908 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 909 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 910 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 911 0, 912 0x03A5 | HAS_VOWEL, // 913 0, 914 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 915 0, 916 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 917 0, 918 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 919 0x03A9 | HAS_VOWEL, // 920 0x03A9 | HAS_VOWEL, // 921 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 922 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 923 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 924 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 925 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 926 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 927 0x03A9 | HAS_VOWEL, // 928 0x03A9 | HAS_VOWEL, // 929 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 930 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 931 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 932 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 933 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 934 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 935 0x0391 | HAS_VOWEL | HAS_ACCENT, // 936 0x0391 | HAS_VOWEL | HAS_ACCENT, // 937 0x0395 | HAS_VOWEL | HAS_ACCENT, // 938 0x0395 | HAS_VOWEL | HAS_ACCENT, // 939 0x0397 | HAS_VOWEL | HAS_ACCENT, // 940 0x0397 | HAS_VOWEL | HAS_ACCENT, // 941 0x0399 | HAS_VOWEL | HAS_ACCENT, // 942 0x0399 | HAS_VOWEL | HAS_ACCENT, // 943 0x039F | HAS_VOWEL | HAS_ACCENT, // 944 0x039F | HAS_VOWEL | HAS_ACCENT, // 945 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 946 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 947 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 948 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 949 0, 950 0, 951 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 952 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 953 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 954 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 955 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 956 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 957 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 958 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 959 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 960 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 961 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 962 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 963 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 964 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 965 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 966 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 967 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 968 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 969 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 970 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 971 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 972 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 973 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 974 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 975 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 976 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 977 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 978 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 979 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 980 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 981 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 982 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 983 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 984 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 985 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 986 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 987 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 988 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 989 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 990 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 991 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 992 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 993 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 994 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 995 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 996 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 997 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 998 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 999 0x0391 | HAS_VOWEL, // 1000 0x0391 | HAS_VOWEL, // 1001 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1002 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1003 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1004 0, 1005 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1006 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1007 0x0391 | HAS_VOWEL, // 1008 0x0391 | HAS_VOWEL, // 1009 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1010 0x0391 | HAS_VOWEL | HAS_ACCENT, // 1011 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1012 0, 1013 0x0399 | HAS_VOWEL, // 1014 0, 1015 0, 1016 0, 1017 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1018 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1019 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1020 0, 1021 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1022 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1023 0x0395 | HAS_VOWEL | HAS_ACCENT, // 1024 0x0395 | HAS_VOWEL | HAS_ACCENT, // 1025 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1026 0x0397 | HAS_VOWEL | HAS_ACCENT, // 1027 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1028 0, 1029 0, 1030 0, 1031 0x0399 | HAS_VOWEL, // 1032 0x0399 | HAS_VOWEL, // 1033 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1034 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1035 0, 1036 0, 1037 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1038 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1039 0x0399 | HAS_VOWEL, // 1040 0x0399 | HAS_VOWEL, // 1041 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1042 0x0399 | HAS_VOWEL | HAS_ACCENT, // 1043 0, 1044 0, 1045 0, 1046 0, 1047 0x03A5 | HAS_VOWEL, // 1048 0x03A5 | HAS_VOWEL, // 1049 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1050 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1051 0x03A1, // 1052 0x03A1, // 1053 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1054 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // 1055 0x03A5 | HAS_VOWEL, // 1056 0x03A5 | HAS_VOWEL, // 1057 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1058 0x03A5 | HAS_VOWEL | HAS_ACCENT, // 1059 0x03A1, // 1060 0, 1061 0, 1062 0, 1063 0, 1064 0, 1065 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1066 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1067 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1068 0, 1069 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1070 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // 1071 0x039F | HAS_VOWEL | HAS_ACCENT, // 1072 0x039F | HAS_VOWEL | HAS_ACCENT, // 1073 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1074 0x03A9 | HAS_VOWEL | HAS_ACCENT, // 1075 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // 1076 0, 1077 0, 1078 0, 1079 }; 1080 1081 // U+2126 Ohm sign 1082 private static final char data2126 = 0x03A9 | HAS_VOWEL; // 1083 1084 private static final int getLetterData(int c) { 1085 if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { 1086 return 0; 1087 } else if (c <= 0x3ff) { 1088 return data0370[c - 0x370]; 1089 } else if (c <= 0x1fff) { 1090 return data1F00[c - 0x1f00]; 1091 } else if (c == 0x2126) { 1092 return data2126; 1093 } else { 1094 return 0; 1095 } 1096 } 1097 1098 /** 1099 * Returns a non-zero value for each of the Greek combining diacritics 1100 * listed in The Unicode Standard, version 8, chapter 7.2 Greek, 1101 * plus some perispomeni look-alikes. 1102 */ 1103 private static final int getDiacriticData(int c) { 1104 switch (c) { 1105 case '\u0300': // varia 1106 case '\u0301': // tonos = oxia 1107 case '\u0342': // perispomeni 1108 case '\u0302': // circumflex can look like perispomeni 1109 case '\u0303': // tilde can look like perispomeni 1110 case '\u0311': // inverted breve can look like perispomeni 1111 return HAS_ACCENT; 1112 case '\u0308': // dialytika = diaeresis 1113 return HAS_COMBINING_DIALYTIKA; 1114 case '\u0344': // dialytika tonos 1115 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; 1116 case '\u0345': // ypogegrammeni = iota subscript 1117 return HAS_YPOGEGRAMMENI; 1118 case '\u0304': // macron 1119 case '\u0306': // breve 1120 case '\u0313': // comma above 1121 case '\u0314': // reversed comma above 1122 case '\u0343': // koronis 1123 return HAS_OTHER_GREEK_DIACRITIC; 1124 default: 1125 return 0; 1126 } 1127 } 1128 1129 private static boolean isFollowedByCasedLetter(CharSequence s, int i) { 1130 while (i < s.length()) { 1131 int c = Character.codePointAt(s, i); 1132 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1133 if ((type & UCaseProps.IGNORABLE) != 0) { 1134 // Case-ignorable, continue with the loop. 1135 i += Character.charCount(c); 1136 } else if (type != UCaseProps.NONE) { 1137 return true; // Followed by cased letter. 1138 } else { 1139 return false; // Uncased and not case-ignorable. 1140 } 1141 } 1142 return false; // Not followed by cased letter. 1143 } 1144 1145 /** 1146 * Greek string uppercasing with a state machine. 1147 * Probably simpler than a stateless function that has to figure out complex context-before 1148 * for each character. 1149 * TODO: Try to re-consolidate one way or another with the non-Greek function. 1150 * 1151 * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). 1152 * @throws IOException 1153 */ 1154 private static <A extends Appendable> A toUpper(int options, 1155 CharSequence src, A dest, Edits edits) throws IOException { 1156 int state = 0; 1157 for (int i = 0; i < src.length();) { 1158 int c = Character.codePointAt(src, i); 1159 int nextIndex = i + Character.charCount(c); 1160 int nextState = 0; 1161 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1162 if ((type & UCaseProps.IGNORABLE) != 0) { 1163 // c is case-ignorable 1164 nextState |= (state & AFTER_CASED); 1165 } else if (type != UCaseProps.NONE) { 1166 // c is cased 1167 nextState |= AFTER_CASED; 1168 } 1169 int data = getLetterData(c); 1170 if (data > 0) { 1171 int upper = data & UPPER_MASK; 1172 // Add a dialytika to this iota or ypsilon vowel 1173 // if we removed a tonos from the previous vowel, 1174 // and that previous vowel did not also have (or gain) a dialytika. 1175 // Adding one only to the final vowel in a longer sequence 1176 // (which does not occur in normal writing) would require lookahead. 1177 // Set the same flag as for preserving an existing dialytika. 1178 if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && 1179 (upper == '' || upper == '')) { 1180 data |= HAS_DIALYTIKA; 1181 } 1182 int numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. 1183 if ((data & HAS_YPOGEGRAMMENI) != 0) { 1184 numYpogegrammeni = 1; 1185 } 1186 // Skip combining diacritics after this Greek letter. 1187 while (nextIndex < src.length()) { 1188 int diacriticData = getDiacriticData(src.charAt(nextIndex)); 1189 if (diacriticData != 0) { 1190 data |= diacriticData; 1191 if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { 1192 ++numYpogegrammeni; 1193 } 1194 ++nextIndex; 1195 } else { 1196 break; // not a Greek diacritic 1197 } 1198 } 1199 if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { 1200 nextState |= AFTER_VOWEL_WITH_ACCENT; 1201 } 1202 // Map according to Greek rules. 1203 boolean addTonos = false; 1204 if (upper == '' && 1205 (data & HAS_ACCENT) != 0 && 1206 numYpogegrammeni == 0 && 1207 (state & AFTER_CASED) == 0 && 1208 !isFollowedByCasedLetter(src, nextIndex)) { 1209 // Keep disjunctive "or" with (only) a tonos. 1210 // We use the same "word boundary" conditions as for the Final_Sigma test. 1211 if (i == nextIndex) { 1212 upper = ''; // Preserve the precomposed form. 1213 } else { 1214 addTonos = true; 1215 } 1216 } else if ((data & HAS_DIALYTIKA) != 0) { 1217 // Preserve a vowel with dialytika in precomposed form if it exists. 1218 if (upper == '') { 1219 upper = ''; 1220 data &= ~HAS_EITHER_DIALYTIKA; 1221 } else if (upper == '') { 1222 upper = ''; 1223 data &= ~HAS_EITHER_DIALYTIKA; 1224 } 1225 } 1226 1227 boolean change; 1228 if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) { 1229 change = true; // common, simple usage 1230 } else { 1231 // Find out first whether we are changing the text. 1232 change = src.charAt(i) != upper || numYpogegrammeni > 0; 1233 int i2 = i + 1; 1234 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1235 change |= i2 >= nextIndex || src.charAt(i2) != 0x308; 1236 ++i2; 1237 } 1238 if (addTonos) { 1239 change |= i2 >= nextIndex || src.charAt(i2) != 0x301; 1240 ++i2; 1241 } 1242 int oldLength = nextIndex - i; 1243 int newLength = (i2 - i) + numYpogegrammeni; 1244 change |= oldLength != newLength; 1245 if (change) { 1246 if (edits != null) { 1247 edits.addReplace(oldLength, newLength); 1248 } 1249 } else { 1250 if (edits != null) { 1251 edits.addUnchanged(oldLength); 1252 } 1253 // Write unchanged text? 1254 change = (options & OMIT_UNCHANGED_TEXT) == 0; 1255 } 1256 } 1257 1258 if (change) { 1259 dest.append((char)upper); 1260 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1261 dest.append('\u0308'); // restore or add a dialytika 1262 } 1263 if (addTonos) { 1264 dest.append('\u0301'); 1265 } 1266 while (numYpogegrammeni > 0) { 1267 dest.append(''); 1268 --numYpogegrammeni; 1269 } 1270 } 1271 } else { 1272 c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK); 1273 appendResult(c, dest, nextIndex - i, options, edits); 1274 } 1275 i = nextIndex; 1276 state = nextState; 1277 } 1278 return dest; 1279 } 1280 } 1281 } 1282