Home | History | Annotate | Download | only in impl
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 package com.ibm.icu.impl;
      4 
      5 import java.io.IOException;
      6 import java.text.CharacterIterator;
      7 import java.util.Locale;
      8 
      9 import com.ibm.icu.lang.UCharacter;
     10 import com.ibm.icu.lang.UCharacterCategory;
     11 import com.ibm.icu.text.BreakIterator;
     12 import com.ibm.icu.text.Edits;
     13 import com.ibm.icu.util.ICUUncheckedIOException;
     14 import com.ibm.icu.util.ULocale;
     15 
     16 public final class CaseMapImpl {
     17     /**
     18      * Implementation of UCaseProps.ContextIterator, iterates over a String.
     19      * See ustrcase.c/utf16_caseContextIterator().
     20      */
     21     public static final class StringContextIterator implements UCaseProps.ContextIterator {
     22         /**
     23          * Constructor.
     24          * @param src String to iterate over.
     25          */
     26         public StringContextIterator(CharSequence src) {
     27             this.s=src;
     28             limit=src.length();
     29             cpStart=cpLimit=index=0;
     30             dir=0;
     31         }
     32 
     33         /**
     34          * Set the iteration limit for nextCaseMapCP() to an index within the string.
     35          * If the limit parameter is negative or past the string, then the
     36          * string length is restored as the iteration limit.
     37          *
     38          * <p>This limit does not affect the next() function which always
     39          * iterates to the very end of the string.
     40          *
     41          * @param lim The iteration limit.
     42          */
     43         public void setLimit(int lim) {
     44             if(0<=lim && lim<=s.length()) {
     45                 limit=lim;
     46             } else {
     47                 limit=s.length();
     48             }
     49         }
     50 
     51         /**
     52          * Move to the iteration limit without fetching code points up to there.
     53          */
     54         public void moveToLimit() {
     55             cpStart=cpLimit=limit;
     56         }
     57 
     58         /**
     59          * Iterate forward through the string to fetch the next code point
     60          * to be case-mapped, and set the context indexes for it.
     61          *
     62          * <p>When the iteration limit is reached (and -1 is returned),
     63          * getCPStart() will be at the iteration limit.
     64          *
     65          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
     66          *
     67          * @return The next code point to be case-mapped, or <0 when the iteration is done.
     68          */
     69         public int nextCaseMapCP() {
     70             cpStart=cpLimit;
     71             if(cpLimit<limit) {
     72                 int c=Character.codePointAt(s, cpLimit);
     73                 cpLimit+=Character.charCount(c);
     74                 return c;
     75             } else {
     76                 return -1;
     77             }
     78         }
     79 
     80         /**
     81          * Returns the start of the code point that was last returned
     82          * by nextCaseMapCP().
     83          */
     84         public int getCPStart() {
     85             return cpStart;
     86         }
     87 
     88         /**
     89          * Returns the limit of the code point that was last returned
     90          * by nextCaseMapCP().
     91          */
     92         public int getCPLimit() {
     93             return cpLimit;
     94         }
     95 
     96         public int getCPLength() {
     97             return cpLimit-cpStart;
     98         }
     99 
    100         // implement UCaseProps.ContextIterator
    101         // The following code is not used anywhere in this private class
    102         @Override
    103         public void reset(int direction) {
    104             if(direction>0) {
    105                 /* reset for forward iteration */
    106                 dir=1;
    107                 index=cpLimit;
    108             } else if(direction<0) {
    109                 /* reset for backward iteration */
    110                 dir=-1;
    111                 index=cpStart;
    112             } else {
    113                 // not a valid direction
    114                 dir=0;
    115                 index=0;
    116             }
    117         }
    118 
    119         @Override
    120         public int next() {
    121             int c;
    122 
    123             if(dir>0 && index<s.length()) {
    124                 c=Character.codePointAt(s, index);
    125                 index+=Character.charCount(c);
    126                 return c;
    127             } else if(dir<0 && index>0) {
    128                 c=Character.codePointBefore(s, index);
    129                 index-=Character.charCount(c);
    130                 return c;
    131             }
    132             return -1;
    133         }
    134 
    135         // variables
    136         protected CharSequence s;
    137         protected int index, limit, cpStart, cpLimit;
    138         protected int dir; // 0=initial state  >0=forward  <0=backward
    139     }
    140 
    141     public static final int TITLECASE_WHOLE_STRING = 0x20;
    142     public static final int TITLECASE_SENTENCES = 0x40;
    143 
    144     /**
    145      * Bit mask for the titlecasing iterator options bit field.
    146      * Currently only 3 out of 8 values are used:
    147      * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES.
    148      * See stringoptions.h.
    149      * @internal
    150      */
    151     private static final int TITLECASE_ITERATOR_MASK = 0xe0;
    152 
    153     public static final int TITLECASE_ADJUST_TO_CASED = 0x400;
    154 
    155     /**
    156      * Bit mask for the titlecasing index adjustment options bit set.
    157      * Currently two bits are defined:
    158      * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED.
    159      * See stringoptions.h.
    160      * @internal
    161      */
    162     private static final int TITLECASE_ADJUSTMENT_MASK = 0x600;
    163 
    164     public static int addTitleAdjustmentOption(int options, int newOption) {
    165         int adjOptions = options & TITLECASE_ADJUSTMENT_MASK;
    166         if (adjOptions !=0 && adjOptions != newOption) {
    167             throw new IllegalArgumentException("multiple titlecasing index adjustment options");
    168         }
    169         return options | newOption;
    170     }
    171 
    172     private static final int LNS =
    173             (1 << UCharacterCategory.UPPERCASE_LETTER) |
    174             (1 << UCharacterCategory.LOWERCASE_LETTER) |
    175             (1 << UCharacterCategory.TITLECASE_LETTER) |
    176             // Not MODIFIER_LETTER: We count only cased modifier letters.
    177             (1 << UCharacterCategory.OTHER_LETTER) |
    178 
    179             (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) |
    180             (1 << UCharacterCategory.LETTER_NUMBER) |
    181             (1 << UCharacterCategory.OTHER_NUMBER) |
    182 
    183             (1 << UCharacterCategory.MATH_SYMBOL) |
    184             (1 << UCharacterCategory.CURRENCY_SYMBOL) |
    185             (1 << UCharacterCategory.MODIFIER_SYMBOL) |
    186             (1 << UCharacterCategory.OTHER_SYMBOL) |
    187 
    188             (1 << UCharacterCategory.PRIVATE_USE);
    189 
    190     private static boolean isLNS(int c) {
    191         // Letter, number, symbol,
    192         // or a private use code point because those are typically used as letters or numbers.
    193         // Consider modifier letters only if they are cased.
    194         int gc = UCharacterProperty.INSTANCE.getType(c);
    195         return ((1 << gc) & LNS) != 0 ||
    196                 (gc == UCharacterCategory.MODIFIER_LETTER &&
    197                     UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE);
    198     }
    199 
    200     public static int addTitleIteratorOption(int options, int newOption) {
    201         int iterOptions = options & TITLECASE_ITERATOR_MASK;
    202         if (iterOptions !=0 && iterOptions != newOption) {
    203             throw new IllegalArgumentException("multiple titlecasing iterator options");
    204         }
    205         return options | newOption;
    206     }
    207 
    208     public static BreakIterator getTitleBreakIterator(
    209             Locale locale, int options, BreakIterator iter) {
    210         options &= TITLECASE_ITERATOR_MASK;
    211         if (options != 0 && iter != null) {
    212             throw new IllegalArgumentException(
    213                     "titlecasing iterator option together with an explicit iterator");
    214         }
    215         if (iter == null) {
    216             switch (options) {
    217             case 0:
    218                 iter = BreakIterator.getWordInstance(locale);
    219                 break;
    220             case TITLECASE_WHOLE_STRING:
    221                 iter = new WholeStringBreakIterator();
    222                 break;
    223             case TITLECASE_SENTENCES:
    224                 iter = BreakIterator.getSentenceInstance(locale);
    225                 break;
    226             default:
    227                 throw new IllegalArgumentException("unknown titlecasing iterator option");
    228             }
    229         }
    230         return iter;
    231     }
    232 
    233     public static BreakIterator getTitleBreakIterator(
    234             ULocale locale, int options, BreakIterator iter) {
    235         options &= TITLECASE_ITERATOR_MASK;
    236         if (options != 0 && iter != null) {
    237             throw new IllegalArgumentException(
    238                     "titlecasing iterator option together with an explicit iterator");
    239         }
    240         if (iter == null) {
    241             switch (options) {
    242             case 0:
    243                 iter = BreakIterator.getWordInstance(locale);
    244                 break;
    245             case TITLECASE_WHOLE_STRING:
    246                 iter = new WholeStringBreakIterator();
    247                 break;
    248             case TITLECASE_SENTENCES:
    249                 iter = BreakIterator.getSentenceInstance(locale);
    250                 break;
    251             default:
    252                 throw new IllegalArgumentException("unknown titlecasing iterator option");
    253             }
    254         }
    255         return iter;
    256     }
    257 
    258     /**
    259      * Omit unchanged text when case-mapping with Edits.
    260      */
    261     public static final int OMIT_UNCHANGED_TEXT = 0x4000;
    262 
    263     private static final class WholeStringBreakIterator extends BreakIterator {
    264         private int length;
    265 
    266         private static void notImplemented() {
    267             throw new UnsupportedOperationException("should not occur");
    268         }
    269 
    270         @Override
    271         public int first() {
    272             return 0;
    273         }
    274 
    275         @Override
    276         public int last() {
    277             notImplemented();
    278             return 0;
    279         }
    280 
    281         @Override
    282         public int next(int n) {
    283             notImplemented();
    284             return 0;
    285         }
    286 
    287         @Override
    288         public int next() {
    289             return length;
    290         }
    291 
    292         @Override
    293         public int previous() {
    294             notImplemented();
    295             return 0;
    296         }
    297 
    298         @Override
    299         public int following(int offset) {
    300             notImplemented();
    301             return 0;
    302         }
    303 
    304         @Override
    305         public int current() {
    306             notImplemented();
    307             return 0;
    308         }
    309 
    310         @Override
    311         public CharacterIterator getText() {
    312             notImplemented();
    313             return null;
    314         }
    315 
    316         @Override
    317         public void setText(CharacterIterator newText) {
    318             length = newText.getEndIndex();
    319         }
    320 
    321         @Override
    322         public void setText(CharSequence newText) {
    323             length = newText.length();
    324         }
    325 
    326         @Override
    327         public void setText(String newText) {
    328             length = newText.length();
    329         }
    330     }
    331 
    332     private static int appendCodePoint(Appendable a, int c) throws IOException {
    333         if (c <= Character.MAX_VALUE) {
    334             a.append((char)c);
    335             return 1;
    336         } else {
    337             a.append((char)(0xd7c0 + (c >> 10)));
    338             a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
    339             return 2;
    340         }
    341     }
    342 
    343     /**
    344      * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
    345      * @throws IOException
    346      */
    347     private static void appendResult(int result, Appendable dest,
    348             int cpLength, int options, Edits edits) throws IOException {
    349         // Decode the result.
    350         if (result < 0) {
    351             // (not) original code point
    352             if (edits != null) {
    353                 edits.addUnchanged(cpLength);
    354             }
    355             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
    356                 return;
    357             }
    358             appendCodePoint(dest, ~result);
    359         } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
    360             // The mapping has already been appended to result.
    361             if (edits != null) {
    362                 edits.addReplace(cpLength, result);
    363             }
    364         } else {
    365             // Append the single-code point mapping.
    366             int length = appendCodePoint(dest, result);
    367             if (edits != null) {
    368                 edits.addReplace(cpLength, length);
    369             }
    370         }
    371     }
    372 
    373     private static final void appendUnchanged(CharSequence src, int start, int length,
    374             Appendable dest, int options, Edits edits) throws IOException {
    375         if (length > 0) {
    376             if (edits != null) {
    377                 edits.addUnchanged(length);
    378             }
    379             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
    380                 return;
    381             }
    382             dest.append(src, start, start + length);
    383         }
    384     }
    385 
    386     private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
    387         if (!edits.hasChanges()) {
    388             return src.toString();
    389         }
    390         StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta());
    391         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
    392             if (ei.hasChange()) {
    393                 int i = ei.replacementIndex();
    394                 result.append(replacementChars, i, i + ei.newLength());
    395             } else {
    396                 int i = ei.sourceIndex();
    397                 result.append(src, i, i + ei.oldLength());
    398             }
    399         }
    400         return result.toString();
    401     }
    402 
    403     private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
    404             Appendable dest, Edits edits) throws IOException {
    405         int c;
    406         while ((c = iter.nextCaseMapCP()) >= 0) {
    407             c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
    408             appendResult(c, dest, iter.getCPLength(), options, edits);
    409         }
    410     }
    411 
    412     public static String toLower(int caseLocale, int options, CharSequence src) {
    413         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    414             if (src.length() == 0) {
    415                 return src.toString();
    416             }
    417             // Collect and apply only changes.
    418             // Good if no or few changes. Bad (slow) if many changes.
    419             Edits edits = new Edits();
    420             StringBuilder replacementChars = toLower(
    421                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    422             return applyEdits(src, replacementChars, edits);
    423         } else {
    424             return toLower(caseLocale, options, src,
    425                     new StringBuilder(src.length()), null).toString();
    426         }
    427     }
    428 
    429     public static <A extends Appendable> A toLower(int caseLocale, int options,
    430             CharSequence src, A dest, Edits edits) {
    431         try {
    432             if (edits != null) {
    433                 edits.reset();
    434             }
    435             StringContextIterator iter = new StringContextIterator(src);
    436             internalToLower(caseLocale, options, iter, dest, edits);
    437             return dest;
    438         } catch (IOException e) {
    439             throw new ICUUncheckedIOException(e);
    440         }
    441     }
    442 
    443     public static String toUpper(int caseLocale, int options, CharSequence src) {
    444         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    445             if (src.length() == 0) {
    446                 return src.toString();
    447             }
    448             // Collect and apply only changes.
    449             // Good if no or few changes. Bad (slow) if many changes.
    450             Edits edits = new Edits();
    451             StringBuilder replacementChars = toUpper(
    452                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    453             return applyEdits(src, replacementChars, edits);
    454         } else {
    455             return toUpper(caseLocale, options, src,
    456                     new StringBuilder(src.length()), null).toString();
    457         }
    458     }
    459 
    460     public static <A extends Appendable> A toUpper(int caseLocale, int options,
    461             CharSequence src, A dest, Edits edits) {
    462         try {
    463             if (edits != null) {
    464                 edits.reset();
    465             }
    466             if (caseLocale == UCaseProps.LOC_GREEK) {
    467                 return GreekUpper.toUpper(options, src, dest, edits);
    468             }
    469             StringContextIterator iter = new StringContextIterator(src);
    470             int c;
    471             while ((c = iter.nextCaseMapCP()) >= 0) {
    472                 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
    473                 appendResult(c, dest, iter.getCPLength(), options, edits);
    474             }
    475             return dest;
    476         } catch (IOException e) {
    477             throw new ICUUncheckedIOException(e);
    478         }
    479     }
    480 
    481     public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
    482         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    483             if (src.length() == 0) {
    484                 return src.toString();
    485             }
    486             // Collect and apply only changes.
    487             // Good if no or few changes. Bad (slow) if many changes.
    488             Edits edits = new Edits();
    489             StringBuilder replacementChars = toTitle(
    490                     caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
    491                     new StringBuilder(), edits);
    492             return applyEdits(src, replacementChars, edits);
    493         } else {
    494             return toTitle(caseLocale, options, iter, src,
    495                     new StringBuilder(src.length()), null).toString();
    496         }
    497     }
    498 
    499     public static <A extends Appendable> A toTitle(
    500             int caseLocale, int options, BreakIterator titleIter,
    501             CharSequence src, A dest, Edits edits) {
    502         try {
    503             if (edits != null) {
    504                 edits.reset();
    505             }
    506 
    507             /* set up local variables */
    508             StringContextIterator iter = new StringContextIterator(src);
    509             int srcLength = src.length();
    510             int prev=0;
    511             boolean isFirstIndex=true;
    512 
    513             /* titlecasing loop */
    514             while(prev<srcLength) {
    515                 /* find next index where to titlecase */
    516                 int index;
    517                 if(isFirstIndex) {
    518                     isFirstIndex=false;
    519                     index=titleIter.first();
    520                 } else {
    521                     index=titleIter.next();
    522                 }
    523                 if(index==BreakIterator.DONE || index>srcLength) {
    524                     index=srcLength;
    525                 }
    526 
    527                 /*
    528                  * Segment [prev..index[ into 3 parts:
    529                  * a) skipped characters (copy as-is) [prev..titleStart[
    530                  * b) first letter (titlecase)              [titleStart..titleLimit[
    531                  * c) subsequent characters (lowercase)                 [titleLimit..index[
    532                  */
    533                 if(prev<index) {
    534                     // Find and copy skipped characters [prev..titleStart[
    535                     int titleStart=prev;
    536                     iter.setLimit(index);
    537                     int c=iter.nextCaseMapCP();
    538                     if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
    539                         // Adjust the titlecasing index to the next cased character,
    540                         // or to the next letter/number/symbol/private use.
    541                         // Stop with titleStart<titleLimit<=index
    542                         // if there is a character to be titlecased,
    543                         // or else stop with titleStart==titleLimit==index.
    544                         boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0;
    545                         while ((toCased ?
    546                                     UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) :
    547                                         !CaseMapImpl.isLNS(c)) &&
    548                                 (c=iter.nextCaseMapCP())>=0) {}
    549                         // If c<0 then we have only uncased characters in [prev..index[
    550                         // and stopped with titleStart==titleLimit==index.
    551                         titleStart=iter.getCPStart();
    552                         if (prev < titleStart) {
    553                             appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
    554                         }
    555                     }
    556 
    557                     if(titleStart<index) {
    558                         int titleLimit=iter.getCPLimit();
    559                         // titlecase c which is from [titleStart..titleLimit[
    560                         c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
    561                         appendResult(c, dest, iter.getCPLength(), options, edits);
    562 
    563                         // Special case Dutch IJ titlecasing
    564                         if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
    565                             char c1 = src.charAt(titleStart);
    566                             if ((c1 == 'i' || c1 == 'I')) {
    567                                 char c2 = src.charAt(titleStart+1);
    568                                 if (c2 == 'j') {
    569                                     dest.append('J');
    570                                     if (edits != null) {
    571                                         edits.addReplace(1, 1);
    572                                     }
    573                                     c = iter.nextCaseMapCP();
    574                                     titleLimit++;
    575                                     assert c == c2;
    576                                     assert titleLimit == iter.getCPLimit();
    577                                 } else if (c2 == 'J') {
    578                                     // Keep the capital J from getting lowercased.
    579                                     appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
    580                                     c = iter.nextCaseMapCP();
    581                                     titleLimit++;
    582                                     assert c == c2;
    583                                     assert titleLimit == iter.getCPLimit();
    584                                 }
    585                             }
    586                         }
    587 
    588                         // lowercase [titleLimit..index[
    589                         if(titleLimit<index) {
    590                             if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
    591                                 // Normal operation: Lowercase the rest of the word.
    592                                 internalToLower(caseLocale, options, iter, dest, edits);
    593                             } else {
    594                                 // Optionally just copy the rest of the word unchanged.
    595                                 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
    596                                 iter.moveToLimit();
    597                             }
    598                         }
    599                     }
    600                 }
    601 
    602                 prev=index;
    603             }
    604             return dest;
    605         } catch (IOException e) {
    606             throw new ICUUncheckedIOException(e);
    607         }
    608     }
    609 
    610     public static String fold(int options, CharSequence src) {
    611         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    612             if (src.length() == 0) {
    613                 return src.toString();
    614             }
    615             // Collect and apply only changes.
    616             // Good if no or few changes. Bad (slow) if many changes.
    617             Edits edits = new Edits();
    618             StringBuilder replacementChars = fold(
    619                     options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    620             return applyEdits(src, replacementChars, edits);
    621         } else {
    622             return fold(options, src, new StringBuilder(src.length()), null).toString();
    623         }
    624     }
    625 
    626     public static <A extends Appendable> A fold(int options,
    627             CharSequence src, A dest, Edits edits) {
    628         try {
    629             if (edits != null) {
    630                 edits.reset();
    631             }
    632             int length = src.length();
    633             for (int i = 0; i < length;) {
    634                 int c = Character.codePointAt(src, i);
    635                 int cpLength = Character.charCount(c);
    636                 i += cpLength;
    637                 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
    638                 appendResult(c, dest, cpLength, options, edits);
    639             }
    640             return dest;
    641         } catch (IOException e) {
    642             throw new ICUUncheckedIOException(e);
    643         }
    644     }
    645 
    646     private static final class GreekUpper {
    647         // Data bits.
    648         private static final int UPPER_MASK = 0x3ff;
    649         private static final int HAS_VOWEL = 0x1000;
    650         private static final int HAS_YPOGEGRAMMENI = 0x2000;
    651         private static final int HAS_ACCENT = 0x4000;
    652         private static final int HAS_DIALYTIKA = 0x8000;
    653         // Further bits during data building and processing, not stored in the data map.
    654         private static final int HAS_COMBINING_DIALYTIKA = 0x10000;
    655         private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000;
    656 
    657         private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
    658         private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
    659                 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
    660         private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
    661 
    662         // State bits.
    663         private static final int AFTER_CASED = 1;
    664         private static final int AFTER_VOWEL_WITH_ACCENT = 2;
    665 
    666         // Data generated by prototype code, see
    667         // http://site.icu-project.org/design/case/greek-upper
    668         // TODO: Move this data into ucase.icu.
    669         private static final char[] data0370 = {
    670             // U+0370..03FF: entry [c - 0x370] = base capital letter (data & UPPER_MASK) | HAS_* flags; 0 = no Greek-specific data.
    671             0x0370,  // U+0370
    672             0x0370,  // U+0371
    673             0x0372,  // U+0372
    674             0x0372,  // U+0373
    675             0,
    676             0,
    677             0x0376,  // U+0376
    678             0x0376,  // U+0377
    679             0,
    680             0,
    681             0x037A,  // U+037A
    682             0x03FD,  // U+037B
    683             0x03FE,  // U+037C
    684             0x03FF,  // U+037D
    685             0,
    686             0x037F,  // U+037F
    687             0,
    688             0,
    689             0,
    690             0,
    691             0,
    692             0,
    693             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+0386
    694             0,
    695             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+0388
    696             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+0389
    697             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+038A
    698             0,
    699             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+038C
    700             0,
    701             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+038E
    702             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+038F
    703             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+0390
    704             0x0391 | HAS_VOWEL,  // U+0391
    705             0x0392,  // U+0392
    706             0x0393,  // U+0393
    707             0x0394,  // U+0394
    708             0x0395 | HAS_VOWEL,  // U+0395
    709             0x0396,  // U+0396
    710             0x0397 | HAS_VOWEL,  // U+0397
    711             0x0398,  // U+0398
    712             0x0399 | HAS_VOWEL,  // U+0399
    713             0x039A,  // U+039A
    714             0x039B,  // U+039B
    715             0x039C,  // U+039C
    716             0x039D,  // U+039D
    717             0x039E,  // U+039E
    718             0x039F | HAS_VOWEL,  // U+039F
    719             0x03A0,  // U+03A0
    720             0x03A1,  // U+03A1
    721             0,
    722             0x03A3,  // U+03A3
    723             0x03A4,  // U+03A4
    724             0x03A5 | HAS_VOWEL,  // U+03A5
    725             0x03A6,  // U+03A6
    726             0x03A7,  // U+03A7
    727             0x03A8,  // U+03A8
    728             0x03A9 | HAS_VOWEL,  // U+03A9
    729             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03AA
    730             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03AB
    731             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+03AC
    732             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+03AD
    733             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+03AE
    734             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+03AF
    735             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+03B0
    736             0x0391 | HAS_VOWEL,  // U+03B1
    737             0x0392,  // U+03B2
    738             0x0393,  // U+03B3
    739             0x0394,  // U+03B4
    740             0x0395 | HAS_VOWEL,  // U+03B5
    741             0x0396,  // U+03B6
    742             0x0397 | HAS_VOWEL,  // U+03B7
    743             0x0398,  // U+03B8
    744             0x0399 | HAS_VOWEL,  // U+03B9
    745             0x039A,  // U+03BA
    746             0x039B,  // U+03BB
    747             0x039C,  // U+03BC
    748             0x039D,  // U+03BD
    749             0x039E,  // U+03BE
    750             0x039F | HAS_VOWEL,  // U+03BF
    751             0x03A0,  // U+03C0
    752             0x03A1,  // U+03C1
    753             0x03A3,  // U+03C2
    754             0x03A3,  // U+03C3
    755             0x03A4,  // U+03C4
    756             0x03A5 | HAS_VOWEL,  // U+03C5
    757             0x03A6,  // U+03C6
    758             0x03A7,  // U+03C7
    759             0x03A8,  // U+03C8
    760             0x03A9 | HAS_VOWEL,  // U+03C9
    761             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03CA
    762             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03CB
    763             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+03CC
    764             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+03CD
    765             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+03CE
    766             0x03CF,  // U+03CF
    767             0x0392,  // U+03D0
    768             0x0398,  // U+03D1
    769             0x03D2,  // U+03D2
    770             0x03D2 | HAS_ACCENT,  // U+03D3
    771             0x03D2 | HAS_DIALYTIKA,  // U+03D4
    772             0x03A6,  // U+03D5
    773             0x03A0,  // U+03D6
    774             0x03CF,  // U+03D7
    775             0x03D8,  // U+03D8
    776             0x03D8,  // U+03D9
    777             0x03DA,  // U+03DA
    778             0x03DA,  // U+03DB
    779             0x03DC,  // U+03DC
    780             0x03DC,  // U+03DD
    781             0x03DE,  // U+03DE
    782             0x03DE,  // U+03DF
    783             0x03E0,  // U+03E0
    784             0x03E0,  // U+03E1
    785             0,
    786             0,
    787             0,
    788             0,
    789             0,
    790             0,
    791             0,
    792             0,
    793             0,
    794             0,
    795             0,
    796             0,
    797             0,
    798             0,
    799             0x039A,  // U+03F0
    800             0x03A1,  // U+03F1
    801             0x03F9,  // U+03F2
    802             0x037F,  // U+03F3
    803             0x03F4,  // U+03F4
    804             0x0395 | HAS_VOWEL,  // U+03F5
    805             0,
    806             0x03F7,  // U+03F7
    807             0x03F7,  // U+03F8
    808             0x03F9,  // U+03F9
    809             0x03FA,  // U+03FA
    810             0x03FA,  // U+03FB
    811             0x03FC,  // U+03FC
    812             0x03FD,  // U+03FD
    813             0x03FE,  // U+03FE
    814             0x03FF,  // U+03FF
    815         };
    816 
    817         private static final char[] data1F00 = {
    818             // U+1F00..1FFF: entry [c - 0x1f00] = base capital letter (data & UPPER_MASK) | HAS_* flags; 0 = no Greek-specific data.
    819             0x0391 | HAS_VOWEL,  // U+1F00
    820             0x0391 | HAS_VOWEL,  // U+1F01
    821             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F02
    822             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F03
    823             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F04
    824             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F05
    825             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F06
    826             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F07
    827             0x0391 | HAS_VOWEL,  // U+1F08
    828             0x0391 | HAS_VOWEL,  // U+1F09
    829             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0A
    830             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0B
    831             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0C
    832             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0D
    833             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0E
    834             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0F
    835             0x0395 | HAS_VOWEL,  // U+1F10
    836             0x0395 | HAS_VOWEL,  // U+1F11
    837             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F12
    838             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F13
    839             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F14
    840             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F15
    841             0,
    842             0,
    843             0x0395 | HAS_VOWEL,  // U+1F18
    844             0x0395 | HAS_VOWEL,  // U+1F19
    845             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1A
    846             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1B
    847             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1C
    848             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1D
    849             0,
    850             0,
    851             0x0397 | HAS_VOWEL,  // U+1F20
    852             0x0397 | HAS_VOWEL,  // U+1F21
    853             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F22
    854             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F23
    855             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F24
    856             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F25
    857             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F26
    858             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F27
    859             0x0397 | HAS_VOWEL,  // U+1F28
    860             0x0397 | HAS_VOWEL,  // U+1F29
    861             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2A
    862             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2B
    863             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2C
    864             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2D
    865             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2E
    866             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2F
    867             0x0399 | HAS_VOWEL,  // U+1F30
    868             0x0399 | HAS_VOWEL,  // U+1F31
    869             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F32
    870             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F33
    871             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F34
    872             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F35
    873             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F36
    874             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F37
    875             0x0399 | HAS_VOWEL,  // U+1F38
    876             0x0399 | HAS_VOWEL,  // U+1F39
    877             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3A
    878             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3B
    879             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3C
    880             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3D
    881             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3E
    882             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3F
    883             0x039F | HAS_VOWEL,  // U+1F40
    884             0x039F | HAS_VOWEL,  // U+1F41
    885             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F42
    886             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F43
    887             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F44
    888             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F45
    889             0,
    890             0,
    891             0x039F | HAS_VOWEL,  // U+1F48
    892             0x039F | HAS_VOWEL,  // U+1F49
    893             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4A
    894             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4B
    895             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4C
    896             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4D
    897             0,
    898             0,
    899             0x03A5 | HAS_VOWEL,  // U+1F50
    900             0x03A5 | HAS_VOWEL,  // U+1F51
    901             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F52
    902             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F53
    903             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F54
    904             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F55
    905             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F56
    906             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F57
    907             0,
    908             0x03A5 | HAS_VOWEL,  // U+1F59
    909             0,
    910             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5B
    911             0,
    912             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5D
    913             0,
    914             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5F
    915             0x03A9 | HAS_VOWEL,  // U+1F60
    916             0x03A9 | HAS_VOWEL,  // U+1F61
    917             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F62
    918             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F63
    919             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F64
    920             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F65
    921             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F66
    922             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F67
    923             0x03A9 | HAS_VOWEL,  // U+1F68
    924             0x03A9 | HAS_VOWEL,  // U+1F69
    925             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6A
    926             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6B
    927             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6C
    928             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6D
    929             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6E
    930             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6F
    931             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F70
    932             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F71
    933             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F72
    934             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F73
    935             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F74
    936             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F75
    937             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F76
    938             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F77
    939             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F78
    940             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F79
    941             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F7A
    942             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F7B
    943             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F7C
    944             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F7D
    945             0,
    946             0,
    947             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F80
    948             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F81
    949             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F82
    950             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F83
    951             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F84
    952             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F85
    953             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F86
    954             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F87
    955             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F88
    956             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F89
    957             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8A
    958             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8B
    959             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8C
    960             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8D
    961             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8E
    962             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8F
    963             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F90
    964             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F91
    965             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F92
    966             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F93
    967             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F94
    968             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F95
    969             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F96
    970             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F97
    971             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F98
    972             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F99
    973             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9A
    974             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9B
    975             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9C
    976             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9D
    977             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9E
    978             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9F
    979             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA0
    980             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA1
    981             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA2
    982             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA3
    983             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA4
    984             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA5
    985             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA6
    986             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA7
    987             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA8
    988             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA9
    989             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAA
    990             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAB
    991             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAC
    992             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAD
    993             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAE
    994             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAF
    995             0x0391 | HAS_VOWEL,  // U+1FB0
    996             0x0391 | HAS_VOWEL,  // U+1FB1
    997             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB2
    998             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FB3
    999             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB4
   1000             0,
   1001             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FB6
   1002             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB7
   1003             0x0391 | HAS_VOWEL,  // U+1FB8
   1004             0x0391 | HAS_VOWEL,  // U+1FB9
   1005             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FBA
   1006             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FBB
   1007             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FBC
   1008             0,
   1009             0x0399 | HAS_VOWEL,  // U+1FBE
   1010             0,
   1011             0,
   1012             0,
   1013             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC2
   1014             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FC3
   1015             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC4
   1016             0,
   1017             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FC6
   1018             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC7
   1019             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1FC8
   1020             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1FC9
   1021             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FCA
   1022             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FCB
   1023             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FCC
   1024             0,
   1025             0,
   1026             0,
   1027             0x0399 | HAS_VOWEL,  // U+1FD0
   1028             0x0399 | HAS_VOWEL,  // U+1FD1
   1029             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD2
   1030             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD3
   1031             0,
   1032             0,
   1033             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FD6
   1034             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD7
   1035             0x0399 | HAS_VOWEL,  // U+1FD8
   1036             0x0399 | HAS_VOWEL,  // U+1FD9
   1037             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FDA
   1038             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FDB
   1039             0,
   1040             0,
   1041             0,
   1042             0,
   1043             0x03A5 | HAS_VOWEL,  // U+1FE0
   1044             0x03A5 | HAS_VOWEL,  // U+1FE1
   1045             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE2
   1046             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE3
   1047             0x03A1,  // U+1FE4
   1048             0x03A1,  // U+1FE5
   1049             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FE6
   1050             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE7
   1051             0x03A5 | HAS_VOWEL,  // U+1FE8
   1052             0x03A5 | HAS_VOWEL,  // U+1FE9
   1053             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FEA
   1054             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FEB
   1055             0x03A1,  // U+1FEC
   1056             0,
   1057             0,
   1058             0,
   1059             0,
   1060             0,
   1061             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF2
   1062             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FF3
   1063             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF4
   1064             0,
   1065             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FF6
   1066             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF7
   1067             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1FF8
   1068             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1FF9
   1069             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FFA
   1070             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FFB
   1071             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FFC
   1072             0,
   1073             0,
   1074             0,
   1075         };
   1076 
   1077         // U+2126 Ohm sign; carries the same letter data as the data0370 entry for U+03A9.
   1078         private static final char data2126 = 0x03A9 | HAS_VOWEL;  // U+2126
   1079 
   1080         private static final int getLetterData(int c) {
   1081             if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {
   1082                 return 0;
   1083             } else if (c <= 0x3ff) {
   1084                 return data0370[c - 0x370];
   1085             } else if (c <= 0x1fff) {
   1086                 return data1F00[c - 0x1f00];
   1087             } else if (c == 0x2126) {
   1088                 return data2126;
   1089             } else {
   1090                 return 0;
   1091             }
   1092         }
   1093 
   1094         /**
   1095          * Returns a non-zero value for each of the Greek combining diacritics
   1096          * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
   1097          * plus some perispomeni look-alikes.
   1098          */
   1099         private static final int getDiacriticData(int c) {
   1100             switch (c) {
   1101             case '\u0300':  // varia
   1102             case '\u0301':  // tonos = oxia
   1103             case '\u0342':  // perispomeni
   1104             case '\u0302':  // circumflex can look like perispomeni
   1105             case '\u0303':  // tilde can look like perispomeni
   1106             case '\u0311':  // inverted breve can look like perispomeni
   1107                 return HAS_ACCENT;
   1108             case '\u0308':  // dialytika = diaeresis
   1109                 return HAS_COMBINING_DIALYTIKA;
   1110             case '\u0344':  // dialytika tonos
   1111                 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
   1112             case '\u0345':  // ypogegrammeni = iota subscript
   1113                 return HAS_YPOGEGRAMMENI;
   1114             case '\u0304':  // macron
   1115             case '\u0306':  // breve
   1116             case '\u0313':  // comma above
   1117             case '\u0314':  // reversed comma above
   1118             case '\u0343':  // koronis
   1119                 return HAS_OTHER_GREEK_DIACRITIC;
   1120             default:
   1121                 return 0;
   1122             }
   1123         }
   1124 
   1125         private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
   1126             while (i < s.length()) {
   1127                 int c = Character.codePointAt(s, i);
   1128                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
   1129                 if ((type & UCaseProps.IGNORABLE) != 0) {
   1130                     // Case-ignorable, continue with the loop.
   1131                     i += Character.charCount(c);
   1132                 } else if (type != UCaseProps.NONE) {
   1133                     return true;  // Followed by cased letter.
   1134                 } else {
   1135                     return false;  // Uncased and not case-ignorable.
   1136                 }
   1137             }
   1138             return false;  // Not followed by cased letter.
   1139         }
   1140 
   1141         /**
   1142          * Greek string uppercasing with a state machine.
   1143          * Probably simpler than a stateless function that has to figure out complex context-before
   1144          * for each character.
   1145          * TODO: Try to re-consolidate one way or another with the non-Greek function.
   1146          *
   1147          * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
   1148          * @throws IOException
   1149          */
   1150         private static <A extends Appendable> A toUpper(int options,
   1151                 CharSequence src, A dest, Edits edits) throws IOException {
   1152             int state = 0;
   1153             for (int i = 0; i < src.length();) {
   1154                 int c = Character.codePointAt(src, i);
   1155                 int nextIndex = i + Character.charCount(c);
   1156                 int nextState = 0;
   1157                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
   1158                 if ((type & UCaseProps.IGNORABLE) != 0) {
   1159                     // c is case-ignorable
   1160                     nextState |= (state & AFTER_CASED);
   1161                 } else if (type != UCaseProps.NONE) {
   1162                     // c is cased
   1163                     nextState |= AFTER_CASED;
   1164                 }
   1165                 int data = getLetterData(c);
   1166                 if (data > 0) {
   1167                     int upper = data & UPPER_MASK;
   1168                     // Add a dialytika to this iota or ypsilon vowel
   1169                     // if we removed a tonos from the previous vowel,
   1170                     // and that previous vowel did not also have (or gain) a dialytika.
   1171                     // Adding one only to the final vowel in a longer sequence
   1172                     // (which does not occur in normal writing) would require lookahead.
   1173                     // Set the same flag as for preserving an existing dialytika.
   1174                     if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
   1175                             (upper == '' || upper == '')) {
   1176                         data |= HAS_DIALYTIKA;
   1177                     }
   1178                     int numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
   1179                     if ((data & HAS_YPOGEGRAMMENI) != 0) {
   1180                         numYpogegrammeni = 1;
   1181                     }
   1182                     // Skip combining diacritics after this Greek letter.
   1183                     while (nextIndex < src.length()) {
   1184                         int diacriticData = getDiacriticData(src.charAt(nextIndex));
   1185                         if (diacriticData != 0) {
   1186                             data |= diacriticData;
   1187                             if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
   1188                                 ++numYpogegrammeni;
   1189                             }
   1190                             ++nextIndex;
   1191                         } else {
   1192                             break;  // not a Greek diacritic
   1193                         }
   1194                     }
   1195                     if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
   1196                         nextState |= AFTER_VOWEL_WITH_ACCENT;
   1197                     }
   1198                     // Map according to Greek rules.
   1199                     boolean addTonos = false;
   1200                     if (upper == '' &&
   1201                             (data & HAS_ACCENT) != 0 &&
   1202                             numYpogegrammeni == 0 &&
   1203                             (state & AFTER_CASED) == 0 &&
   1204                             !isFollowedByCasedLetter(src, nextIndex)) {
   1205                         // Keep disjunctive "or" with (only) a tonos.
   1206                         // We use the same "word boundary" conditions as for the Final_Sigma test.
   1207                         if (i == nextIndex) {
   1208                             upper = '';  // Preserve the precomposed form.
   1209                         } else {
   1210                             addTonos = true;
   1211                         }
   1212                     } else if ((data & HAS_DIALYTIKA) != 0) {
   1213                         // Preserve a vowel with dialytika in precomposed form if it exists.
   1214                         if (upper == '') {
   1215                             upper = '';
   1216                             data &= ~HAS_EITHER_DIALYTIKA;
   1217                         } else if (upper == '') {
   1218                             upper = '';
   1219                             data &= ~HAS_EITHER_DIALYTIKA;
   1220                         }
   1221                     }
   1222 
   1223                     boolean change;
   1224                     if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
   1225                         change = true;  // common, simple usage
   1226                     } else {
   1227                         // Find out first whether we are changing the text.
   1228                         change = src.charAt(i) != upper || numYpogegrammeni > 0;
   1229                         int i2 = i + 1;
   1230                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
   1231                             change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
   1232                             ++i2;
   1233                         }
   1234                         if (addTonos) {
   1235                             change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
   1236                             ++i2;
   1237                         }
   1238                         int oldLength = nextIndex - i;
   1239                         int newLength = (i2 - i) + numYpogegrammeni;
   1240                         change |= oldLength != newLength;
   1241                         if (change) {
   1242                             if (edits != null) {
   1243                                 edits.addReplace(oldLength, newLength);
   1244                             }
   1245                         } else {
   1246                             if (edits != null) {
   1247                                 edits.addUnchanged(oldLength);
   1248                             }
   1249                             // Write unchanged text?
   1250                             change = (options & OMIT_UNCHANGED_TEXT) == 0;
   1251                         }
   1252                     }
   1253 
   1254                     if (change) {
   1255                         dest.append((char)upper);
   1256                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
   1257                             dest.append('\u0308');  // restore or add a dialytika
   1258                         }
   1259                         if (addTonos) {
   1260                             dest.append('\u0301');
   1261                         }
   1262                         while (numYpogegrammeni > 0) {
   1263                             dest.append('');
   1264                             --numYpogegrammeni;
   1265                         }
   1266                     }
   1267                 } else {
   1268                     c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
   1269                     appendResult(c, dest, nextIndex - i, options, edits);
   1270                 }
   1271                 i = nextIndex;
   1272                 state = nextState;
   1273             }
   1274             return dest;
   1275         }
   1276     }
   1277 }
   1278