Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 package android.icu.impl;
      5 
      6 import java.io.IOException;
      7 import java.text.CharacterIterator;
      8 import java.util.Locale;
      9 
     10 import android.icu.lang.UCharacter;
     11 import android.icu.lang.UCharacterCategory;
     12 import android.icu.text.BreakIterator;
     13 import android.icu.text.Edits;
     14 import android.icu.util.ICUUncheckedIOException;
     15 import android.icu.util.ULocale;
     16 
     17 /**
     18  * @hide Only a subset of ICU is exposed in Android
     19  */
     20 public final class CaseMapImpl {
     21     /**
     22      * Implementation of UCaseProps.ContextIterator, iterates over a String.
     23      * See ustrcase.c/utf16_caseContextIterator().
     24      */
     25     public static final class StringContextIterator implements UCaseProps.ContextIterator {
     26         /**
     27          * Constructor.
     28          * @param src String to iterate over.
     29          */
     30         public StringContextIterator(CharSequence src) {
     31             this.s=src;
     32             limit=src.length();
     33             cpStart=cpLimit=index=0;
     34             dir=0;
     35         }
     36 
     37         /**
     38          * Set the iteration limit for nextCaseMapCP() to an index within the string.
     39          * If the limit parameter is negative or past the string, then the
     40          * string length is restored as the iteration limit.
     41          *
     42          * <p>This limit does not affect the next() function which always
     43          * iterates to the very end of the string.
     44          *
     45          * @param lim The iteration limit.
     46          */
     47         public void setLimit(int lim) {
     48             if(0<=lim && lim<=s.length()) {
     49                 limit=lim;
     50             } else {
     51                 limit=s.length();
     52             }
     53         }
     54 
     55         /**
     56          * Move to the iteration limit without fetching code points up to there.
     57          */
     58         public void moveToLimit() {
     59             cpStart=cpLimit=limit;
     60         }
     61 
     62         /**
     63          * Iterate forward through the string to fetch the next code point
     64          * to be case-mapped, and set the context indexes for it.
     65          *
     66          * <p>When the iteration limit is reached (and -1 is returned),
     67          * getCPStart() will be at the iteration limit.
     68          *
     69          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
     70          *
     71          * @return The next code point to be case-mapped, or <0 when the iteration is done.
     72          */
     73         public int nextCaseMapCP() {
     74             cpStart=cpLimit;
     75             if(cpLimit<limit) {
     76                 int c=Character.codePointAt(s, cpLimit);
     77                 cpLimit+=Character.charCount(c);
     78                 return c;
     79             } else {
     80                 return -1;
     81             }
     82         }
     83 
     84         /**
     85          * Returns the start of the code point that was last returned
     86          * by nextCaseMapCP().
     87          */
     88         public int getCPStart() {
     89             return cpStart;
     90         }
     91 
     92         /**
     93          * Returns the limit of the code point that was last returned
     94          * by nextCaseMapCP().
     95          */
     96         public int getCPLimit() {
     97             return cpLimit;
     98         }
     99 
    100         public int getCPLength() {
    101             return cpLimit-cpStart;
    102         }
    103 
    104         // implement UCaseProps.ContextIterator
    105         // The following code is not used anywhere in this private class
    106         @Override
    107         public void reset(int direction) {
    108             if(direction>0) {
    109                 /* reset for forward iteration */
    110                 dir=1;
    111                 index=cpLimit;
    112             } else if(direction<0) {
    113                 /* reset for backward iteration */
    114                 dir=-1;
    115                 index=cpStart;
    116             } else {
    117                 // not a valid direction
    118                 dir=0;
    119                 index=0;
    120             }
    121         }
    122 
    123         @Override
    124         public int next() {
    125             int c;
    126 
    127             if(dir>0 && index<s.length()) {
    128                 c=Character.codePointAt(s, index);
    129                 index+=Character.charCount(c);
    130                 return c;
    131             } else if(dir<0 && index>0) {
    132                 c=Character.codePointBefore(s, index);
    133                 index-=Character.charCount(c);
    134                 return c;
    135             }
    136             return -1;
    137         }
    138 
    139         // variables
    140         protected CharSequence s;
    141         protected int index, limit, cpStart, cpLimit;
    142         protected int dir; // 0=initial state  >0=forward  <0=backward
    143     }
    144 
    145     public static final int TITLECASE_WHOLE_STRING = 0x20;
    146     public static final int TITLECASE_SENTENCES = 0x40;
    147 
    148     /**
    149      * Bit mask for the titlecasing iterator options bit field.
    150      * Currently only 3 out of 8 values are used:
    151      * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES.
    152      * See stringoptions.h.
    153      * @hide draft / provisional / internal are hidden on Android
    154      */
    155     private static final int TITLECASE_ITERATOR_MASK = 0xe0;
    156 
    157     public static final int TITLECASE_ADJUST_TO_CASED = 0x400;
    158 
    159     /**
    160      * Bit mask for the titlecasing index adjustment options bit set.
    161      * Currently two bits are defined:
    162      * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED.
    163      * See stringoptions.h.
    164      * @hide draft / provisional / internal are hidden on Android
    165      */
    166     private static final int TITLECASE_ADJUSTMENT_MASK = 0x600;
    167 
    168     public static int addTitleAdjustmentOption(int options, int newOption) {
    169         int adjOptions = options & TITLECASE_ADJUSTMENT_MASK;
    170         if (adjOptions !=0 && adjOptions != newOption) {
    171             throw new IllegalArgumentException("multiple titlecasing index adjustment options");
    172         }
    173         return options | newOption;
    174     }
    175 
    176     private static final int LNS =
    177             (1 << UCharacterCategory.UPPERCASE_LETTER) |
    178             (1 << UCharacterCategory.LOWERCASE_LETTER) |
    179             (1 << UCharacterCategory.TITLECASE_LETTER) |
    180             // Not MODIFIER_LETTER: We count only cased modifier letters.
    181             (1 << UCharacterCategory.OTHER_LETTER) |
    182 
    183             (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) |
    184             (1 << UCharacterCategory.LETTER_NUMBER) |
    185             (1 << UCharacterCategory.OTHER_NUMBER) |
    186 
    187             (1 << UCharacterCategory.MATH_SYMBOL) |
    188             (1 << UCharacterCategory.CURRENCY_SYMBOL) |
    189             (1 << UCharacterCategory.MODIFIER_SYMBOL) |
    190             (1 << UCharacterCategory.OTHER_SYMBOL) |
    191 
    192             (1 << UCharacterCategory.PRIVATE_USE);
    193 
    194     private static boolean isLNS(int c) {
    195         // Letter, number, symbol,
    196         // or a private use code point because those are typically used as letters or numbers.
    197         // Consider modifier letters only if they are cased.
    198         int gc = UCharacterProperty.INSTANCE.getType(c);
    199         return ((1 << gc) & LNS) != 0 ||
    200                 (gc == UCharacterCategory.MODIFIER_LETTER &&
    201                     UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE);
    202     }
    203 
    204     public static int addTitleIteratorOption(int options, int newOption) {
    205         int iterOptions = options & TITLECASE_ITERATOR_MASK;
    206         if (iterOptions !=0 && iterOptions != newOption) {
    207             throw new IllegalArgumentException("multiple titlecasing iterator options");
    208         }
    209         return options | newOption;
    210     }
    211 
    212     public static BreakIterator getTitleBreakIterator(
    213             Locale locale, int options, BreakIterator iter) {
    214         options &= TITLECASE_ITERATOR_MASK;
    215         if (options != 0 && iter != null) {
    216             throw new IllegalArgumentException(
    217                     "titlecasing iterator option together with an explicit iterator");
    218         }
    219         if (iter == null) {
    220             switch (options) {
    221             case 0:
    222                 iter = BreakIterator.getWordInstance(locale);
    223                 break;
    224             case TITLECASE_WHOLE_STRING:
    225                 iter = new WholeStringBreakIterator();
    226                 break;
    227             case TITLECASE_SENTENCES:
    228                 iter = BreakIterator.getSentenceInstance(locale);
    229                 break;
    230             default:
    231                 throw new IllegalArgumentException("unknown titlecasing iterator option");
    232             }
    233         }
    234         return iter;
    235     }
    236 
    237     public static BreakIterator getTitleBreakIterator(
    238             ULocale locale, int options, BreakIterator iter) {
    239         options &= TITLECASE_ITERATOR_MASK;
    240         if (options != 0 && iter != null) {
    241             throw new IllegalArgumentException(
    242                     "titlecasing iterator option together with an explicit iterator");
    243         }
    244         if (iter == null) {
    245             switch (options) {
    246             case 0:
    247                 iter = BreakIterator.getWordInstance(locale);
    248                 break;
    249             case TITLECASE_WHOLE_STRING:
    250                 iter = new WholeStringBreakIterator();
    251                 break;
    252             case TITLECASE_SENTENCES:
    253                 iter = BreakIterator.getSentenceInstance(locale);
    254                 break;
    255             default:
    256                 throw new IllegalArgumentException("unknown titlecasing iterator option");
    257             }
    258         }
    259         return iter;
    260     }
    261 
    262     /**
    263      * Omit unchanged text when case-mapping with Edits.
    264      */
    265     public static final int OMIT_UNCHANGED_TEXT = 0x4000;
    266 
    267     private static final class WholeStringBreakIterator extends BreakIterator {
    268         private int length;
    269 
    270         private static void notImplemented() {
    271             throw new UnsupportedOperationException("should not occur");
    272         }
    273 
    274         @Override
    275         public int first() {
    276             return 0;
    277         }
    278 
    279         @Override
    280         public int last() {
    281             notImplemented();
    282             return 0;
    283         }
    284 
    285         @Override
    286         public int next(int n) {
    287             notImplemented();
    288             return 0;
    289         }
    290 
    291         @Override
    292         public int next() {
    293             return length;
    294         }
    295 
    296         @Override
    297         public int previous() {
    298             notImplemented();
    299             return 0;
    300         }
    301 
    302         @Override
    303         public int following(int offset) {
    304             notImplemented();
    305             return 0;
    306         }
    307 
    308         @Override
    309         public int current() {
    310             notImplemented();
    311             return 0;
    312         }
    313 
    314         @Override
    315         public CharacterIterator getText() {
    316             notImplemented();
    317             return null;
    318         }
    319 
    320         @Override
    321         public void setText(CharacterIterator newText) {
    322             length = newText.getEndIndex();
    323         }
    324 
    325         @Override
    326         public void setText(CharSequence newText) {
    327             length = newText.length();
    328         }
    329 
    330         @Override
    331         public void setText(String newText) {
    332             length = newText.length();
    333         }
    334     }
    335 
    336     private static int appendCodePoint(Appendable a, int c) throws IOException {
    337         if (c <= Character.MAX_VALUE) {
    338             a.append((char)c);
    339             return 1;
    340         } else {
    341             a.append((char)(0xd7c0 + (c >> 10)));
    342             a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
    343             return 2;
    344         }
    345     }
    346 
    347     /**
    348      * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
    349      * @throws IOException
    350      */
    351     private static void appendResult(int result, Appendable dest,
    352             int cpLength, int options, Edits edits) throws IOException {
    353         // Decode the result.
    354         if (result < 0) {
    355             // (not) original code point
    356             if (edits != null) {
    357                 edits.addUnchanged(cpLength);
    358             }
    359             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
    360                 return;
    361             }
    362             appendCodePoint(dest, ~result);
    363         } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
    364             // The mapping has already been appended to result.
    365             if (edits != null) {
    366                 edits.addReplace(cpLength, result);
    367             }
    368         } else {
    369             // Append the single-code point mapping.
    370             int length = appendCodePoint(dest, result);
    371             if (edits != null) {
    372                 edits.addReplace(cpLength, length);
    373             }
    374         }
    375     }
    376 
    377     private static final void appendUnchanged(CharSequence src, int start, int length,
    378             Appendable dest, int options, Edits edits) throws IOException {
    379         if (length > 0) {
    380             if (edits != null) {
    381                 edits.addUnchanged(length);
    382             }
    383             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
    384                 return;
    385             }
    386             dest.append(src, start, start + length);
    387         }
    388     }
    389 
    390     private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
    391         if (!edits.hasChanges()) {
    392             return src.toString();
    393         }
    394         StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta());
    395         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
    396             if (ei.hasChange()) {
    397                 int i = ei.replacementIndex();
    398                 result.append(replacementChars, i, i + ei.newLength());
    399             } else {
    400                 int i = ei.sourceIndex();
    401                 result.append(src, i, i + ei.oldLength());
    402             }
    403         }
    404         return result.toString();
    405     }
    406 
    407     private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
    408             Appendable dest, Edits edits) throws IOException {
    409         int c;
    410         while ((c = iter.nextCaseMapCP()) >= 0) {
    411             c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
    412             appendResult(c, dest, iter.getCPLength(), options, edits);
    413         }
    414     }
    415 
    416     public static String toLower(int caseLocale, int options, CharSequence src) {
    417         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    418             if (src.length() == 0) {
    419                 return src.toString();
    420             }
    421             // Collect and apply only changes.
    422             // Good if no or few changes. Bad (slow) if many changes.
    423             Edits edits = new Edits();
    424             StringBuilder replacementChars = toLower(
    425                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    426             return applyEdits(src, replacementChars, edits);
    427         } else {
    428             return toLower(caseLocale, options, src,
    429                     new StringBuilder(src.length()), null).toString();
    430         }
    431     }
    432 
    433     public static <A extends Appendable> A toLower(int caseLocale, int options,
    434             CharSequence src, A dest, Edits edits) {
    435         try {
    436             if (edits != null) {
    437                 edits.reset();
    438             }
    439             StringContextIterator iter = new StringContextIterator(src);
    440             internalToLower(caseLocale, options, iter, dest, edits);
    441             return dest;
    442         } catch (IOException e) {
    443             throw new ICUUncheckedIOException(e);
    444         }
    445     }
    446 
    447     public static String toUpper(int caseLocale, int options, CharSequence src) {
    448         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    449             if (src.length() == 0) {
    450                 return src.toString();
    451             }
    452             // Collect and apply only changes.
    453             // Good if no or few changes. Bad (slow) if many changes.
    454             Edits edits = new Edits();
    455             StringBuilder replacementChars = toUpper(
    456                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    457             return applyEdits(src, replacementChars, edits);
    458         } else {
    459             return toUpper(caseLocale, options, src,
    460                     new StringBuilder(src.length()), null).toString();
    461         }
    462     }
    463 
    464     public static <A extends Appendable> A toUpper(int caseLocale, int options,
    465             CharSequence src, A dest, Edits edits) {
    466         try {
    467             if (edits != null) {
    468                 edits.reset();
    469             }
    470             if (caseLocale == UCaseProps.LOC_GREEK) {
    471                 return GreekUpper.toUpper(options, src, dest, edits);
    472             }
    473             StringContextIterator iter = new StringContextIterator(src);
    474             int c;
    475             while ((c = iter.nextCaseMapCP()) >= 0) {
    476                 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
    477                 appendResult(c, dest, iter.getCPLength(), options, edits);
    478             }
    479             return dest;
    480         } catch (IOException e) {
    481             throw new ICUUncheckedIOException(e);
    482         }
    483     }
    484 
    485     public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
    486         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    487             if (src.length() == 0) {
    488                 return src.toString();
    489             }
    490             // Collect and apply only changes.
    491             // Good if no or few changes. Bad (slow) if many changes.
    492             Edits edits = new Edits();
    493             StringBuilder replacementChars = toTitle(
    494                     caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
    495                     new StringBuilder(), edits);
    496             return applyEdits(src, replacementChars, edits);
    497         } else {
    498             return toTitle(caseLocale, options, iter, src,
    499                     new StringBuilder(src.length()), null).toString();
    500         }
    501     }
    502 
    503     public static <A extends Appendable> A toTitle(
    504             int caseLocale, int options, BreakIterator titleIter,
    505             CharSequence src, A dest, Edits edits) {
    506         try {
    507             if (edits != null) {
    508                 edits.reset();
    509             }
    510 
    511             /* set up local variables */
    512             StringContextIterator iter = new StringContextIterator(src);
    513             int srcLength = src.length();
    514             int prev=0;
    515             boolean isFirstIndex=true;
    516 
    517             /* titlecasing loop */
    518             while(prev<srcLength) {
    519                 /* find next index where to titlecase */
    520                 int index;
    521                 if(isFirstIndex) {
    522                     isFirstIndex=false;
    523                     index=titleIter.first();
    524                 } else {
    525                     index=titleIter.next();
    526                 }
    527                 if(index==BreakIterator.DONE || index>srcLength) {
    528                     index=srcLength;
    529                 }
    530 
    531                 /*
    532                  * Segment [prev..index[ into 3 parts:
    533                  * a) skipped characters (copy as-is) [prev..titleStart[
    534                  * b) first letter (titlecase)              [titleStart..titleLimit[
    535                  * c) subsequent characters (lowercase)                 [titleLimit..index[
    536                  */
    537                 if(prev<index) {
    538                     // Find and copy skipped characters [prev..titleStart[
    539                     int titleStart=prev;
    540                     iter.setLimit(index);
    541                     int c=iter.nextCaseMapCP();
    542                     if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
    543                         // Adjust the titlecasing index to the next cased character,
    544                         // or to the next letter/number/symbol/private use.
    545                         // Stop with titleStart<titleLimit<=index
    546                         // if there is a character to be titlecased,
    547                         // or else stop with titleStart==titleLimit==index.
    548                         boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0;
    549                         while ((toCased ?
    550                                     UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) :
    551                                         !CaseMapImpl.isLNS(c)) &&
    552                                 (c=iter.nextCaseMapCP())>=0) {}
    553                         // If c<0 then we have only uncased characters in [prev..index[
    554                         // and stopped with titleStart==titleLimit==index.
    555                         titleStart=iter.getCPStart();
    556                         if (prev < titleStart) {
    557                             appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
    558                         }
    559                     }
    560 
    561                     if(titleStart<index) {
    562                         int titleLimit=iter.getCPLimit();
    563                         // titlecase c which is from [titleStart..titleLimit[
    564                         c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
    565                         appendResult(c, dest, iter.getCPLength(), options, edits);
    566 
    567                         // Special case Dutch IJ titlecasing
    568                         if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
    569                             char c1 = src.charAt(titleStart);
    570                             if ((c1 == 'i' || c1 == 'I')) {
    571                                 char c2 = src.charAt(titleStart+1);
    572                                 if (c2 == 'j') {
    573                                     dest.append('J');
    574                                     if (edits != null) {
    575                                         edits.addReplace(1, 1);
    576                                     }
    577                                     c = iter.nextCaseMapCP();
    578                                     titleLimit++;
    579                                     assert c == c2;
    580                                     assert titleLimit == iter.getCPLimit();
    581                                 } else if (c2 == 'J') {
    582                                     // Keep the capital J from getting lowercased.
    583                                     appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
    584                                     c = iter.nextCaseMapCP();
    585                                     titleLimit++;
    586                                     assert c == c2;
    587                                     assert titleLimit == iter.getCPLimit();
    588                                 }
    589                             }
    590                         }
    591 
    592                         // lowercase [titleLimit..index[
    593                         if(titleLimit<index) {
    594                             if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
    595                                 // Normal operation: Lowercase the rest of the word.
    596                                 internalToLower(caseLocale, options, iter, dest, edits);
    597                             } else {
    598                                 // Optionally just copy the rest of the word unchanged.
    599                                 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
    600                                 iter.moveToLimit();
    601                             }
    602                         }
    603                     }
    604                 }
    605 
    606                 prev=index;
    607             }
    608             return dest;
    609         } catch (IOException e) {
    610             throw new ICUUncheckedIOException(e);
    611         }
    612     }
    613 
    614     public static String fold(int options, CharSequence src) {
    615         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
    616             if (src.length() == 0) {
    617                 return src.toString();
    618             }
    619             // Collect and apply only changes.
    620             // Good if no or few changes. Bad (slow) if many changes.
    621             Edits edits = new Edits();
    622             StringBuilder replacementChars = fold(
    623                     options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
    624             return applyEdits(src, replacementChars, edits);
    625         } else {
    626             return fold(options, src, new StringBuilder(src.length()), null).toString();
    627         }
    628     }
    629 
    630     public static <A extends Appendable> A fold(int options,
    631             CharSequence src, A dest, Edits edits) {
    632         try {
    633             if (edits != null) {
    634                 edits.reset();
    635             }
    636             int length = src.length();
    637             for (int i = 0; i < length;) {
    638                 int c = Character.codePointAt(src, i);
    639                 int cpLength = Character.charCount(c);
    640                 i += cpLength;
    641                 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
    642                 appendResult(c, dest, cpLength, options, edits);
    643             }
    644             return dest;
    645         } catch (IOException e) {
    646             throw new ICUUncheckedIOException(e);
    647         }
    648     }
    649 
    650     private static final class GreekUpper {
    651         // Data bits.
    652         private static final int UPPER_MASK = 0x3ff;
    653         private static final int HAS_VOWEL = 0x1000;
    654         private static final int HAS_YPOGEGRAMMENI = 0x2000;
    655         private static final int HAS_ACCENT = 0x4000;
    656         private static final int HAS_DIALYTIKA = 0x8000;
    657         // Further bits during data building and processing, not stored in the data map.
    658         private static final int HAS_COMBINING_DIALYTIKA = 0x10000;
    659         private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000;
    660 
    661         private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
    662         private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
    663                 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
    664         private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
    665 
    666         // State bits.
    667         private static final int AFTER_CASED = 1;
    668         private static final int AFTER_VOWEL_WITH_ACCENT = 2;
    669 
    670         // Data generated by prototype code, see
    671         // http://site.icu-project.org/design/case/greek-upper
    672         // TODO: Move this data into ucase.icu.
                //
                // Lookup table for code points U+0370..03FF; getLetterData() indexes it with (c - 0x370).
                // Each entry combines the uppercase base letter (extracted with UPPER_MASK in toUpper())
                // with HAS_* flag bits. An entry of 0 means "no Greek-specific data": toUpper() then
                // falls back to UCaseProps.toFullUpper().
                // The trailing comment on each non-zero entry names the code point that selects it.
    673         private static final char[] data0370 = {
    674             // U+0370..03FF
    675             0x0370,  // U+0370
    676             0x0370,  // U+0371
    677             0x0372,  // U+0372
    678             0x0372,  // U+0373
    679             0,
    680             0,
    681             0x0376,  // U+0376
    682             0x0376,  // U+0377
    683             0,
    684             0,
    685             0x037A,  // U+037A
    686             0x03FD,  // U+037B
    687             0x03FE,  // U+037C
    688             0x03FF,  // U+037D
    689             0,
    690             0x037F,  // U+037F
    691             0,
    692             0,
    693             0,
    694             0,
    695             0,
    696             0,
    697             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+0386
    698             0,
    699             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+0388
    700             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+0389
    701             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+038A
    702             0,
    703             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+038C
    704             0,
    705             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+038E
    706             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+038F
    707             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+0390
    708             0x0391 | HAS_VOWEL,  // U+0391
    709             0x0392,  // U+0392
    710             0x0393,  // U+0393
    711             0x0394,  // U+0394
    712             0x0395 | HAS_VOWEL,  // U+0395
    713             0x0396,  // U+0396
    714             0x0397 | HAS_VOWEL,  // U+0397
    715             0x0398,  // U+0398
    716             0x0399 | HAS_VOWEL,  // U+0399
    717             0x039A,  // U+039A
    718             0x039B,  // U+039B
    719             0x039C,  // U+039C
    720             0x039D,  // U+039D
    721             0x039E,  // U+039E
    722             0x039F | HAS_VOWEL,  // U+039F
    723             0x03A0,  // U+03A0
    724             0x03A1,  // U+03A1
    725             0,
    726             0x03A3,  // U+03A3
    727             0x03A4,  // U+03A4
    728             0x03A5 | HAS_VOWEL,  // U+03A5
    729             0x03A6,  // U+03A6
    730             0x03A7,  // U+03A7
    731             0x03A8,  // U+03A8
    732             0x03A9 | HAS_VOWEL,  // U+03A9
    733             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03AA
    734             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03AB
    735             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+03AC
    736             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+03AD
    737             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+03AE
    738             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+03AF
    739             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+03B0
    740             0x0391 | HAS_VOWEL,  // U+03B1
    741             0x0392,  // U+03B2
    742             0x0393,  // U+03B3
    743             0x0394,  // U+03B4
    744             0x0395 | HAS_VOWEL,  // U+03B5
    745             0x0396,  // U+03B6
    746             0x0397 | HAS_VOWEL,  // U+03B7
    747             0x0398,  // U+03B8
    748             0x0399 | HAS_VOWEL,  // U+03B9
    749             0x039A,  // U+03BA
    750             0x039B,  // U+03BB
    751             0x039C,  // U+03BC
    752             0x039D,  // U+03BD
    753             0x039E,  // U+03BE
    754             0x039F | HAS_VOWEL,  // U+03BF
    755             0x03A0,  // U+03C0
    756             0x03A1,  // U+03C1
    757             0x03A3,  // U+03C2
    758             0x03A3,  // U+03C3
    759             0x03A4,  // U+03C4
    760             0x03A5 | HAS_VOWEL,  // U+03C5
    761             0x03A6,  // U+03C6
    762             0x03A7,  // U+03C7
    763             0x03A8,  // U+03C8
    764             0x03A9 | HAS_VOWEL,  // U+03C9
    765             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03CA
    766             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // U+03CB
    767             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+03CC
    768             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+03CD
    769             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+03CE
    770             0x03CF,  // U+03CF
    771             0x0392,  // U+03D0
    772             0x0398,  // U+03D1
    773             0x03D2,  // U+03D2
    774             0x03D2 | HAS_ACCENT,  // U+03D3
    775             0x03D2 | HAS_DIALYTIKA,  // U+03D4
    776             0x03A6,  // U+03D5
    777             0x03A0,  // U+03D6
    778             0x03CF,  // U+03D7
    779             0x03D8,  // U+03D8
    780             0x03D8,  // U+03D9
    781             0x03DA,  // U+03DA
    782             0x03DA,  // U+03DB
    783             0x03DC,  // U+03DC
    784             0x03DC,  // U+03DD
    785             0x03DE,  // U+03DE
    786             0x03DE,  // U+03DF
    787             0x03E0,  // U+03E0
    788             0x03E0,  // U+03E1
    789             0,
    790             0,
    791             0,
    792             0,
    793             0,
    794             0,
    795             0,
    796             0,
    797             0,
    798             0,
    799             0,
    800             0,
    801             0,
    802             0,
    803             0x039A,  // U+03F0
    804             0x03A1,  // U+03F1
    805             0x03F9,  // U+03F2
    806             0x037F,  // U+03F3
    807             0x03F4,  // U+03F4
    808             0x0395 | HAS_VOWEL,  // U+03F5
    809             0,
    810             0x03F7,  // U+03F7
    811             0x03F7,  // U+03F8
    812             0x03F9,  // U+03F9
    813             0x03FA,  // U+03FA
    814             0x03FA,  // U+03FB
    815             0x03FC,  // U+03FC
    816             0x03FD,  // U+03FD
    817             0x03FE,  // U+03FE
    818             0x03FF,  // U+03FF
    819         };
    820 
    821         private static final char[] data1F00 = {
    822             // U+1F00..1FFF
                    // Lookup table for code points U+1F00..1FFF; getLetterData() indexes it with (c - 0x1f00).
                    // Each entry combines the uppercase base letter (extracted with UPPER_MASK in toUpper())
                    // with HAS_* flag bits; 0 means "no Greek-specific data" and toUpper() falls back to
                    // UCaseProps.toFullUpper().
                    // The trailing comment on each non-zero entry names the code point that selects it.
    823             0x0391 | HAS_VOWEL,  // U+1F00
    824             0x0391 | HAS_VOWEL,  // U+1F01
    825             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F02
    826             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F03
    827             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F04
    828             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F05
    829             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F06
    830             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F07
    831             0x0391 | HAS_VOWEL,  // U+1F08
    832             0x0391 | HAS_VOWEL,  // U+1F09
    833             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0A
    834             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0B
    835             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0C
    836             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0D
    837             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0E
    838             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F0F
    839             0x0395 | HAS_VOWEL,  // U+1F10
    840             0x0395 | HAS_VOWEL,  // U+1F11
    841             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F12
    842             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F13
    843             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F14
    844             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F15
    845             0,
    846             0,
    847             0x0395 | HAS_VOWEL,  // U+1F18
    848             0x0395 | HAS_VOWEL,  // U+1F19
    849             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1A
    850             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1B
    851             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1C
    852             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F1D
    853             0,
    854             0,
    855             0x0397 | HAS_VOWEL,  // U+1F20
    856             0x0397 | HAS_VOWEL,  // U+1F21
    857             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F22
    858             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F23
    859             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F24
    860             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F25
    861             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F26
    862             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F27
    863             0x0397 | HAS_VOWEL,  // U+1F28
    864             0x0397 | HAS_VOWEL,  // U+1F29
    865             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2A
    866             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2B
    867             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2C
    868             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2D
    869             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2E
    870             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F2F
    871             0x0399 | HAS_VOWEL,  // U+1F30
    872             0x0399 | HAS_VOWEL,  // U+1F31
    873             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F32
    874             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F33
    875             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F34
    876             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F35
    877             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F36
    878             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F37
    879             0x0399 | HAS_VOWEL,  // U+1F38
    880             0x0399 | HAS_VOWEL,  // U+1F39
    881             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3A
    882             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3B
    883             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3C
    884             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3D
    885             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3E
    886             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F3F
    887             0x039F | HAS_VOWEL,  // U+1F40
    888             0x039F | HAS_VOWEL,  // U+1F41
    889             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F42
    890             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F43
    891             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F44
    892             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F45
    893             0,
    894             0,
    895             0x039F | HAS_VOWEL,  // U+1F48
    896             0x039F | HAS_VOWEL,  // U+1F49
    897             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4A
    898             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4B
    899             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4C
    900             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F4D
    901             0,
    902             0,
    903             0x03A5 | HAS_VOWEL,  // U+1F50
    904             0x03A5 | HAS_VOWEL,  // U+1F51
    905             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F52
    906             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F53
    907             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F54
    908             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F55
    909             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F56
    910             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F57
    911             0,
    912             0x03A5 | HAS_VOWEL,  // U+1F59
    913             0,
    914             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5B
    915             0,
    916             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5D
    917             0,
    918             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F5F
    919             0x03A9 | HAS_VOWEL,  // U+1F60
    920             0x03A9 | HAS_VOWEL,  // U+1F61
    921             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F62
    922             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F63
    923             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F64
    924             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F65
    925             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F66
    926             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F67
    927             0x03A9 | HAS_VOWEL,  // U+1F68
    928             0x03A9 | HAS_VOWEL,  // U+1F69
    929             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6A
    930             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6B
    931             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6C
    932             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6D
    933             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6E
    934             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F6F
    935             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F70
    936             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1F71
    937             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F72
    938             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1F73
    939             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F74
    940             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1F75
    941             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F76
    942             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1F77
    943             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F78
    944             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1F79
    945             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F7A
    946             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1F7B
    947             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F7C
    948             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1F7D
    949             0,
    950             0,
    951             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F80
    952             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F81
    953             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F82
    954             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F83
    955             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F84
    956             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F85
    957             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F86
    958             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F87
    959             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F88
    960             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F89
    961             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8A
    962             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8B
    963             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8C
    964             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8D
    965             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8E
    966             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F8F
    967             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F90
    968             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F91
    969             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F92
    970             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F93
    971             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F94
    972             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F95
    973             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F96
    974             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F97
    975             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F98
    976             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1F99
    977             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9A
    978             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9B
    979             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9C
    980             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9D
    981             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9E
    982             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1F9F
    983             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA0
    984             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA1
    985             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA2
    986             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA3
    987             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA4
    988             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA5
    989             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA6
    990             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FA7
    991             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA8
    992             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FA9
    993             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAA
    994             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAB
    995             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAC
    996             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAD
    997             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAE
    998             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FAF
    999             0x0391 | HAS_VOWEL,  // U+1FB0
   1000             0x0391 | HAS_VOWEL,  // U+1FB1
   1001             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB2
   1002             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FB3
   1003             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB4
   1004             0,
   1005             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FB6
   1006             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FB7
   1007             0x0391 | HAS_VOWEL,  // U+1FB8
   1008             0x0391 | HAS_VOWEL,  // U+1FB9
   1009             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FBA
   1010             0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+1FBB
   1011             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FBC
   1012             0,
   1013             0x0399 | HAS_VOWEL,  // U+1FBE
   1014             0,
   1015             0,
   1016             0,
   1017             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC2
   1018             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FC3
   1019             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC4
   1020             0,
   1021             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FC6
   1022             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FC7
   1023             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1FC8
   1024             0x0395 | HAS_VOWEL | HAS_ACCENT,  // U+1FC9
   1025             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FCA
   1026             0x0397 | HAS_VOWEL | HAS_ACCENT,  // U+1FCB
   1027             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FCC
   1028             0,
   1029             0,
   1030             0,
   1031             0x0399 | HAS_VOWEL,  // U+1FD0
   1032             0x0399 | HAS_VOWEL,  // U+1FD1
   1033             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD2
   1034             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD3
   1035             0,
   1036             0,
   1037             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FD6
   1038             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FD7
   1039             0x0399 | HAS_VOWEL,  // U+1FD8
   1040             0x0399 | HAS_VOWEL,  // U+1FD9
   1041             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FDA
   1042             0x0399 | HAS_VOWEL | HAS_ACCENT,  // U+1FDB
   1043             0,
   1044             0,
   1045             0,
   1046             0,
   1047             0x03A5 | HAS_VOWEL,  // U+1FE0
   1048             0x03A5 | HAS_VOWEL,  // U+1FE1
   1049             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE2
   1050             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE3
   1051             0x03A1,  // U+1FE4
   1052             0x03A1,  // U+1FE5
   1053             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FE6
   1054             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // U+1FE7
   1055             0x03A5 | HAS_VOWEL,  // U+1FE8
   1056             0x03A5 | HAS_VOWEL,  // U+1FE9
   1057             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FEA
   1058             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // U+1FEB
   1059             0x03A1,  // U+1FEC
   1060             0,
   1061             0,
   1062             0,
   1063             0,
   1064             0,
   1065             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF2
   1066             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FF3
   1067             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF4
   1068             0,
   1069             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FF6
   1070             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // U+1FF7
   1071             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1FF8
   1072             0x039F | HAS_VOWEL | HAS_ACCENT,  // U+1FF9
   1073             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FFA
   1074             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // U+1FFB
   1075             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // U+1FFC
   1076             0,
   1077             0,
   1078             0,
   1079         };
   1080 
   1081         // U+2126 Ohm sign
                // Single-entry counterpart of the lookup tables: getLetterData() returns this value
                // for c == 0x2126. It encodes uppercase base letter 0x03A9 with the HAS_VOWEL flag.
   1082         private static final char data2126 = 0x03A9 | HAS_VOWEL;  // U+2126
   1083 
   1084         private static final int getLetterData(int c) {
   1085             if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {
   1086                 return 0;
   1087             } else if (c <= 0x3ff) {
   1088                 return data0370[c - 0x370];
   1089             } else if (c <= 0x1fff) {
   1090                 return data1F00[c - 0x1f00];
   1091             } else if (c == 0x2126) {
   1092                 return data2126;
   1093             } else {
   1094                 return 0;
   1095             }
   1096         }
   1097 
   1098         /**
   1099          * Returns a non-zero value for each of the Greek combining diacritics
   1100          * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
   1101          * plus some perispomeni look-alikes.
   1102          */
   1103         private static final int getDiacriticData(int c) {
   1104             switch (c) {
   1105             case '\u0300':  // varia
   1106             case '\u0301':  // tonos = oxia
   1107             case '\u0342':  // perispomeni
   1108             case '\u0302':  // circumflex can look like perispomeni
   1109             case '\u0303':  // tilde can look like perispomeni
   1110             case '\u0311':  // inverted breve can look like perispomeni
   1111                 return HAS_ACCENT;
   1112             case '\u0308':  // dialytika = diaeresis
   1113                 return HAS_COMBINING_DIALYTIKA;
   1114             case '\u0344':  // dialytika tonos
   1115                 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
   1116             case '\u0345':  // ypogegrammeni = iota subscript
   1117                 return HAS_YPOGEGRAMMENI;
   1118             case '\u0304':  // macron
   1119             case '\u0306':  // breve
   1120             case '\u0313':  // comma above
   1121             case '\u0314':  // reversed comma above
   1122             case '\u0343':  // koronis
   1123                 return HAS_OTHER_GREEK_DIACRITIC;
   1124             default:
   1125                 return 0;
   1126             }
   1127         }
   1128 
   1129         private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
   1130             while (i < s.length()) {
   1131                 int c = Character.codePointAt(s, i);
   1132                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
   1133                 if ((type & UCaseProps.IGNORABLE) != 0) {
   1134                     // Case-ignorable, continue with the loop.
   1135                     i += Character.charCount(c);
   1136                 } else if (type != UCaseProps.NONE) {
   1137                     return true;  // Followed by cased letter.
   1138                 } else {
   1139                     return false;  // Uncased and not case-ignorable.
   1140                 }
   1141             }
   1142             return false;  // Not followed by cased letter.
   1143         }
   1144 
   1145         /**
   1146          * Greek string uppercasing with a state machine.
   1147          * Probably simpler than a stateless function that has to figure out complex context-before
   1148          * for each character.
   1149          * TODO: Try to re-consolidate one way or another with the non-Greek function.
   1150          *
   1151          * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
   1152          * @throws IOException
   1153          */
   1154         private static <A extends Appendable> A toUpper(int options,
   1155                 CharSequence src, A dest, Edits edits) throws IOException {
   1156             int state = 0;
   1157             for (int i = 0; i < src.length();) {
   1158                 int c = Character.codePointAt(src, i);
   1159                 int nextIndex = i + Character.charCount(c);
   1160                 int nextState = 0;
   1161                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
   1162                 if ((type & UCaseProps.IGNORABLE) != 0) {
   1163                     // c is case-ignorable
   1164                     nextState |= (state & AFTER_CASED);
   1165                 } else if (type != UCaseProps.NONE) {
   1166                     // c is cased
   1167                     nextState |= AFTER_CASED;
   1168                 }
   1169                 int data = getLetterData(c);
   1170                 if (data > 0) {
   1171                     int upper = data & UPPER_MASK;
   1172                     // Add a dialytika to this iota or ypsilon vowel
   1173                     // if we removed a tonos from the previous vowel,
   1174                     // and that previous vowel did not also have (or gain) a dialytika.
   1175                     // Adding one only to the final vowel in a longer sequence
   1176                     // (which does not occur in normal writing) would require lookahead.
   1177                     // Set the same flag as for preserving an existing dialytika.
   1178                     if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
   1179                             (upper == '' || upper == '')) {
   1180                         data |= HAS_DIALYTIKA;
   1181                     }
   1182                     int numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
   1183                     if ((data & HAS_YPOGEGRAMMENI) != 0) {
   1184                         numYpogegrammeni = 1;
   1185                     }
   1186                     // Skip combining diacritics after this Greek letter.
   1187                     while (nextIndex < src.length()) {
   1188                         int diacriticData = getDiacriticData(src.charAt(nextIndex));
   1189                         if (diacriticData != 0) {
   1190                             data |= diacriticData;
   1191                             if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
   1192                                 ++numYpogegrammeni;
   1193                             }
   1194                             ++nextIndex;
   1195                         } else {
   1196                             break;  // not a Greek diacritic
   1197                         }
   1198                     }
   1199                     if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
   1200                         nextState |= AFTER_VOWEL_WITH_ACCENT;
   1201                     }
   1202                     // Map according to Greek rules.
   1203                     boolean addTonos = false;
   1204                     if (upper == '' &&
   1205                             (data & HAS_ACCENT) != 0 &&
   1206                             numYpogegrammeni == 0 &&
   1207                             (state & AFTER_CASED) == 0 &&
   1208                             !isFollowedByCasedLetter(src, nextIndex)) {
   1209                         // Keep disjunctive "or" with (only) a tonos.
   1210                         // We use the same "word boundary" conditions as for the Final_Sigma test.
   1211                         if (i == nextIndex) {
   1212                             upper = '';  // Preserve the precomposed form.
   1213                         } else {
   1214                             addTonos = true;
   1215                         }
   1216                     } else if ((data & HAS_DIALYTIKA) != 0) {
   1217                         // Preserve a vowel with dialytika in precomposed form if it exists.
   1218                         if (upper == '') {
   1219                             upper = '';
   1220                             data &= ~HAS_EITHER_DIALYTIKA;
   1221                         } else if (upper == '') {
   1222                             upper = '';
   1223                             data &= ~HAS_EITHER_DIALYTIKA;
   1224                         }
   1225                     }
   1226 
   1227                     boolean change;
   1228                     if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
   1229                         change = true;  // common, simple usage
   1230                     } else {
   1231                         // Find out first whether we are changing the text.
   1232                         change = src.charAt(i) != upper || numYpogegrammeni > 0;
   1233                         int i2 = i + 1;
   1234                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
   1235                             change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
   1236                             ++i2;
   1237                         }
   1238                         if (addTonos) {
   1239                             change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
   1240                             ++i2;
   1241                         }
   1242                         int oldLength = nextIndex - i;
   1243                         int newLength = (i2 - i) + numYpogegrammeni;
   1244                         change |= oldLength != newLength;
   1245                         if (change) {
   1246                             if (edits != null) {
   1247                                 edits.addReplace(oldLength, newLength);
   1248                             }
   1249                         } else {
   1250                             if (edits != null) {
   1251                                 edits.addUnchanged(oldLength);
   1252                             }
   1253                             // Write unchanged text?
   1254                             change = (options & OMIT_UNCHANGED_TEXT) == 0;
   1255                         }
   1256                     }
   1257 
   1258                     if (change) {
   1259                         dest.append((char)upper);
   1260                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
   1261                             dest.append('\u0308');  // restore or add a dialytika
   1262                         }
   1263                         if (addTonos) {
   1264                             dest.append('\u0301');
   1265                         }
   1266                         while (numYpogegrammeni > 0) {
   1267                             dest.append('');
   1268                             --numYpogegrammeni;
   1269                         }
   1270                     }
   1271                 } else {
   1272                     c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
   1273                     appendResult(c, dest, nextIndex - i, options, edits);
   1274                 }
   1275                 i = nextIndex;
   1276                 state = nextState;
   1277             }
   1278             return dest;
   1279         }
   1280     }
   1281 }
   1282