Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package java.lang;
     18 
     19 import android.icu.text.Transliterator;
     20 import java.util.Locale;
     21 import libcore.icu.ICU;
     22 
     23 /**
     24  * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
     25  */
     26 class CaseMapper {
     27     private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray();
     28     private static final char[] upperValues2 = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray();
     29 
     30     private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
     31     private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
     32     private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';
     33 
     34     /**
     35      * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
     36      * this class should be changed so that you instantiate it with the String and its value,
     37      * and count fields.
     38      */
     39     private CaseMapper() {
     40     }
     41 
     42     /**
     43      * Implements String.toLowerCase. The original String instance is returned if nothing changes.
     44      */
     45     public static String toLowerCase(Locale locale, String s) {
     46         // Punt hard cases to ICU4C.
     47         // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase.
     48         String languageCode = locale.getLanguage();
     49         if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
     50             return ICU.toLowerCase(s, locale);
     51         }
     52 
     53         String newString = null;
     54         for (int i = 0, end = s.length(); i < end; ++i) {
     55             char ch = s.charAt(i);
     56             char newCh;
     57             if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) {
     58                 // Punt these hard cases.
     59                 return ICU.toLowerCase(s, locale);
     60             } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(s, i)) {
     61                 newCh = GREEK_SMALL_FINAL_SIGMA;
     62             } else {
     63                 newCh = Character.toLowerCase(ch);
     64             }
     65             if (ch != newCh) {
     66                 if (newString == null) {
     67                     newString = StringFactory.newStringFromString(s);
     68                 }
     69                 newString.setCharAt(i, newCh);
     70             }
     71         }
     72         return newString != null ? newString : s;
     73     }
     74 
     75     /**
     76      * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable
     77      * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
     78      * then a cased letter.
     79      */
     80     private static boolean isFinalSigma(String s, int index) {
     81         // TODO: we don't skip case-ignorable sequences like we should.
     82         // TODO: we should add a more direct way to test for a cased letter.
     83         if (index <= 0) {
     84             return false;
     85         }
     86         char previous = s.charAt(index - 1);
     87         if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) {
     88             return false;
     89         }
     90         if (index + 1 >= s.length()) {
     91             return true;
     92         }
     93         char next = s.charAt(index + 1);
     94         if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) {
     95             return false;
     96         }
     97         return true;
     98     }
     99 
    100     /**
    101      * Return the index of the specified character into the upperValues table.
    102      * The upperValues table contains three entries at each position. These
    103      * three characters are the upper case conversion. If only two characters
    104      * are used, the third character in the table is \u0000.
    105      * @return the index into the upperValues table, or -1
    106      */
    107     private static int upperIndex(int ch) {
    108         int index = -1;
    109         if (ch >= 0xdf) {
    110             if (ch <= 0x587) {
    111                 switch (ch) {
    112                 case 0xdf: return 0;
    113                 case 0x149: return 1;
    114                 case 0x1f0: return 2;
    115                 case 0x390: return 3;
    116                 case 0x3b0: return 4;
    117                 case 0x587: return 5;
    118                 }
    119             } else if (ch >= 0x1e96) {
    120                 if (ch <= 0x1e9a) {
    121                     index = 6 + ch - 0x1e96;
    122                 } else if (ch >= 0x1f50 && ch <= 0x1ffc) {
    123                     index = upperValues2[ch - 0x1f50];
    124                     if (index == 0) {
    125                         index = -1;
    126                     }
    127                 } else if (ch >= 0xfb00) {
    128                     if (ch <= 0xfb06) {
    129                         index = 90 + ch - 0xfb00;
    130                     } else if (ch >= 0xfb13 && ch <= 0xfb17) {
    131                         index = 97 + ch - 0xfb13;
    132                     }
    133                 }
    134             }
    135         }
    136         return index;
    137     }
    138 
    139     private static final ThreadLocal<Transliterator> EL_UPPER = new ThreadLocal<Transliterator>() {
    140         @Override protected Transliterator initialValue() {
    141             return Transliterator.getInstance("el-Upper");
    142         }
    143     };
    144 
    145     public static String toUpperCase(Locale locale, String s, int count) {
    146         String languageCode = locale.getLanguage();
    147         if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
    148             return ICU.toUpperCase(s, locale);
    149         }
    150         if (languageCode.equals("el")) {
    151             return EL_UPPER.get().transliterate(s);
    152         }
    153 
    154         char[] output = null;
    155         String newString = null;
    156         int i = 0;
    157         for (int o = 0, end = count; o < end; o++) {
    158             char ch = s.charAt(o);
    159             if (Character.isHighSurrogate(ch)) {
    160                 return ICU.toUpperCase(s, locale);
    161             }
    162             int index = upperIndex(ch);
    163             if (index == -1) {
    164                 if (output != null && i >= output.length) {
    165                     char[] newoutput = new char[output.length + (count / 6) + 2];
    166                     System.arraycopy(output, 0, newoutput, 0, output.length);
    167                     output = newoutput;
    168                 }
    169                 char upch = Character.toUpperCase(ch);
    170                 if (output != null) {
    171                     output[i++] = upch;
    172                 } else if (ch != upch) {
    173                     if (newString == null) {
    174                         newString = StringFactory.newStringFromString(s);
    175                     }
    176                     newString.setCharAt(o, upch);
    177                 }
    178             } else {
    179                 int target = index * 3;
    180                 char val3 = upperValues[target + 2];
    181                 if (output == null) {
    182                     output = new char[count + (count / 6) + 2];
    183                     i = o;
    184                     if (newString != null) {
    185                         System.arraycopy(newString.toCharArray(), 0, output, 0, i);
    186                     } else {
    187                         System.arraycopy(s.toCharArray(), 0, output, 0, i);
    188                     }
    189                 } else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
    190                     char[] newoutput = new char[output.length + (count / 6) + 3];
    191                     System.arraycopy(output, 0, newoutput, 0, output.length);
    192                     output = newoutput;
    193                 }
    194 
    195                 char val = upperValues[target];
    196                 output[i++] = val;
    197                 val = upperValues[target + 1];
    198                 output[i++] = val;
    199                 if (val3 != 0) {
    200                     output[i++] = val3;
    201                 }
    202             }
    203         }
    204         if (output == null) {
    205             if (newString != null) {
    206                 return newString;
    207             } else {
    208                 return s;
    209             }
    210         }
    211         return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i);
    212     }
    213 }
    214