/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
     16 
     17 package libcore.java.lang;
     18 
     19 import java.lang.reflect.Field;
     20 import java.lang.reflect.Modifier;
     21 import java.nio.ByteBuffer;
     22 import java.nio.CharBuffer;
     23 import java.nio.ReadOnlyBufferException;
     24 import java.nio.charset.Charset;
     25 import java.nio.charset.CharsetDecoder;
     26 import java.nio.charset.CharsetEncoder;
     27 import java.nio.charset.CoderResult;
     28 import java.nio.charset.CodingErrorAction;
     29 import java.util.Arrays;
     30 import java.util.Locale;
     31 import junit.framework.TestCase;
     32 
     33 public class StringTest extends TestCase {
     34     public void testIsEmpty() {
     35         assertTrue("".isEmpty());
     36         assertFalse("x".isEmpty());
     37     }
     38 
     39     // The evil decoder keeps hold of the CharBuffer it wrote to.
     40     private static final class EvilCharsetDecoder extends CharsetDecoder {
     41         private static char[] chars;
     42         public EvilCharsetDecoder(Charset cs) {
     43             super(cs, 1.0f, 1.0f);
     44         }
     45         protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
     46             chars = out.array();
     47             int inLength = in.remaining();
     48             for (int i = 0; i < inLength; ++i) {
     49                 in.put((byte) 'X');
     50                 out.put('Y');
     51             }
     52             return CoderResult.UNDERFLOW;
     53         }
     54         public static void corrupt() {
     55             for (int i = 0; i < chars.length; ++i) {
     56                 chars[i] = '$';
     57             }
     58         }
     59     }
     60 
     61     // The evil encoder tries to write to the CharBuffer it was given to
     62     // read from.
     63     private static final class EvilCharsetEncoder extends CharsetEncoder {
     64         public EvilCharsetEncoder(Charset cs) {
     65             super(cs, 1.0f, 1.0f);
     66         }
     67         protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
     68             int inLength = in.remaining();
     69             for (int i = 0; i < inLength; ++i) {
     70                 in.put('x');
     71                 out.put((byte) 'y');
     72             }
     73             return CoderResult.UNDERFLOW;
     74         }
     75     }
     76 
     77     private static final Charset EVIL_CHARSET = new Charset("evil", null) {
     78         public boolean contains(Charset charset) { return false; }
     79         public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); }
     80         public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); }
     81     };
     82 
     83     public void testGetBytes_MaliciousCharset() {
     84         try {
     85             String s = "hi";
     86             // Check that our encoder can't write to the input CharBuffer
     87             // it was given.
     88             s.getBytes(EVIL_CHARSET);
     89             fail(); // We shouldn't have got here!
     90         } catch (ReadOnlyBufferException expected) {
     91             // We caught you trying to be naughty!
     92         }
     93     }
     94 
     95     public void testString_BII() throws Exception {
     96         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
     97         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2));
     98     }
     99 
    100     public void testString_BIIString() throws Exception {
    101         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
    102         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8"));
    103     }
    104 
    105     public void testString_BIICharset() throws Exception {
    106         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
    107         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8")));
    108     }
    109 
    110     public void testString_BCharset() throws Exception {
    111         byte[] bytes = "a\u0666b".getBytes("UTF-8");
    112         assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8")));
    113     }
    114 
    115     public void testStringFromCharset_MaliciousCharset() {
    116         Charset cs = EVIL_CHARSET;
    117         byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
    118         final String result = new String(bytes, cs);
    119         assertEquals("YY", result); // (Our decoder always outputs 'Y's.)
    120         // Check that even if the decoder messes with the output CharBuffer
    121         // after we've created a string from it, it doesn't affect the string.
    122         EvilCharsetDecoder.corrupt();
    123         assertEquals("YY", result);
    124     }
    125 
    126     public void test_getBytes_bad() throws Exception {
    127         // Check that we use '?' as the replacement byte for invalid characters.
    128         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII")));
    129         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII"))));
    130     }
    131 
    132     public void test_getBytes_UTF_8() {
    133         // We have a fast path implementation of String.getBytes for UTF-8.
    134         Charset cs = Charset.forName("UTF-8");
    135 
    136         // Test the empty string.
    137         assertEquals("[]", Arrays.toString("".getBytes(cs)));
    138 
    139         // Test one-byte characters.
    140         assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs)));
    141         assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs)));
    142         assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs)));
    143 
    144         // Test two-byte characters.
    145         assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs)));
    146         assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs)));
    147         assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs)));
    148         assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs)));
    149 
    150         // Test three-byte characters.
    151         assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs)));
    152         assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs)));
    153         assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs)));
    154         assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs)));
    155 
    156         // Test supplementary characters.
    157         // Minimum supplementary character: U+10000
    158         assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs)));
    159         // Random supplementary character: U+10381 Ugaritic letter beta
    160         assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs)));
    161         // Maximum supplementary character: U+10FFFF
    162         assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs)));
    163         // A high surrogate at end of string is an error replaced with '?'.
    164         assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs)));
    165         // A high surrogate not followed by a low surrogate is an error replaced with '?'.
    166         assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs)));
    167     }
    168 
    169     public void test_new_String_bad() throws Exception {
    170         // Check that we use U+FFFD as the replacement string for invalid bytes.
    171         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII"));
    172         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII")));
    173     }
    174 
    175     /**
    176      * Tests a widely assumed performance characteristic of String.substring():
    177      * that it reuses the original's backing array. Although behavior should be
    178      * correct even if this test fails, many applications may suffer
    179      * significant performance degradation.
    180      */
    181     public void testSubstringSharesBackingArray() throws IllegalAccessException {
    182         String abcdefghij = "ABCDEFGHIJ";
    183         String cdefg = abcdefghij.substring(2, 7);
    184         assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
    185     }
    186 
    187     /**
    188      * Tests a widely assumed performance characteristic of string's copy
    189      * constructor: that it ensures the backing array is the same length as the
    190      * string. Although behavior should be correct even if this test fails,
    191      * many applications may suffer significant performance degradation.
    192      */
    193     public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException {
    194         String abcdefghij = "ABCDEFGHIJ";
    195         assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij)));
    196 
    197         String cdefg = abcdefghij.substring(2, 7);
    198         assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
    199         assertEquals(5, getBackingArray(new String(cdefg)).length);
    200     }
    201 
    202     /**
    203      * Uses reflection to return the char[] backing the given string. This
    204      * returns the actual backing array; which must not be modified.
    205      */
    206     private char[] getBackingArray(String string) throws IllegalAccessException {
    207         for (Field f : String.class.getDeclaredFields()) {
    208             if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) {
    209                 f.setAccessible(true);
    210                 return (char[]) f.get(string);
    211             }
    212         }
    213         throw new UnsupportedOperationException("No chars[] field on String!");
    214     }
    215 
    216     /**
    217      * Test that strings interned manually and then later loaded as literals
    218      * maintain reference equality. http://b/3098960
    219      */
    220     public void testInternBeforeLiteralIsLoaded() throws Exception{
    221         String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern();
    222         String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral")
    223                 .getDeclaredField("literal").get(null);
    224         assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal));
    225         assertSame(programmatic, literal);
    226     }
    227 
    228     static class HasLiteral {
    229         static String literal = "[5058, 9962, 1563, 5744]";
    230     }
    231 
    232     private static final String COMBINING_DOT_ABOVE = "\u0307";
    233     private static final String LATIN_CAPITAL_I = "I";
    234     private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130";
    235     private static final String LATIN_SMALL_I = "i";
    236     private static final String LATIN_SMALL_DOTLESS_I = "\u0131";
    237 
    238     private static final String[] LATIN_I_VARIANTS = {
    239         LATIN_SMALL_I,
    240         LATIN_SMALL_DOTLESS_I,
    241         LATIN_CAPITAL_I,
    242         LATIN_CAPITAL_I_WITH_DOT_ABOVE,
    243     };
    244 
    245     public void testCaseMapping_tr_TR() {
    246         Locale tr_TR = new Locale("tr", "TR");
    247         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR));
    248         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(tr_TR));
    249         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(tr_TR));
    250 
    251         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR));
    252         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(tr_TR));
    253         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(tr_TR));
    254 
    255         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR));
    256         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR));
    257     }
    258 
    259     public void testCaseMapping_en_US() {
    260         Locale en_US = new Locale("en", "US");
    261         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US));
    262         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US));
    263         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(en_US));
    264 
    265         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US));
    266         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US));
    267         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(en_US));
    268 
    269         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US));
    270         // http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules.
    271         // Android correctly preserves canonical equivalence. (See the separate test for tr_TR.)
    272         assertEquals(LATIN_SMALL_I + COMBINING_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(en_US));
    273     }
    274 
    275     public void testCaseMapping_el() {
    276         Locale el_GR = new Locale("el", "GR");
    277         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    278         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    279         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    280 
    281         Locale en_US = new Locale("en", "US");
    282         assertEquals("    O   ", "    o   ".toUpperCase(en_US));
    283     }
    284 
    285     public void testEqualsIgnoreCase_tr_TR() {
    286         testEqualsIgnoreCase(new Locale("tr", "TR"));
    287     }
    288 
    289     public void testEqualsIgnoreCase_en_US() {
    290         testEqualsIgnoreCase(new Locale("en", "US"));
    291     }
    292 
    293     /**
    294      * String.equalsIgnoreCase should not depend on the locale.
    295      */
    296     private void testEqualsIgnoreCase(Locale locale) {
    297         Locale defaultLocale = Locale.getDefault();
    298         Locale.setDefault(locale);
    299         try {
    300             for (String a : LATIN_I_VARIANTS) {
    301                 for (String b : LATIN_I_VARIANTS) {
    302                     if (!a.equalsIgnoreCase(b)) {
    303                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    304                     }
    305                 }
    306             }
    307         } finally {
    308             Locale.setDefault(defaultLocale);
    309         }
    310     }
    311 
    312     public void testRegionMatches_ignoreCase_en_US() {
    313         testRegionMatches_ignoreCase(new Locale("en", "US"));
    314     }
    315 
    316     public void testRegionMatches_ignoreCase_tr_TR() {
    317         testRegionMatches_ignoreCase(new Locale("tr", "TR"));
    318     }
    319 
    320     private void testRegionMatches_ignoreCase(Locale locale) {
    321         Locale defaultLocale = Locale.getDefault();
    322         Locale.setDefault(locale);
    323         try {
    324             for (String a : LATIN_I_VARIANTS) {
    325                 for (String b : LATIN_I_VARIANTS) {
    326                     if (!a.regionMatches(true, 0, b, 0, b.length())) {
    327                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    328                     }
    329                 }
    330             }
    331         } finally {
    332             Locale.setDefault(defaultLocale);
    333         }
    334     }
    335 
    336     // http://code.google.com/p/android/issues/detail?id=15266
    337     public void test_replaceAll() throws Exception {
    338         assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1"));
    339     }
    340 
    341     // https://code.google.com/p/android/issues/detail?id=23831
    342     public void test_23831() throws Exception {
    343         byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 };
    344         String expected = "\ufffd\ufffd\u0021";
    345 
    346         // Since we use icu4c for CharsetDecoder...
    347         CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
    348         decoder.onMalformedInput(CodingErrorAction.REPLACE);
    349         assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString());
    350 
    351         // Our fast-path code in String should behave the same...
    352         assertEquals(expected, new String(bytes, "UTF-8"));
    353     }
    354 
    355     // https://code.google.com/p/android/issues/detail?id=55129
    356     public void test_55129() throws Exception {
    357         assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-"));
    358         assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-"));
    359         assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-"));
    360     }
    361 
    362     public void test_String_getBytes() throws Exception {
    363         // http://b/11571917
    364         assertEquals("[-126, -96]", Arrays.toString("".getBytes("Shift_JIS")));
    365         assertEquals("[-126, -87]", Arrays.toString("".getBytes("Shift_JIS")));
    366         assertEquals("[-105, 67]", Arrays.toString("".getBytes("Shift_JIS")));
    367         assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS")));
    368         assertEquals("[-29, -127, -117]", Arrays.toString("".getBytes("UTF-8")));
    369 
    370         // http://b/11639117
    371         assertEquals("[-79, -72, -70, -48]", Arrays.toString("".getBytes("EUC-KR")));
    372 
    373 
    374         // https://code.google.com/p/android/issues/detail?id=63188
    375         assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("".getBytes("gbk")));
    376         assertEquals("[-77, -10, -64, -76]", Arrays.toString("".getBytes("gbk")));
    377         assertEquals("[-77, -10]", Arrays.toString("".getBytes("gbk")));
    378     }
    379 
    380     public void test_compareTo() throws Exception {
    381         // For strings where a character differs, the result is
    382         // the difference between the characters.
    383         assertEquals(-1, "a".compareTo("b"));
    384         assertEquals(-2, "a".compareTo("c"));
    385         assertEquals(1, "b".compareTo("a"));
    386         assertEquals(2, "c".compareTo("a"));
    387 
    388         // For strings where the characters match up to the length of the shorter,
    389         // the result is the difference between the strings' lengths.
    390         assertEquals(0, "a".compareTo("a"));
    391         assertEquals(-1, "a".compareTo("aa"));
    392         assertEquals(-1, "a".compareTo("az"));
    393         assertEquals(-2, "a".compareTo("aaa"));
    394         assertEquals(-2, "a".compareTo("azz"));
    395         assertEquals(-3, "a".compareTo("aaaa"));
    396         assertEquals(-3, "a".compareTo("azzz"));
    397         assertEquals(0, "a".compareTo("a"));
    398         assertEquals(1, "aa".compareTo("a"));
    399         assertEquals(1, "az".compareTo("a"));
    400         assertEquals(2, "aaa".compareTo("a"));
    401         assertEquals(2, "azz".compareTo("a"));
    402         assertEquals(3, "aaaa".compareTo("a"));
    403         assertEquals(3, "azzz".compareTo("a"));
    404     }
    405 
    406     public void test_compareToIgnoreCase() throws Exception {
    407         // For strings where a character differs, the result is
    408         // the difference between the characters.
    409         assertEquals(-1, "a".compareToIgnoreCase("b"));
    410         assertEquals(-1, "a".compareToIgnoreCase("B"));
    411         assertEquals(-2, "a".compareToIgnoreCase("c"));
    412         assertEquals(-2, "a".compareToIgnoreCase("C"));
    413         assertEquals(1, "b".compareToIgnoreCase("a"));
    414         assertEquals(1, "B".compareToIgnoreCase("a"));
    415         assertEquals(2, "c".compareToIgnoreCase("a"));
    416         assertEquals(2, "C".compareToIgnoreCase("a"));
    417 
    418         // For strings where the characters match up to the length of the shorter,
    419         // the result is the difference between the strings' lengths.
    420         assertEquals(0, "a".compareToIgnoreCase("a"));
    421         assertEquals(0, "a".compareToIgnoreCase("A"));
    422         assertEquals(0, "A".compareToIgnoreCase("a"));
    423         assertEquals(0, "A".compareToIgnoreCase("A"));
    424         assertEquals(-1, "a".compareToIgnoreCase("aa"));
    425         assertEquals(-1, "a".compareToIgnoreCase("aA"));
    426         assertEquals(-1, "a".compareToIgnoreCase("Aa"));
    427         assertEquals(-1, "a".compareToIgnoreCase("az"));
    428         assertEquals(-1, "a".compareToIgnoreCase("aZ"));
    429         assertEquals(-2, "a".compareToIgnoreCase("aaa"));
    430         assertEquals(-2, "a".compareToIgnoreCase("AAA"));
    431         assertEquals(-2, "a".compareToIgnoreCase("azz"));
    432         assertEquals(-2, "a".compareToIgnoreCase("AZZ"));
    433         assertEquals(-3, "a".compareToIgnoreCase("aaaa"));
    434         assertEquals(-3, "a".compareToIgnoreCase("AAAA"));
    435         assertEquals(-3, "a".compareToIgnoreCase("azzz"));
    436         assertEquals(-3, "a".compareToIgnoreCase("AZZZ"));
    437         assertEquals(1, "aa".compareToIgnoreCase("a"));
    438         assertEquals(1, "aA".compareToIgnoreCase("a"));
    439         assertEquals(1, "Aa".compareToIgnoreCase("a"));
    440         assertEquals(1, "az".compareToIgnoreCase("a"));
    441         assertEquals(2, "aaa".compareToIgnoreCase("a"));
    442         assertEquals(2, "azz".compareToIgnoreCase("a"));
    443         assertEquals(3, "aaaa".compareToIgnoreCase("a"));
    444         assertEquals(3, "azzz".compareToIgnoreCase("a"));
    445     }
    446 }
    447