Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package libcore.java.lang;
     18 
     19 import java.lang.reflect.Field;
     20 import java.lang.reflect.Modifier;
     21 import java.nio.ByteBuffer;
     22 import java.nio.CharBuffer;
     23 import java.nio.ReadOnlyBufferException;
     24 import java.nio.charset.Charset;
     25 import java.nio.charset.CharsetDecoder;
     26 import java.nio.charset.CharsetEncoder;
     27 import java.nio.charset.CoderResult;
     28 import java.util.Arrays;
     29 import java.util.Locale;
     30 import junit.framework.TestCase;
     31 
     32 public class StringTest extends TestCase {
     33     public void testIsEmpty() {
     34         assertTrue("".isEmpty());
     35         assertFalse("x".isEmpty());
     36     }
     37 
     38     // The evil decoder keeps hold of the CharBuffer it wrote to.
     39     private static final class EvilCharsetDecoder extends CharsetDecoder {
     40         private static char[] chars;
     41         public EvilCharsetDecoder(Charset cs) {
     42             super(cs, 1.0f, 1.0f);
     43         }
     44         protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
     45             chars = out.array();
     46             int inLength = in.remaining();
     47             for (int i = 0; i < inLength; ++i) {
     48                 in.put((byte) 'X');
     49                 out.put('Y');
     50             }
     51             return CoderResult.UNDERFLOW;
     52         }
     53         public static void corrupt() {
     54             for (int i = 0; i < chars.length; ++i) {
     55                 chars[i] = '$';
     56             }
     57         }
     58     }
     59 
     60     // The evil encoder tries to write to the CharBuffer it was given to
     61     // read from.
     62     private static final class EvilCharsetEncoder extends CharsetEncoder {
     63         public EvilCharsetEncoder(Charset cs) {
     64             super(cs, 1.0f, 1.0f);
     65         }
     66         protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
     67             int inLength = in.remaining();
     68             for (int i = 0; i < inLength; ++i) {
     69                 in.put('x');
     70                 out.put((byte) 'y');
     71             }
     72             return CoderResult.UNDERFLOW;
     73         }
     74     }
     75 
     76     private static final Charset EVIL_CHARSET = new Charset("evil", null) {
     77         public boolean contains(Charset charset) { return false; }
     78         public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); }
     79         public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); }
     80     };
     81 
     82     public void testGetBytes_MaliciousCharset() {
     83         try {
     84             String s = "hi";
     85             // Check that our encoder can't write to the input CharBuffer
     86             // it was given.
     87             s.getBytes(EVIL_CHARSET);
     88             fail(); // We shouldn't have got here!
     89         } catch (ReadOnlyBufferException expected) {
     90             // We caught you trying to be naughty!
     91         }
     92     }
     93 
     94     public void testStringFromCharset() {
     95         Charset cs = Charset.forName("UTF-8");
     96         byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
     97         assertEquals("hi", new String(bytes, cs));
     98     }
     99 
    100     public void testStringFromCharset_MaliciousCharset() {
    101         Charset cs = EVIL_CHARSET;
    102         byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
    103         final String result = new String(bytes, cs);
    104         assertEquals("YY", result); // (Our decoder always outputs 'Y's.)
    105         // Check that even if the decoder messes with the output CharBuffer
    106         // after we've created a string from it, it doesn't affect the string.
    107         EvilCharsetDecoder.corrupt();
    108         assertEquals("YY", result);
    109     }
    110 
    111     public void test_getBytes_bad() throws Exception {
    112         // Check that we use '?' as the replacement byte for invalid characters.
    113         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII")));
    114         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII"))));
    115     }
    116 
    117     public void test_getBytes_UTF_8() {
    118         // We have a fast path implementation of String.getBytes for UTF-8.
    119         Charset cs = Charset.forName("UTF-8");
    120 
    121         // Test the empty string.
    122         assertEquals("[]", Arrays.toString("".getBytes(cs)));
    123 
    124         // Test one-byte characters.
    125         assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs)));
    126         assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs)));
    127         assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs)));
    128 
    129         // Test two-byte characters.
    130         assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs)));
    131         assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs)));
    132         assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs)));
    133         assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs)));
    134 
    135         // Test three-byte characters.
    136         assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs)));
    137         assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs)));
    138         assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs)));
    139         assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs)));
    140 
    141         // Test supplementary characters.
    142         // Minimum supplementary character: U+10000
    143         assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs)));
    144         // Random supplementary character: U+10381 Ugaritic letter beta
    145         assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs)));
    146         // Maximum supplementary character: U+10FFFF
    147         assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs)));
    148         // A high surrogate at end of string is an error replaced with '?'.
    149         assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs)));
    150         // A high surrogate not followed by a low surrogate is an error replaced with '?'.
    151         assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs)));
    152     }
    153 
    154     public void test_new_String_bad() throws Exception {
    155         // Check that we use U+FFFD as the replacement string for invalid bytes.
    156         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII"));
    157         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII")));
    158     }
    159 
    160     /**
    161      * Tests a widely assumed performance characteristic of String.substring():
    162      * that it reuses the original's backing array. Although behaviour should be
    163      * correct even if this test fails, many applications may suffer
    164      * significant performance degradation.
    165      */
    166     public void testSubstringSharesBackingArray() throws IllegalAccessException {
    167         String abcdefghij = "ABCDEFGHIJ";
    168         String cdefg = abcdefghij.substring(2, 7);
    169         assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
    170     }
    171 
    172     /**
    173      * Tests a widely assumed performance characteristic of string's copy
    174      * constructor: that it ensures the backing array is the same length as the
    175      * string. Although behaviour should be correct even if this test fails,
    176      * many applications may suffer significant performance degradation.
    177      */
    178     public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException {
    179         String abcdefghij = "ABCDEFGHIJ";
    180         assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij)));
    181 
    182         String cdefg = abcdefghij.substring(2, 7);
    183         assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg));
    184         assertEquals(5, getBackingArray(new String(cdefg)).length);
    185     }
    186 
    187     /**
    188      * Uses reflection to return the char[] backing the given string. This
    189      * returns the actual backing array; which must not be modified.
    190      */
    191     private char[] getBackingArray(String string) throws IllegalAccessException {
    192         for (Field f : String.class.getDeclaredFields()) {
    193             if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) {
    194                 f.setAccessible(true);
    195                 return (char[]) f.get(string);
    196             }
    197         }
    198         throw new UnsupportedOperationException("No chars[] field on String!");
    199     }
    200 
    201     /**
    202      * Test that strings interned manually and then later loaded as literals
    203      * maintain reference equality. http://b/3098960
    204      */
    205     public void testInternBeforeLiteralIsLoaded() throws Exception{
    206         String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern();
    207         String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral")
    208                 .getDeclaredField("literal").get(null);
    209         assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal));
    210         assertSame(programmatic, literal);
    211     }
    212 
    213     static class HasLiteral {
    214         static String literal = "[5058, 9962, 1563, 5744]";
    215     }
    216 
    217     private static final String LATIN_CAPITAL_I = "I";
    218     private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130";
    219     private static final String LATIN_SMALL_I = "i";
    220     private static final String LATIN_SMALL_DOTLESS_I = "\u0131";
    221 
    222     private static final String[] LATIN_I_VARIANTS = {
    223         LATIN_SMALL_I,
    224         LATIN_SMALL_DOTLESS_I,
    225         LATIN_CAPITAL_I,
    226         LATIN_CAPITAL_I_WITH_DOT_ABOVE,
    227     };
    228 
    229     public void testCaseMapping_tr_TR() {
    230         Locale trTR = new Locale("tr", "TR");
    231         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(trTR));
    232         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(trTR));
    233         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(trTR));
    234 
    235         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(trTR));
    236         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(trTR));
    237         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(trTR));
    238 
    239         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(trTR));
    240         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(trTR));
    241     }
    242 
    243     public void testCaseMapping_en_US() {
    244         Locale enUs = new Locale("en", "US");
    245         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(enUs));
    246         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(enUs));
    247         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(enUs));
    248 
    249         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(enUs));
    250         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(enUs));
    251         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(enUs));
    252 
    253         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(enUs));
    254         // http://b/3325799: Android fails this with an extra combining "dot above".
    255         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(enUs));
    256     }
    257 
    258     public void testEqualsIgnoreCase_tr_TR() {
    259         testEqualsIgnoreCase(new Locale("tr", "TR"));
    260     }
    261 
    262     public void testEqualsIgnoreCase_en_US() {
    263         testEqualsIgnoreCase(new Locale("en", "US"));
    264     }
    265 
    266     /**
    267      * String.equalsIgnoreCase should not depend on the locale.
    268      */
    269     private void testEqualsIgnoreCase(Locale locale) {
    270         Locale defaultLocale = Locale.getDefault();
    271         Locale.setDefault(locale);
    272         try {
    273             for (String a : LATIN_I_VARIANTS) {
    274                 for (String b : LATIN_I_VARIANTS) {
    275                     if (!a.equalsIgnoreCase(b)) {
    276                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    277                     }
    278                 }
    279             }
    280         } finally {
    281             Locale.setDefault(defaultLocale);
    282         }
    283     }
    284 
    285     public void testRegionMatches_ignoreCase_en_US() {
    286         testRegionMatches_ignoreCase(new Locale("en", "US"));
    287     }
    288 
    289     public void testRegionMatches_ignoreCase_tr_TR() {
    290         testRegionMatches_ignoreCase(new Locale("tr", "TR"));
    291     }
    292 
    293     private void testRegionMatches_ignoreCase(Locale locale) {
    294         Locale defaultLocale = Locale.getDefault();
    295         Locale.setDefault(locale);
    296         try {
    297             for (String a : LATIN_I_VARIANTS) {
    298                 for (String b : LATIN_I_VARIANTS) {
    299                     if (!a.regionMatches(true, 0, b, 0, b.length())) {
    300                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    301                     }
    302                 }
    303             }
    304         } finally {
    305             Locale.setDefault(defaultLocale);
    306         }
    307     }
    308 
    309     // http://code.google.com/p/android/issues/detail?id=15266
    310     public void test_replaceAll() throws Exception {
    311         assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1"));
    312     }
    313 }
    314