Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package libcore.java.lang;
     18 
     19 import android.icu.lang.UCharacter;
     20 
     21 import java.io.ByteArrayInputStream;
     22 import java.io.DataInputStream;
     23 import java.io.IOException;
     24 import java.nio.ByteBuffer;
     25 import java.nio.CharBuffer;
     26 import java.nio.ReadOnlyBufferException;
     27 import java.nio.charset.Charset;
     28 import java.nio.charset.CharsetDecoder;
     29 import java.nio.charset.CharsetEncoder;
     30 import java.nio.charset.CoderResult;
     31 import java.nio.charset.CodingErrorAction;
     32 import java.nio.charset.ModifiedUtf8;
     33 import java.nio.charset.StandardCharsets;
     34 import java.util.Arrays;
     35 import java.util.ArrayList;
     36 import java.util.Locale;
     37 
     38 import junit.framework.TestCase;
     39 
     40 public class StringTest extends TestCase {
     41     public void testIsEmpty() {
     42         assertTrue("".isEmpty());
     43         assertFalse("x".isEmpty());
     44     }
     45 
     46     // The evil decoder keeps hold of the CharBuffer it wrote to.
     47     private static final class EvilCharsetDecoder extends CharsetDecoder {
     48         private static char[] chars;
     49         public EvilCharsetDecoder(Charset cs) {
     50             super(cs, 1.0f, 1.0f);
     51         }
     52         protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
     53             chars = out.array();
     54             int inLength = in.remaining();
     55             for (int i = 0; i < inLength; ++i) {
     56                 in.put((byte) 'X');
     57                 out.put('Y');
     58             }
     59             return CoderResult.UNDERFLOW;
     60         }
     61         public static void corrupt() {
     62             for (int i = 0; i < chars.length; ++i) {
     63                 chars[i] = '$';
     64             }
     65         }
     66     }
     67 
     68     // The evil encoder tries to write to the CharBuffer it was given to
     69     // read from.
     70     private static final class EvilCharsetEncoder extends CharsetEncoder {
     71         public EvilCharsetEncoder(Charset cs) {
     72             super(cs, 1.0f, 1.0f);
     73         }
     74         protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
     75             int inLength = in.remaining();
     76             for (int i = 0; i < inLength; ++i) {
     77                 in.put('x');
     78                 out.put((byte) 'y');
     79             }
     80             return CoderResult.UNDERFLOW;
     81         }
     82     }
     83 
     84     private static final Charset EVIL_CHARSET = new Charset("evil", null) {
     85         public boolean contains(Charset charset) { return false; }
     86         public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); }
     87         public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); }
     88     };
     89 
     90     public void testGetBytes_MaliciousCharset() {
     91         try {
     92             String s = "hi";
     93             // Check that our encoder can't write to the input CharBuffer
     94             // it was given.
     95             s.getBytes(EVIL_CHARSET);
     96             fail(); // We shouldn't have got here!
     97         } catch (ReadOnlyBufferException expected) {
     98             // We caught you trying to be naughty!
     99         }
    100     }
    101 
    102     public void testString_BII() throws Exception {
    103         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
    104         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2));
    105     }
    106 
    107     public void testString_BIIString() throws Exception {
    108         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
    109         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8"));
    110     }
    111 
    112     public void testString_BIICharset() throws Exception {
    113         byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
    114         assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8")));
    115     }
    116 
    117     public void testString_BCharset() throws Exception {
    118         byte[] bytes = "a\u0666b".getBytes("UTF-8");
    119         assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8")));
    120     }
    121 
    122     public void testStringFromCharset_MaliciousCharset() {
    123         Charset cs = EVIL_CHARSET;
    124         byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
    125         final String result = new String(bytes, cs);
    126         assertEquals("YY", result); // (Our decoder always outputs 'Y's.)
    127         // Check that even if the decoder messes with the output CharBuffer
    128         // after we've created a string from it, it doesn't affect the string.
    129         EvilCharsetDecoder.corrupt();
    130         assertEquals("YY", result);
    131     }
    132 
    133     public void test_getBytes_bad() throws Exception {
    134         // Check that we use '?' as the replacement byte for invalid characters.
    135         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII")));
    136         assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII"))));
    137     }
    138 
    139     public void test_getBytes_UTF_8() {
    140         // We have a fast path implementation of String.getBytes for UTF-8.
    141         Charset cs = Charset.forName("UTF-8");
    142 
    143         // Test the empty string.
    144         assertEquals("[]", Arrays.toString("".getBytes(cs)));
    145 
    146         // Test one-byte characters.
    147         assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs)));
    148         assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs)));
    149         assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs)));
    150 
    151         // Test two-byte characters.
    152         assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs)));
    153         assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs)));
    154         assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs)));
    155         assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs)));
    156 
    157         // Test three-byte characters.
    158         assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs)));
    159         assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs)));
    160         assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs)));
    161         assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs)));
    162 
    163         // Test supplementary characters.
    164         // Minimum supplementary character: U+10000
    165         assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs)));
    166         // Random supplementary character: U+10381 Ugaritic letter beta
    167         assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs)));
    168         // Maximum supplementary character: U+10FFFF
    169         assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs)));
    170         // A high surrogate at end of string is an error replaced with '?'.
    171         assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs)));
    172         // A high surrogate not followed by a low surrogate is an error replaced with '?'.
    173         assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs)));
    174     }
    175 
    176     public void test_new_String_bad() throws Exception {
    177         // Check that we use U+FFFD as the replacement string for invalid bytes.
    178         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII"));
    179         assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII")));
    180     }
    181 
    182     /**
    183 
    184      * Test that strings interned manually and then later loaded as literals
    185      * maintain reference equality. http://b/3098960
    186      */
    187     public void testInternBeforeLiteralIsLoaded() throws Exception{
    188         String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern();
    189         String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral")
    190                 .getDeclaredField("literal").get(null);
    191         assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal));
    192         assertSame(programmatic, literal);
    193     }
    194 
    195     static class HasLiteral {
    196         static String literal = "[5058, 9962, 1563, 5744]";
    197     }
    198 
    199     private static final String COMBINING_DOT_ABOVE = "\u0307";
    200     private static final String LATIN_CAPITAL_I = "I";
    201     private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130";
    202     private static final String LATIN_SMALL_I = "i";
    203     private static final String LATIN_SMALL_DOTLESS_I = "\u0131";
    204 
    205     private static final String[] LATIN_I_VARIANTS = {
    206         LATIN_SMALL_I,
    207         LATIN_SMALL_DOTLESS_I,
    208         LATIN_CAPITAL_I,
    209         LATIN_CAPITAL_I_WITH_DOT_ABOVE,
    210     };
    211 
    212     public void testCaseMapping_tr_TR() {
    213         Locale tr_TR = new Locale("tr", "TR");
    214         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR));
    215         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(tr_TR));
    216         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(tr_TR));
    217 
    218         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR));
    219         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(tr_TR));
    220         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(tr_TR));
    221 
    222         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR));
    223         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR));
    224     }
    225 
    226     public void testCaseMapping_en_US() {
    227         Locale en_US = new Locale("en", "US");
    228         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US));
    229         assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US));
    230         assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(en_US));
    231 
    232         assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US));
    233         assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US));
    234         assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(en_US));
    235 
    236         assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US));
    237         // http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules.
    238         // Android correctly preserves canonical equivalence. (See the separate test for tr_TR.)
    239         assertEquals(LATIN_SMALL_I + COMBINING_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(en_US));
    240     }
    241 
    242     public void testCaseMapping_el() {
    243         Locale el_GR = new Locale("el", "GR");
    244         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    245         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    246         assertEquals("    O   ", "    o   ".toUpperCase(el_GR));
    247 
    248         Locale en_US = new Locale("en", "US");
    249         assertEquals("    O   ", "    o   ".toUpperCase(en_US));
    250     }
    251 
    252     public void testEqualsIgnoreCase_tr_TR() {
    253         testEqualsIgnoreCase(new Locale("tr", "TR"));
    254     }
    255 
    256     public void testEqualsIgnoreCase_en_US() {
    257         testEqualsIgnoreCase(new Locale("en", "US"));
    258     }
    259 
    260     /**
    261      * String.equalsIgnoreCase should not depend on the locale.
    262      */
    263     private void testEqualsIgnoreCase(Locale locale) {
    264         Locale defaultLocale = Locale.getDefault();
    265         Locale.setDefault(locale);
    266         try {
    267             for (String a : LATIN_I_VARIANTS) {
    268                 for (String b : LATIN_I_VARIANTS) {
    269                     if (!a.equalsIgnoreCase(b)) {
    270                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    271                     }
    272                 }
    273             }
    274         } finally {
    275             Locale.setDefault(defaultLocale);
    276         }
    277     }
    278 
    279     public void testRegionMatches_ignoreCase_en_US() {
    280         testRegionMatches_ignoreCase(new Locale("en", "US"));
    281     }
    282 
    283     public void testRegionMatches_ignoreCase_tr_TR() {
    284         testRegionMatches_ignoreCase(new Locale("tr", "TR"));
    285     }
    286 
    287     private void testRegionMatches_ignoreCase(Locale locale) {
    288         Locale defaultLocale = Locale.getDefault();
    289         Locale.setDefault(locale);
    290         try {
    291             for (String a : LATIN_I_VARIANTS) {
    292                 for (String b : LATIN_I_VARIANTS) {
    293                     if (!a.regionMatches(true, 0, b, 0, b.length())) {
    294                         fail("Expected " + a + " to equal " + b + " in " +  locale);
    295                     }
    296                 }
    297             }
    298         } finally {
    299             Locale.setDefault(defaultLocale);
    300         }
    301     }
    302 
    303     // http://code.google.com/p/android/issues/detail?id=15266
    304     public void test_replaceAll() throws Exception {
    305         assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1"));
    306     }
    307 
    308     // Test that CharsetDecoder and fast-path decoder are consistent when handling ill-formed
    309     // sequence. http://b/69599767
    310     // This test was originally created for the bug
    311     // https://code.google.com/p/android/issues/detail?id=23831
    312     public void test_69599767() throws Exception {
    313         byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 };
    314         String expected = "\ufffd\ufffd\ufffd\u0021";
    315 
    316         // Since we use ICU4C for CharsetDecoder...
    317         CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
    318         decoder.onMalformedInput(CodingErrorAction.REPLACE);
    319         assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString());
    320 
    321         // Our fast-path code in String should behave the same...
    322         assertEquals(expected, new String(bytes, "UTF-8"));
    323     }
    324 
    325     public void testFastPathString_wellFormedUtf8Sequence() throws Exception {
    326         // U+0000 null
    327         assertFastPathUtf8DecodedEquals("\u0000", "00");
    328         // U+0031 ASCII char '1'
    329         assertFastPathUtf8DecodedEquals("1", "31");
    330         // U+007f
    331         assertFastPathUtf8DecodedEquals("\u007f", "7f");
    332         // 2-byte UTF-8 sequence
    333         assertFastPathUtf8DecodedEquals("\u0080", "c2 80");
    334         assertFastPathUtf8DecodedEquals("\u07ff", "df bf");
    335         // 3-byte UTF-8 sequence
    336         assertFastPathUtf8DecodedEquals("\u0800", "e0 a0 80");
    337         assertFastPathUtf8DecodedEquals("\ud7ff", "ed 9f bf"); // last code point before surrogate
    338         assertFastPathUtf8DecodedEquals("\ue000", "ee 80 80"); // first code point after surrogate
    339         assertFastPathUtf8DecodedEquals("\uffff", "ef bf bf");
    340         // U+10000 The minimum value of a Unicode supplementary code point
    341         assertEquals("\ud800\udc00", String.valueOf(Character.toChars(0x10000)));
    342         assertFastPathUtf8DecodedEquals("\ud800\udc00", "f0 90 80 80");
    343         // U+10ffff The maximum value of a Unicode code point
    344         assertEquals("\udbff\udfff", String.valueOf(Character.toChars(0x10ffff)));
    345         assertFastPathUtf8DecodedEquals("\udbff\udfff", "f4 8f bf bf");
    346 
    347         // Null in the middle
    348         assertFastPathUtf8DecodedEquals("1\u00002\u07ff", "31 00 32 df bf");
    349 
    350         assertFastPathUtf8DecodedEquals("\u0800\udbff\udfff\uffff1\u0080",
    351                 "e0 a0 80 f4 8f bf bf ef bf bf 31 c2 80");
    352 
    353         // Check UTF8 sequences of all code points is decoded correctly.
    354         // Validate the decoder using byte sequence generated by UTF-8 encoder.
    355         for (int codePoint = Character.MIN_CODE_POINT;
    356                 codePoint <= Character.MAX_CODE_POINT;
    357                 codePoint++) {
    358             if (codePoint < Character.MIN_SURROGATE || codePoint > Character.MAX_SURROGATE) {
    359                 String expected = UCharacter.toString(codePoint);
    360                 // Android platform default is always UTF-8.
    361                 byte[] utf8Bytes = expected.getBytes();
    362                 assertEquals(expected, new String(utf8Bytes));
    363             }
    364         }
    365     }
    366 
    367     public void testFastPathString_illFormedUtf8Sequence() throws Exception {
    368         // Overlong Sequence of ASCII char '1'
    369         assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 b1");
    370         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 80 b1");
    371         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd", "f0 80 80 b1");
    372         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 80 80 b1");
    373         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd", "fc 80 80 80 80 b1");
    374 
    375         // Overlong null \u0000
    376         // "c0 80" is a Modified UTF-8 sequence representing \u0000, but illegal in UTF-8.
    377         assertEquals("\u0000", decodeModifiedUTF8("c0 80"));
    378         assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 80");
    379 
    380         // Overlong BMP char U+0080. The correct UTF-8 encoded form of U+0080 is 2-byte "c2 80".
    381         // The overlong form can be obtained by filling 0x80 into 1110xxxx 10xxxxxx 10xxxxxx
    382         // == 1110000 10000010 10000000. (hex form e0 82 80)
    383         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 82 80");
    384 
    385         // Overlong Supplementary Characters U+10000.
    386         // The correct UTF-8 encoded form of U+10000 is 4-byte "f0 90 80 80".
    387         // The overlong form can be obtained by filling 0x10000 into
    388         // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    389         // == 1110000 10000000 10010000 10000000 10000000. (hex form f8 80 90 80 80)
    390         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 90 80 80");
    391 
    392         // Single surrogate in CESU-8 encoding
    393         // A CESU-8 sequence, but illegal in UTF-8.
    394         assertEquals("\ud800", decodeCESU8("ed a0 80"));
    395         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "ed a0 80");
    396 
    397         // Surrogate pair in CESU-8 encoding. The value is bytes U+10000
    398         // Assert the bytes are valid CESU-8 sequence before decoding using UTF-8
    399         String surrogatePair = decodeCESU8("ed a0 80 ed b0 80");
    400         assertEquals("\ud800\udc00", surrogatePair);
    401         assertEquals(0x10000, Character.codePointAt(surrogatePair.toCharArray(), 0));
    402         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd",
    403                 "ed a0 80 ed b0 80");
    404 
    405         // Illegal first-byte
    406         assertFastPathUtf8DecodedEquals("\ufffd", "c0");
    407         assertFastPathUtf8DecodedEquals("\ufffd", "80");
    408 
    409         // Maximal valid subpart. byte 0x31 should be decoded into ASCII char '1', not part of
    410         // ill-formed byte sequence
    411         assertFastPathUtf8DecodedEquals("\ufffd1", "c2 31");
    412         assertFastPathUtf8DecodedEquals("\ufffd1", "e1 31");
    413         assertFastPathUtf8DecodedEquals("\ufffd1", "e1 80 31");
    414         assertFastPathUtf8DecodedEquals("\ufffd1", "f1 31");
    415         assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 31");
    416         assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 80 31");;
    417 
    418         // Ill-formed sequence in the end of stream
    419         assertFastPathUtf8DecodedEquals("1\ufffd", "31 c2");
    420         assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1");
    421         assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1 80");
    422         assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1");
    423         assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80");
    424         assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80 80");
    425 
    426         // Test lower and upper bound of first trail byte when leading byte is e0/ed/f0/f4
    427         // Valid range of trail byte is A0..BF.
    428         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 9f");
    429         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 c0");
    430         // Valid range of trail byte is 80..9F.
    431         assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 ed 7f");
    432         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 ed a0");
    433         // Valid range of trail byte is 90..BF.
    434         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 8f");
    435         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 c0");
    436         // Valid range of trail byte is 80..8F.
    437         assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 f4 7f");
    438         assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f4 90");
    439 
    440         // More ill-formed sequences
    441         assertFastPathUtf8DecodedEquals("\ufffd\ufffd1", "f1 80 80 e1 80 31");
    442         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 c0 b1 31");
    443         assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 ed a0 31");
    444         assertFastPathUtf8DecodedEquals("A\ufffd\ufffdA\ufffdA", "41 C0 AF 41 F4 80 80 41");
    445     }
    446 
    447     private void assertFastPathUtf8DecodedEquals(String expected, String hexString)
    448             throws Exception {
    449         String actual = new String(hexStringtoBytes(hexString));
    450         assertEquals("Fast-path UTF-8 decoder decodes sequence [" + hexString
    451                         + "] into unexpected String",
    452                 expected, actual);
    453         // Since we use ICU4C for CharsetDecoder,
    454         // check UTF-8 CharsetDecoder has the same result as the fast-path decoder
    455         CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
    456                 .onMalformedInput(CodingErrorAction.REPLACE);
    457         assertEquals("Fast-path UTF-8 decoder and UTF-8 CharsetDecoder has a different conversion"
    458                         + " result for sequence [" + hexString + "]",
    459                 decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString(), actual);
    460     }
    461 
    462     private static String decodeCESU8(String hexString) throws IOException {
    463         CharsetDecoder cesu8Decoder = Charset.forName("CESU-8").newDecoder();
    464         return cesu8Decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString();
    465     }
    466 
    467     private static String decodeModifiedUTF8(String hexString) throws IOException {
    468         byte[] bytes = hexStringtoBytes(hexString);
    469         // DataInputStream stores length as 2-byte short. Check the length before decoding
    470         if (bytes.length > 0xffff) {
    471             throw new IllegalArgumentException("Modified UTF-8 bytes are too long.");
    472         }
    473         byte[] buf = new byte[bytes.length + 2];
    474         buf[0] = (byte)(bytes.length >>> 8);
    475         buf[1] = (byte) bytes.length;
    476         System.arraycopy(bytes, 0, buf, 2, bytes.length);
    477         DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
    478         return dis.readUTF();
    479     }
    480 
    481     private static byte[] hexStringtoBytes(String input) {
    482         String[] parts = input.split(" ");
    483         byte[] bytes = new byte[parts.length];
    484         for (int i = 0; i < parts.length; i++) {
    485             int val = Integer.parseInt(parts[i], 16);
    486             if (val < 0 || val > 255) {
    487                 throw new IllegalArgumentException();
    488             }
    489             bytes[i] = (byte) (0xff & val);
    490         }
    491         return bytes;
    492     }
    493 
    494     // https://code.google.com/p/android/issues/detail?id=55129
    495     public void test_55129() throws Exception {
    496         assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-"));
    497         assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-"));
    498         assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-"));
    499 
    500         // Replace on an empty string with an empty target should insert the pattern
    501         // precisely once.
    502         assertEquals("", "".replace("", ""));
    503         assertEquals("food", "".replace("", "food"));
    504     }
    505 
    506     public void test_replace() {
    507         // Replace on an empty string is a no-op.
    508         assertEquals("", "".replace("foo", "bar"));
    509         // Replace on a string which doesn't contain the target sequence is a no-op.
    510         assertEquals("baz", "baz".replace("foo", "bar"));
    511         // Test that we iterate forward on the string.
    512         assertEquals("mmmba", "bababa".replace("baba", "mmm"));
    513         // Test replacements at the end of the string.
    514         assertEquals("foodie", "foolish".replace("lish", "die"));
    515         // Test a string that has multiple replacements.
    516         assertEquals("hahahaha", "kkkk".replace("k", "ha"));
    517     }
    518 
    519     public void test_String_getBytes() throws Exception {
    520         // http://b/11571917
    521         assertEquals("[-126, -96]", Arrays.toString("".getBytes("Shift_JIS")));
    522         assertEquals("[-126, -87]", Arrays.toString("".getBytes("Shift_JIS")));
    523         assertEquals("[-105, 67]", Arrays.toString("".getBytes("Shift_JIS")));
    524         assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS")));
    525         assertEquals("[-29, -127, -117]", Arrays.toString("".getBytes("UTF-8")));
    526 
    527         // http://b/11639117
    528         assertEquals("[-79, -72, -70, -48]", Arrays.toString("".getBytes("EUC-KR")));
    529 
    530 
    531         // https://code.google.com/p/android/issues/detail?id=63188
    532         assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("".getBytes("gbk")));
    533         assertEquals("[-77, -10, -64, -76]", Arrays.toString("".getBytes("gbk")));
    534         assertEquals("[-77, -10]", Arrays.toString("".getBytes("gbk")));
    535     }
    536 
    537     public void test_compareTo() throws Exception {
    538         // For strings where a character differs, the result is
    539         // the difference between the characters.
    540         assertEquals(-1, "a".compareTo("b"));
    541         assertEquals(-2, "a".compareTo("c"));
    542         assertEquals(1, "b".compareTo("a"));
    543         assertEquals(2, "c".compareTo("a"));
    544 
    545         // For strings where the characters match up to the length of the shorter,
    546         // the result is the difference between the strings' lengths.
    547         assertEquals(0, "a".compareTo("a"));
    548         assertEquals(-1, "a".compareTo("aa"));
    549         assertEquals(-1, "a".compareTo("az"));
    550         assertEquals(-2, "a".compareTo("aaa"));
    551         assertEquals(-2, "a".compareTo("azz"));
    552         assertEquals(-3, "a".compareTo("aaaa"));
    553         assertEquals(-3, "a".compareTo("azzz"));
    554         assertEquals(0, "a".compareTo("a"));
    555         assertEquals(1, "aa".compareTo("a"));
    556         assertEquals(1, "az".compareTo("a"));
    557         assertEquals(2, "aaa".compareTo("a"));
    558         assertEquals(2, "azz".compareTo("a"));
    559         assertEquals(3, "aaaa".compareTo("a"));
    560         assertEquals(3, "azzz".compareTo("a"));
    561     }
    562 
    563     public void test_compareToIgnoreCase() throws Exception {
    564         // For strings where a character differs, the result is
    565         // the difference between the characters.
    566         assertEquals(-1, "a".compareToIgnoreCase("b"));
    567         assertEquals(-1, "a".compareToIgnoreCase("B"));
    568         assertEquals(-2, "a".compareToIgnoreCase("c"));
    569         assertEquals(-2, "a".compareToIgnoreCase("C"));
    570         assertEquals(1, "b".compareToIgnoreCase("a"));
    571         assertEquals(1, "B".compareToIgnoreCase("a"));
    572         assertEquals(2, "c".compareToIgnoreCase("a"));
    573         assertEquals(2, "C".compareToIgnoreCase("a"));
    574 
    575         // For strings where the characters match up to the length of the shorter,
    576         // the result is the difference between the strings' lengths.
    577         assertEquals(0, "a".compareToIgnoreCase("a"));
    578         assertEquals(0, "a".compareToIgnoreCase("A"));
    579         assertEquals(0, "A".compareToIgnoreCase("a"));
    580         assertEquals(0, "A".compareToIgnoreCase("A"));
    581         assertEquals(-1, "a".compareToIgnoreCase("aa"));
    582         assertEquals(-1, "a".compareToIgnoreCase("aA"));
    583         assertEquals(-1, "a".compareToIgnoreCase("Aa"));
    584         assertEquals(-1, "a".compareToIgnoreCase("az"));
    585         assertEquals(-1, "a".compareToIgnoreCase("aZ"));
    586         assertEquals(-2, "a".compareToIgnoreCase("aaa"));
    587         assertEquals(-2, "a".compareToIgnoreCase("AAA"));
    588         assertEquals(-2, "a".compareToIgnoreCase("azz"));
    589         assertEquals(-2, "a".compareToIgnoreCase("AZZ"));
    590         assertEquals(-3, "a".compareToIgnoreCase("aaaa"));
    591         assertEquals(-3, "a".compareToIgnoreCase("AAAA"));
    592         assertEquals(-3, "a".compareToIgnoreCase("azzz"));
    593         assertEquals(-3, "a".compareToIgnoreCase("AZZZ"));
    594         assertEquals(1, "aa".compareToIgnoreCase("a"));
    595         assertEquals(1, "aA".compareToIgnoreCase("a"));
    596         assertEquals(1, "Aa".compareToIgnoreCase("a"));
    597         assertEquals(1, "az".compareToIgnoreCase("a"));
    598         assertEquals(2, "aaa".compareToIgnoreCase("a"));
    599         assertEquals(2, "azz".compareToIgnoreCase("a"));
    600         assertEquals(3, "aaaa".compareToIgnoreCase("a"));
    601         assertEquals(3, "azzz".compareToIgnoreCase("a"));
    602     }
    603 
    604     // http://b/25943996
    605     public void testSplit_trailingSeparators() {
    606         String[] splits = "test\0message\0\0\0\0\0\0".split("\0", -1);
    607         assertEquals("test", splits[0]);
    608         assertEquals("message", splits[1]);
    609         assertEquals("", splits[2]);
    610         assertEquals("", splits[3]);
    611         assertEquals("", splits[4]);
    612         assertEquals("", splits[5]);
    613         assertEquals("", splits[6]);
    614         assertEquals("", splits[7]);
    615     }
    616 
    617     // http://b/63745717
    618     // A buffer overflow bug was found in ICU4C. A native crash occurs only when ASAN is enabled.
    619     public void testSplit_lookBehind() {
    620         String string = "a";
    621         String[] words = string.split("(?<!(^|[A-Z]))(?=[A-Z])|(?<!^)(?=[A-Z][a-z])| |_|-");
    622         assertEquals(1, words.length);
    623         assertEquals(string, words[0]);
    624     }
    625 
    626     // http://b/26126818
    627     public void testCodePointCount() {
    628         String hello = "Hello, fools";
    629 
    630         assertEquals(5, hello.codePointCount(0, 5));
    631         assertEquals(7, hello.codePointCount(5, 12));
    632         assertEquals(2, hello.codePointCount(10, 12));
    633     }
    634 
    635     // http://b/26444984
    636     public void testGetCharsOverflow() {
    637         int srcBegin = Integer.MAX_VALUE; //2147483647
    638         int srcEnd = srcBegin + 10;  //-2147483639
    639         try {
    640             // The output array size must be larger than |srcEnd - srcBegin|.
    641             "yes".getChars(srcBegin, srcEnd, new char[256], 0);
    642             fail();
    643         } catch (StringIndexOutOfBoundsException expected) {
    644         }
    645     }
    646 
    647     // http://b/28998511
    648     public void testGetCharsBoundsChecks() {
    649         // This is the explicit case from the bug: dstBegin == srcEnd - srcBegin
    650         assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[0], -4);
    651 
    652         // Some valid cases.
    653         char[] dst = new char[1];
    654         "abcd".getChars(0, 1, dst, 0);
    655         assertEquals('a', dst[0]);
    656         "abcd".getChars(3, 4, dst, 0);
    657         assertEquals('d', dst[0]);
    658         dst = new char[4];
    659         "abcd".getChars(0, 4, dst, 0);
    660         assertTrue(Arrays.equals("abcd".toCharArray(), dst));
    661 
    662         // Zero length src.
    663         "abcd".getChars(0, 0, new char[0], 0);  // dstBegin == 0 is ok if copying zero chars
    664         "abcd".getChars(0, 0, new char[1], 1);  // dstBegin == 1 is ok if copying zero chars
    665         "".getChars(0, 0, new char[0], 0);
    666         "abcd".getChars(1, 1, new char[1], 0);
    667         "abcd".getChars(1, 1, new char[1], 1);
    668 
    669         // Valid src args, invalid dst args.
    670         assertGetCharsThrowsAIOOBException("abcd", 3, 4, new char[1], 1); // Out of range dstBegin
    671         assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[3], 0); // Small dst
    672         assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], -1); // Negative dstBegin
    673 
    674         // dstBegin + (srcEnd - srcBegin) -> integer overflow OR dstBegin >= dst.length
    675         assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], Integer.MAX_VALUE - 1);
    676 
    677         // Invalid src args, valid dst args.
    678         assertGetCharsThrowsSIOOBException("abcd", 2, 1, new char[4], 0); // srcBegin > srcEnd
    679         assertGetCharsThrowsSIOOBException("abcd", -1, 3, new char[4], 0); // Negative srcBegin
    680         assertGetCharsThrowsSIOOBException("abcd", 0, 5, new char[4], 0); // Out of range srcEnd
    681         assertGetCharsThrowsSIOOBException("abcd", 0, -1, new char[4], 0); // Negative srcEnd
    682 
    683         // Valid src args, invalid dst args.
    684         assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], 1); // Bad dstBegin
    685 
    686         // Zero length src copy, invalid dst args.
    687         assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], -1); // Negative dstBegin
    688         assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[0], 1); // Out of range dstBegin
    689         assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[1], 2);  // Out of range dstBegin
    690         assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], 5); // Out of range dstBegin
    691     }
    692 
    693     private static void assertGetCharsThrowsAIOOBException(String s, int srcBegin, int srcEnd,
    694             char[] dst, int dstBegin) {
    695         try {
    696             s.getChars(srcBegin, srcEnd, dst, dstBegin);
    697             fail();
    698         } catch (ArrayIndexOutOfBoundsException expected) {
    699         }
    700     }
    701 
    702     private static void assertGetCharsThrowsSIOOBException(String s, int srcBegin, int srcEnd,
    703             char[] dst, int dstBegin) {
    704         try {
    705             s.getChars(srcBegin, srcEnd, dst, dstBegin);
    706             fail();
    707         } catch (StringIndexOutOfBoundsException expected) {
    708         }
    709     }
    710 
    711     public void testChars() {
    712         String s = "Hello\n\tworld";
    713         int[] expected = new int[s.length()];
    714         for (int i = 0; i < s.length(); ++i) {
    715             expected[i] = (int) s.charAt(i);
    716         }
    717         assertTrue(Arrays.equals(expected, s.chars().toArray()));
    718 
    719         // Surrogate code point
    720         char high = '\uD83D', low = '\uDE02';
    721         String surrogateCP = new String(new char[]{high, low, low});
    722         assertTrue(Arrays.equals(new int[]{high, low, low}, surrogateCP.chars().toArray()));
    723     }
    724 
    725     public void testCodePoints() {
    726         String s = "Hello\n\tworld";
    727         int[] expected = new int[s.length()];
    728         for (int i = 0; i < s.length(); ++i) {
    729             expected[i] = (int) s.charAt(i);
    730         }
    731         assertTrue(Arrays.equals(expected, s.codePoints().toArray()));
    732 
    733         // Surrogate code point
    734         char high = '\uD83D', low = '\uDE02';
    735         String surrogateCP = new String(new char[]{high, low, low, '0'});
    736         assertEquals(Character.toCodePoint(high, low), surrogateCP.codePoints().toArray()[0]);
    737         assertEquals((int) low, surrogateCP.codePoints().toArray()[1]); // Unmatched surrogate.
    738         assertEquals((int) '0', surrogateCP.codePoints().toArray()[2]);
    739     }
    740 
    741     public void testJoin_CharSequenceArray() {
    742         assertEquals("", String.join("-"));
    743         assertEquals("", String.join("-", ""));
    744         assertEquals("foo", String.join("-", "foo"));
    745         assertEquals("foo---bar---boo", String.join("---", "foo", "bar", "boo"));
    746         assertEquals("foobarboo", String.join("", "foo", "bar", "boo"));
    747         assertEquals("null-null", String.join("-", null, null));
    748         assertEquals("\\_()_/", String.join("()", "\\_", "_/"));
    749     }
    750 
    751     public void testJoin_CharSequenceArray_NPE() {
    752         try {
    753             String.join(null, "foo", "bar");
    754             fail();
    755         } catch (NullPointerException expected) {}
    756     }
    757 
    758     public void testJoin_Iterable() {
    759         ArrayList<String> iterable = new ArrayList<>();
    760         assertEquals("", String.join("-", iterable));
    761 
    762         iterable.add("foo");
    763         assertEquals("foo", String.join("-", iterable));
    764 
    765         iterable.add("bar");
    766         assertEquals("foo...bar", String.join("...", iterable));
    767 
    768         iterable.add("foo");
    769         assertEquals("foo-bar-foo", String.join("-", iterable));
    770         assertEquals("foobarfoo", String.join("", iterable));
    771     }
    772 
    773     public void testJoin_Iterable_NPE() {
    774         try {
    775             String.join(null, new ArrayList<String>());
    776             fail();
    777         } catch (NullPointerException expected) {}
    778 
    779         try {
    780             String.join("-", (Iterable<String>)null);
    781             fail();
    782         } catch (NullPointerException expected) {}
    783     }
    784 }
    785