1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.java.lang; 18 19 import android.icu.lang.UCharacter; 20 21 import java.io.ByteArrayInputStream; 22 import java.io.DataInputStream; 23 import java.io.IOException; 24 import java.nio.ByteBuffer; 25 import java.nio.CharBuffer; 26 import java.nio.ReadOnlyBufferException; 27 import java.nio.charset.Charset; 28 import java.nio.charset.CharsetDecoder; 29 import java.nio.charset.CharsetEncoder; 30 import java.nio.charset.CoderResult; 31 import java.nio.charset.CodingErrorAction; 32 import java.nio.charset.ModifiedUtf8; 33 import java.nio.charset.StandardCharsets; 34 import java.util.Arrays; 35 import java.util.ArrayList; 36 import java.util.Locale; 37 38 import junit.framework.TestCase; 39 40 public class StringTest extends TestCase { 41 public void testIsEmpty() { 42 assertTrue("".isEmpty()); 43 assertFalse("x".isEmpty()); 44 } 45 46 // The evil decoder keeps hold of the CharBuffer it wrote to. 47 private static final class EvilCharsetDecoder extends CharsetDecoder { 48 private static char[] chars; 49 public EvilCharsetDecoder(Charset cs) { 50 super(cs, 1.0f, 1.0f); 51 } 52 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { 53 chars = out.array(); 54 int inLength = in.remaining(); 55 for (int i = 0; i < inLength; ++i) { 56 in.put((byte) 'X'); 57 out.put('Y'); 58 } 59 return CoderResult.UNDERFLOW; 60 } 61 public static void corrupt() { 62 for (int i = 0; i < chars.length; ++i) { 63 chars[i] = '$'; 64 } 65 } 66 } 67 68 // The evil encoder tries to write to the CharBuffer it was given to 69 // read from. 70 private static final class EvilCharsetEncoder extends CharsetEncoder { 71 public EvilCharsetEncoder(Charset cs) { 72 super(cs, 1.0f, 1.0f); 73 } 74 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 75 int inLength = in.remaining(); 76 for (int i = 0; i < inLength; ++i) { 77 in.put('x'); 78 out.put((byte) 'y'); 79 } 80 return CoderResult.UNDERFLOW; 81 } 82 } 83 84 private static final Charset EVIL_CHARSET = new Charset("evil", null) { 85 public boolean contains(Charset charset) { return false; } 86 public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); } 87 public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); } 88 }; 89 90 public void testGetBytes_MaliciousCharset() { 91 try { 92 String s = "hi"; 93 // Check that our encoder can't write to the input CharBuffer 94 // it was given. 95 s.getBytes(EVIL_CHARSET); 96 fail(); // We shouldn't have got here! 97 } catch (ReadOnlyBufferException expected) { 98 // We caught you trying to be naughty! 99 } 100 } 101 102 public void testString_BII() throws Exception { 103 byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); 104 assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2)); 105 } 106 107 public void testString_BIIString() throws Exception { 108 byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); 109 assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8")); 110 } 111 112 public void testString_BIICharset() throws Exception { 113 byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); 114 assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8"))); 115 } 116 117 public void testString_BCharset() throws Exception { 118 byte[] bytes = "a\u0666b".getBytes("UTF-8"); 119 assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8"))); 120 } 121 122 public void testStringFromCharset_MaliciousCharset() { 123 Charset cs = EVIL_CHARSET; 124 byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'}; 125 final String result = new String(bytes, cs); 126 assertEquals("YY", result); // (Our decoder always outputs 'Y's.) 127 // Check that even if the decoder messes with the output CharBuffer 128 // after we've created a string from it, it doesn't affect the string. 129 EvilCharsetDecoder.corrupt(); 130 assertEquals("YY", result); 131 } 132 133 public void test_getBytes_bad() throws Exception { 134 // Check that we use '?' as the replacement byte for invalid characters. 135 assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII"))); 136 assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII")))); 137 } 138 139 public void test_getBytes_UTF_8() { 140 // We have a fast path implementation of String.getBytes for UTF-8. 141 Charset cs = Charset.forName("UTF-8"); 142 143 // Test the empty string. 144 assertEquals("[]", Arrays.toString("".getBytes(cs))); 145 146 // Test one-byte characters. 147 assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs))); 148 assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs))); 149 assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs))); 150 151 // Test two-byte characters. 152 assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs))); 153 assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs))); 154 assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs))); 155 assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs))); 156 157 // Test three-byte characters. 158 assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs))); 159 assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs))); 160 assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs))); 161 assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs))); 162 163 // Test supplementary characters. 164 // Minimum supplementary character: U+10000 165 assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs))); 166 // Random supplementary character: U+10381 Ugaritic letter beta 167 assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs))); 168 // Maximum supplementary character: U+10FFFF 169 assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs))); 170 // A high surrogate at end of string is an error replaced with '?'. 171 assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs))); 172 // A high surrogate not followed by a low surrogate is an error replaced with '?'. 173 assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs))); 174 } 175 176 public void test_new_String_bad() throws Exception { 177 // Check that we use U+FFFD as the replacement string for invalid bytes. 178 assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII")); 179 assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII"))); 180 } 181 182 /** 183 184 * Test that strings interned manually and then later loaded as literals 185 * maintain reference equality. http://b/3098960 186 */ 187 public void testInternBeforeLiteralIsLoaded() throws Exception{ 188 String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern(); 189 String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral") 190 .getDeclaredField("literal").get(null); 191 assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal)); 192 assertSame(programmatic, literal); 193 } 194 195 static class HasLiteral { 196 static String literal = "[5058, 9962, 1563, 5744]"; 197 } 198 199 private static final String COMBINING_DOT_ABOVE = "\u0307"; 200 private static final String LATIN_CAPITAL_I = "I"; 201 private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130"; 202 private static final String LATIN_SMALL_I = "i"; 203 private static final String LATIN_SMALL_DOTLESS_I = "\u0131"; 204 205 private static final String[] LATIN_I_VARIANTS = { 206 LATIN_SMALL_I, 207 LATIN_SMALL_DOTLESS_I, 208 LATIN_CAPITAL_I, 209 LATIN_CAPITAL_I_WITH_DOT_ABOVE, 210 }; 211 212 public void testCaseMapping_tr_TR() { 213 Locale tr_TR = new Locale("tr", "TR"); 214 assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR)); 215 assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(tr_TR)); 216 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(tr_TR)); 217 218 assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR)); 219 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(tr_TR)); 220 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(tr_TR)); 221 222 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR)); 223 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR)); 224 } 225 226 public void testCaseMapping_en_US() { 227 Locale en_US = new Locale("en", "US"); 228 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US)); 229 assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US)); 230 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(en_US)); 231 232 assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US)); 233 assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US)); 234 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(en_US)); 235 236 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US)); 237 // http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules. 238 // Android correctly preserves canonical equivalence. (See the separate test for tr_TR.) 239 assertEquals(LATIN_SMALL_I + COMBINING_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(en_US)); 240 } 241 242 public void testCaseMapping_el() { 243 Locale el_GR = new Locale("el", "GR"); 244 assertEquals(" O ", " o ".toUpperCase(el_GR)); 245 assertEquals(" O ", " o ".toUpperCase(el_GR)); 246 assertEquals(" O ", " o ".toUpperCase(el_GR)); 247 248 Locale en_US = new Locale("en", "US"); 249 assertEquals(" O ", " o ".toUpperCase(en_US)); 250 } 251 252 public void testEqualsIgnoreCase_tr_TR() { 253 testEqualsIgnoreCase(new Locale("tr", "TR")); 254 } 255 256 public void testEqualsIgnoreCase_en_US() { 257 testEqualsIgnoreCase(new Locale("en", "US")); 258 } 259 260 /** 261 * String.equalsIgnoreCase should not depend on the locale. 262 */ 263 private void testEqualsIgnoreCase(Locale locale) { 264 Locale defaultLocale = Locale.getDefault(); 265 Locale.setDefault(locale); 266 try { 267 for (String a : LATIN_I_VARIANTS) { 268 for (String b : LATIN_I_VARIANTS) { 269 if (!a.equalsIgnoreCase(b)) { 270 fail("Expected " + a + " to equal " + b + " in " + locale); 271 } 272 } 273 } 274 } finally { 275 Locale.setDefault(defaultLocale); 276 } 277 } 278 279 public void testRegionMatches_ignoreCase_en_US() { 280 testRegionMatches_ignoreCase(new Locale("en", "US")); 281 } 282 283 public void testRegionMatches_ignoreCase_tr_TR() { 284 testRegionMatches_ignoreCase(new Locale("tr", "TR")); 285 } 286 287 private void testRegionMatches_ignoreCase(Locale locale) { 288 Locale defaultLocale = Locale.getDefault(); 289 Locale.setDefault(locale); 290 try { 291 for (String a : LATIN_I_VARIANTS) { 292 for (String b : LATIN_I_VARIANTS) { 293 if (!a.regionMatches(true, 0, b, 0, b.length())) { 294 fail("Expected " + a + " to equal " + b + " in " + locale); 295 } 296 } 297 } 298 } finally { 299 Locale.setDefault(defaultLocale); 300 } 301 } 302 303 // http://code.google.com/p/android/issues/detail?id=15266 304 public void test_replaceAll() throws Exception { 305 assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1")); 306 } 307 308 // Test that CharsetDecoder and fast-path decoder are consistent when handling ill-formed 309 // sequence. http://b/69599767 310 // This test was originally created for the bug 311 // https://code.google.com/p/android/issues/detail?id=23831 312 public void test_69599767() throws Exception { 313 byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 }; 314 String expected = "\ufffd\ufffd\ufffd\u0021"; 315 316 // Since we use ICU4C for CharsetDecoder... 317 CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); 318 decoder.onMalformedInput(CodingErrorAction.REPLACE); 319 assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString()); 320 321 // Our fast-path code in String should behave the same... 322 assertEquals(expected, new String(bytes, "UTF-8")); 323 } 324 325 public void testFastPathString_wellFormedUtf8Sequence() throws Exception { 326 // U+0000 null 327 assertFastPathUtf8DecodedEquals("\u0000", "00"); 328 // U+0031 ASCII char '1' 329 assertFastPathUtf8DecodedEquals("1", "31"); 330 // U+007f 331 assertFastPathUtf8DecodedEquals("\u007f", "7f"); 332 // 2-byte UTF-8 sequence 333 assertFastPathUtf8DecodedEquals("\u0080", "c2 80"); 334 assertFastPathUtf8DecodedEquals("\u07ff", "df bf"); 335 // 3-byte UTF-8 sequence 336 assertFastPathUtf8DecodedEquals("\u0800", "e0 a0 80"); 337 assertFastPathUtf8DecodedEquals("\ud7ff", "ed 9f bf"); // last code point before surrogate 338 assertFastPathUtf8DecodedEquals("\ue000", "ee 80 80"); // first code point after surrogate 339 assertFastPathUtf8DecodedEquals("\uffff", "ef bf bf"); 340 // U+10000 The minimum value of a Unicode supplementary code point 341 assertEquals("\ud800\udc00", String.valueOf(Character.toChars(0x10000))); 342 assertFastPathUtf8DecodedEquals("\ud800\udc00", "f0 90 80 80"); 343 // U+10ffff The maximum value of a Unicode code point 344 assertEquals("\udbff\udfff", String.valueOf(Character.toChars(0x10ffff))); 345 assertFastPathUtf8DecodedEquals("\udbff\udfff", "f4 8f bf bf"); 346 347 // Null in the middle 348 assertFastPathUtf8DecodedEquals("1\u00002\u07ff", "31 00 32 df bf"); 349 350 assertFastPathUtf8DecodedEquals("\u0800\udbff\udfff\uffff1\u0080", 351 "e0 a0 80 f4 8f bf bf ef bf bf 31 c2 80"); 352 353 // Check UTF8 sequences of all code points is decoded correctly. 354 // Validate the decoder using byte sequence generated by UTF-8 encoder. 355 for (int codePoint = Character.MIN_CODE_POINT; 356 codePoint <= Character.MAX_CODE_POINT; 357 codePoint++) { 358 if (codePoint < Character.MIN_SURROGATE || codePoint > Character.MAX_SURROGATE) { 359 String expected = UCharacter.toString(codePoint); 360 // Android platform default is always UTF-8. 361 byte[] utf8Bytes = expected.getBytes(); 362 assertEquals(expected, new String(utf8Bytes)); 363 } 364 } 365 } 366 367 public void testFastPathString_illFormedUtf8Sequence() throws Exception { 368 // Overlong Sequence of ASCII char '1' 369 assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 b1"); 370 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 80 b1"); 371 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd", "f0 80 80 b1"); 372 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 80 80 b1"); 373 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd", "fc 80 80 80 80 b1"); 374 375 // Overlong null \u0000 376 // "c0 80" is a Modified UTF-8 sequence representing \u0000, but illegal in UTF-8. 377 assertEquals("\u0000", decodeModifiedUTF8("c0 80")); 378 assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 80"); 379 380 // Overlong BMP char U+0080. The correct UTF-8 encoded form of U+0080 is 2-byte "c2 80". 381 // The overlong form can be obtained by filling 0x80 into 1110xxxx 10xxxxxx 10xxxxxx 382 // == 1110000 10000010 10000000. (hex form e0 82 80) 383 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 82 80"); 384 385 // Overlong Supplementary Characters U+10000. 386 // The correct UTF-8 encoded form of U+10000 is 4-byte "f0 90 80 80". 387 // The overlong form can be obtained by filling 0x10000 into 388 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 389 // == 1110000 10000000 10010000 10000000 10000000. (hex form f8 80 90 80 80) 390 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 90 80 80"); 391 392 // Single surrogate in CESU-8 encoding 393 // A CESU-8 sequence, but illegal in UTF-8. 394 assertEquals("\ud800", decodeCESU8("ed a0 80")); 395 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "ed a0 80"); 396 397 // Surrogate pair in CESU-8 encoding. The value is bytes U+10000 398 // Assert the bytes are valid CESU-8 sequence before decoding using UTF-8 399 String surrogatePair = decodeCESU8("ed a0 80 ed b0 80"); 400 assertEquals("\ud800\udc00", surrogatePair); 401 assertEquals(0x10000, Character.codePointAt(surrogatePair.toCharArray(), 0)); 402 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd", 403 "ed a0 80 ed b0 80"); 404 405 // Illegal first-byte 406 assertFastPathUtf8DecodedEquals("\ufffd", "c0"); 407 assertFastPathUtf8DecodedEquals("\ufffd", "80"); 408 409 // Maximal valid subpart. byte 0x31 should be decoded into ASCII char '1', not part of 410 // ill-formed byte sequence 411 assertFastPathUtf8DecodedEquals("\ufffd1", "c2 31"); 412 assertFastPathUtf8DecodedEquals("\ufffd1", "e1 31"); 413 assertFastPathUtf8DecodedEquals("\ufffd1", "e1 80 31"); 414 assertFastPathUtf8DecodedEquals("\ufffd1", "f1 31"); 415 assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 31"); 416 assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 80 31");; 417 418 // Ill-formed sequence in the end of stream 419 assertFastPathUtf8DecodedEquals("1\ufffd", "31 c2"); 420 assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1"); 421 assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1 80"); 422 assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1"); 423 assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80"); 424 assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80 80"); 425 426 // Test lower and upper bound of first trail byte when leading byte is e0/ed/f0/f4 427 // Valid range of trail byte is A0..BF. 428 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 9f"); 429 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 c0"); 430 // Valid range of trail byte is 80..9F. 431 assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 ed 7f"); 432 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 ed a0"); 433 // Valid range of trail byte is 90..BF. 434 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 8f"); 435 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 c0"); 436 // Valid range of trail byte is 80..8F. 437 assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 f4 7f"); 438 assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f4 90"); 439 440 // More ill-formed sequences 441 assertFastPathUtf8DecodedEquals("\ufffd\ufffd1", "f1 80 80 e1 80 31"); 442 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 c0 b1 31"); 443 assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 ed a0 31"); 444 assertFastPathUtf8DecodedEquals("A\ufffd\ufffdA\ufffdA", "41 C0 AF 41 F4 80 80 41"); 445 } 446 447 private void assertFastPathUtf8DecodedEquals(String expected, String hexString) 448 throws Exception { 449 String actual = new String(hexStringtoBytes(hexString)); 450 assertEquals("Fast-path UTF-8 decoder decodes sequence [" + hexString 451 + "] into unexpected String", 452 expected, actual); 453 // Since we use ICU4C for CharsetDecoder, 454 // check UTF-8 CharsetDecoder has the same result as the fast-path decoder 455 CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() 456 .onMalformedInput(CodingErrorAction.REPLACE); 457 assertEquals("Fast-path UTF-8 decoder and UTF-8 CharsetDecoder has a different conversion" 458 + " result for sequence [" + hexString + "]", 459 decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString(), actual); 460 } 461 462 private static String decodeCESU8(String hexString) throws IOException { 463 CharsetDecoder cesu8Decoder = Charset.forName("CESU-8").newDecoder(); 464 return cesu8Decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString(); 465 } 466 467 private static String decodeModifiedUTF8(String hexString) throws IOException { 468 byte[] bytes = hexStringtoBytes(hexString); 469 // DataInputStream stores length as 2-byte short. Check the length before decoding 470 if (bytes.length > 0xffff) { 471 throw new IllegalArgumentException("Modified UTF-8 bytes are too long."); 472 } 473 byte[] buf = new byte[bytes.length + 2]; 474 buf[0] = (byte)(bytes.length >>> 8); 475 buf[1] = (byte) bytes.length; 476 System.arraycopy(bytes, 0, buf, 2, bytes.length); 477 DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf)); 478 return dis.readUTF(); 479 } 480 481 private static byte[] hexStringtoBytes(String input) { 482 String[] parts = input.split(" "); 483 byte[] bytes = new byte[parts.length]; 484 for (int i = 0; i < parts.length; i++) { 485 int val = Integer.parseInt(parts[i], 16); 486 if (val < 0 || val > 255) { 487 throw new IllegalArgumentException(); 488 } 489 bytes[i] = (byte) (0xff & val); 490 } 491 return bytes; 492 } 493 494 // https://code.google.com/p/android/issues/detail?id=55129 495 public void test_55129() throws Exception { 496 assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-")); 497 assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-")); 498 assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-")); 499 500 // Replace on an empty string with an empty target should insert the pattern 501 // precisely once. 502 assertEquals("", "".replace("", "")); 503 assertEquals("food", "".replace("", "food")); 504 } 505 506 public void test_replace() { 507 // Replace on an empty string is a no-op. 508 assertEquals("", "".replace("foo", "bar")); 509 // Replace on a string which doesn't contain the target sequence is a no-op. 510 assertEquals("baz", "baz".replace("foo", "bar")); 511 // Test that we iterate forward on the string. 512 assertEquals("mmmba", "bababa".replace("baba", "mmm")); 513 // Test replacements at the end of the string. 514 assertEquals("foodie", "foolish".replace("lish", "die")); 515 // Test a string that has multiple replacements. 516 assertEquals("hahahaha", "kkkk".replace("k", "ha")); 517 } 518 519 public void test_String_getBytes() throws Exception { 520 // http://b/11571917 521 assertEquals("[-126, -96]", Arrays.toString("".getBytes("Shift_JIS"))); 522 assertEquals("[-126, -87]", Arrays.toString("".getBytes("Shift_JIS"))); 523 assertEquals("[-105, 67]", Arrays.toString("".getBytes("Shift_JIS"))); 524 assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS"))); 525 assertEquals("[-29, -127, -117]", Arrays.toString("".getBytes("UTF-8"))); 526 527 // http://b/11639117 528 assertEquals("[-79, -72, -70, -48]", Arrays.toString("".getBytes("EUC-KR"))); 529 530 531 // https://code.google.com/p/android/issues/detail?id=63188 532 assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("".getBytes("gbk"))); 533 assertEquals("[-77, -10, -64, -76]", Arrays.toString("".getBytes("gbk"))); 534 assertEquals("[-77, -10]", Arrays.toString("".getBytes("gbk"))); 535 } 536 537 public void test_compareTo() throws Exception { 538 // For strings where a character differs, the result is 539 // the difference between the characters. 540 assertEquals(-1, "a".compareTo("b")); 541 assertEquals(-2, "a".compareTo("c")); 542 assertEquals(1, "b".compareTo("a")); 543 assertEquals(2, "c".compareTo("a")); 544 545 // For strings where the characters match up to the length of the shorter, 546 // the result is the difference between the strings' lengths. 547 assertEquals(0, "a".compareTo("a")); 548 assertEquals(-1, "a".compareTo("aa")); 549 assertEquals(-1, "a".compareTo("az")); 550 assertEquals(-2, "a".compareTo("aaa")); 551 assertEquals(-2, "a".compareTo("azz")); 552 assertEquals(-3, "a".compareTo("aaaa")); 553 assertEquals(-3, "a".compareTo("azzz")); 554 assertEquals(0, "a".compareTo("a")); 555 assertEquals(1, "aa".compareTo("a")); 556 assertEquals(1, "az".compareTo("a")); 557 assertEquals(2, "aaa".compareTo("a")); 558 assertEquals(2, "azz".compareTo("a")); 559 assertEquals(3, "aaaa".compareTo("a")); 560 assertEquals(3, "azzz".compareTo("a")); 561 } 562 563 public void test_compareToIgnoreCase() throws Exception { 564 // For strings where a character differs, the result is 565 // the difference between the characters. 566 assertEquals(-1, "a".compareToIgnoreCase("b")); 567 assertEquals(-1, "a".compareToIgnoreCase("B")); 568 assertEquals(-2, "a".compareToIgnoreCase("c")); 569 assertEquals(-2, "a".compareToIgnoreCase("C")); 570 assertEquals(1, "b".compareToIgnoreCase("a")); 571 assertEquals(1, "B".compareToIgnoreCase("a")); 572 assertEquals(2, "c".compareToIgnoreCase("a")); 573 assertEquals(2, "C".compareToIgnoreCase("a")); 574 575 // For strings where the characters match up to the length of the shorter, 576 // the result is the difference between the strings' lengths. 577 assertEquals(0, "a".compareToIgnoreCase("a")); 578 assertEquals(0, "a".compareToIgnoreCase("A")); 579 assertEquals(0, "A".compareToIgnoreCase("a")); 580 assertEquals(0, "A".compareToIgnoreCase("A")); 581 assertEquals(-1, "a".compareToIgnoreCase("aa")); 582 assertEquals(-1, "a".compareToIgnoreCase("aA")); 583 assertEquals(-1, "a".compareToIgnoreCase("Aa")); 584 assertEquals(-1, "a".compareToIgnoreCase("az")); 585 assertEquals(-1, "a".compareToIgnoreCase("aZ")); 586 assertEquals(-2, "a".compareToIgnoreCase("aaa")); 587 assertEquals(-2, "a".compareToIgnoreCase("AAA")); 588 assertEquals(-2, "a".compareToIgnoreCase("azz")); 589 assertEquals(-2, "a".compareToIgnoreCase("AZZ")); 590 assertEquals(-3, "a".compareToIgnoreCase("aaaa")); 591 assertEquals(-3, "a".compareToIgnoreCase("AAAA")); 592 assertEquals(-3, "a".compareToIgnoreCase("azzz")); 593 assertEquals(-3, "a".compareToIgnoreCase("AZZZ")); 594 assertEquals(1, "aa".compareToIgnoreCase("a")); 595 assertEquals(1, "aA".compareToIgnoreCase("a")); 596 assertEquals(1, "Aa".compareToIgnoreCase("a")); 597 assertEquals(1, "az".compareToIgnoreCase("a")); 598 assertEquals(2, "aaa".compareToIgnoreCase("a")); 599 assertEquals(2, "azz".compareToIgnoreCase("a")); 600 assertEquals(3, "aaaa".compareToIgnoreCase("a")); 601 assertEquals(3, "azzz".compareToIgnoreCase("a")); 602 } 603 604 // http://b/25943996 605 public void testSplit_trailingSeparators() { 606 String[] splits = "test\0message\0\0\0\0\0\0".split("\0", -1); 607 assertEquals("test", splits[0]); 608 assertEquals("message", splits[1]); 609 assertEquals("", splits[2]); 610 assertEquals("", splits[3]); 611 assertEquals("", splits[4]); 612 assertEquals("", splits[5]); 613 assertEquals("", splits[6]); 614 assertEquals("", splits[7]); 615 } 616 617 // http://b/63745717 618 // A buffer overflow bug was found in ICU4C. A native crash occurs only when ASAN is enabled. 619 public void testSplit_lookBehind() { 620 String string = "a"; 621 String[] words = string.split("(?<!(^|[A-Z]))(?=[A-Z])|(?<!^)(?=[A-Z][a-z])| |_|-"); 622 assertEquals(1, words.length); 623 assertEquals(string, words[0]); 624 } 625 626 // http://b/26126818 627 public void testCodePointCount() { 628 String hello = "Hello, fools"; 629 630 assertEquals(5, hello.codePointCount(0, 5)); 631 assertEquals(7, hello.codePointCount(5, 12)); 632 assertEquals(2, hello.codePointCount(10, 12)); 633 } 634 635 // http://b/26444984 636 public void testGetCharsOverflow() { 637 int srcBegin = Integer.MAX_VALUE; //2147483647 638 int srcEnd = srcBegin + 10; //-2147483639 639 try { 640 // The output array size must be larger than |srcEnd - srcBegin|. 641 "yes".getChars(srcBegin, srcEnd, new char[256], 0); 642 fail(); 643 } catch (StringIndexOutOfBoundsException expected) { 644 } 645 } 646 647 // http://b/28998511 648 public void testGetCharsBoundsChecks() { 649 // This is the explicit case from the bug: dstBegin == srcEnd - srcBegin 650 assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[0], -4); 651 652 // Some valid cases. 653 char[] dst = new char[1]; 654 "abcd".getChars(0, 1, dst, 0); 655 assertEquals('a', dst[0]); 656 "abcd".getChars(3, 4, dst, 0); 657 assertEquals('d', dst[0]); 658 dst = new char[4]; 659 "abcd".getChars(0, 4, dst, 0); 660 assertTrue(Arrays.equals("abcd".toCharArray(), dst)); 661 662 // Zero length src. 663 "abcd".getChars(0, 0, new char[0], 0); // dstBegin == 0 is ok if copying zero chars 664 "abcd".getChars(0, 0, new char[1], 1); // dstBegin == 1 is ok if copying zero chars 665 "".getChars(0, 0, new char[0], 0); 666 "abcd".getChars(1, 1, new char[1], 0); 667 "abcd".getChars(1, 1, new char[1], 1); 668 669 // Valid src args, invalid dst args. 670 assertGetCharsThrowsAIOOBException("abcd", 3, 4, new char[1], 1); // Out of range dstBegin 671 assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[3], 0); // Small dst 672 assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], -1); // Negative dstBegin 673 674 // dstBegin + (srcEnd - srcBegin) -> integer overflow OR dstBegin >= dst.length 675 assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], Integer.MAX_VALUE - 1); 676 677 // Invalid src args, valid dst args. 678 assertGetCharsThrowsSIOOBException("abcd", 2, 1, new char[4], 0); // srcBegin > srcEnd 679 assertGetCharsThrowsSIOOBException("abcd", -1, 3, new char[4], 0); // Negative srcBegin 680 assertGetCharsThrowsSIOOBException("abcd", 0, 5, new char[4], 0); // Out of range srcEnd 681 assertGetCharsThrowsSIOOBException("abcd", 0, -1, new char[4], 0); // Negative srcEnd 682 683 // Valid src args, invalid dst args. 684 assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], 1); // Bad dstBegin 685 686 // Zero length src copy, invalid dst args. 687 assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], -1); // Negative dstBegin 688 assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[0], 1); // Out of range dstBegin 689 assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[1], 2); // Out of range dstBegin 690 assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], 5); // Out of range dstBegin 691 } 692 693 private static void assertGetCharsThrowsAIOOBException(String s, int srcBegin, int srcEnd, 694 char[] dst, int dstBegin) { 695 try { 696 s.getChars(srcBegin, srcEnd, dst, dstBegin); 697 fail(); 698 } catch (ArrayIndexOutOfBoundsException expected) { 699 } 700 } 701 702 private static void assertGetCharsThrowsSIOOBException(String s, int srcBegin, int srcEnd, 703 char[] dst, int dstBegin) { 704 try { 705 s.getChars(srcBegin, srcEnd, dst, dstBegin); 706 fail(); 707 } catch (StringIndexOutOfBoundsException expected) { 708 } 709 } 710 711 public void testChars() { 712 String s = "Hello\n\tworld"; 713 int[] expected = new int[s.length()]; 714 for (int i = 0; i < s.length(); ++i) { 715 expected[i] = (int) s.charAt(i); 716 } 717 assertTrue(Arrays.equals(expected, s.chars().toArray())); 718 719 // Surrogate code point 720 char high = '\uD83D', low = '\uDE02'; 721 String surrogateCP = new String(new char[]{high, low, low}); 722 assertTrue(Arrays.equals(new int[]{high, low, low}, surrogateCP.chars().toArray())); 723 } 724 725 public void testCodePoints() { 726 String s = "Hello\n\tworld"; 727 int[] expected = new int[s.length()]; 728 for (int i = 0; i < s.length(); ++i) { 729 expected[i] = (int) s.charAt(i); 730 } 731 assertTrue(Arrays.equals(expected, s.codePoints().toArray())); 732 733 // Surrogate code point 734 char high = '\uD83D', low = '\uDE02'; 735 String surrogateCP = new String(new char[]{high, low, low, '0'}); 736 assertEquals(Character.toCodePoint(high, low), surrogateCP.codePoints().toArray()[0]); 737 assertEquals((int) low, surrogateCP.codePoints().toArray()[1]); // Unmatched surrogate. 738 assertEquals((int) '0', surrogateCP.codePoints().toArray()[2]); 739 } 740 741 public void testJoin_CharSequenceArray() { 742 assertEquals("", String.join("-")); 743 assertEquals("", String.join("-", "")); 744 assertEquals("foo", String.join("-", "foo")); 745 assertEquals("foo---bar---boo", String.join("---", "foo", "bar", "boo")); 746 assertEquals("foobarboo", String.join("", "foo", "bar", "boo")); 747 assertEquals("null-null", String.join("-", null, null)); 748 assertEquals("\\_()_/", String.join("()", "\\_", "_/")); 749 } 750 751 public void testJoin_CharSequenceArray_NPE() { 752 try { 753 String.join(null, "foo", "bar"); 754 fail(); 755 } catch (NullPointerException expected) {} 756 } 757 758 public void testJoin_Iterable() { 759 ArrayList<String> iterable = new ArrayList<>(); 760 assertEquals("", String.join("-", iterable)); 761 762 iterable.add("foo"); 763 assertEquals("foo", String.join("-", iterable)); 764 765 iterable.add("bar"); 766 assertEquals("foo...bar", String.join("...", iterable)); 767 768 iterable.add("foo"); 769 assertEquals("foo-bar-foo", String.join("-", iterable)); 770 assertEquals("foobarfoo", String.join("", iterable)); 771 } 772 773 public void testJoin_Iterable_NPE() { 774 try { 775 String.join(null, new ArrayList<String>()); 776 fail(); 777 } catch (NullPointerException expected) {} 778 779 try { 780 String.join("-", (Iterable<String>)null); 781 fail(); 782 } catch (NullPointerException expected) {} 783 } 784 } 785