1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.java.lang; 18 19 import java.lang.reflect.Field; 20 import java.lang.reflect.Modifier; 21 import java.nio.ByteBuffer; 22 import java.nio.CharBuffer; 23 import java.nio.ReadOnlyBufferException; 24 import java.nio.charset.Charset; 25 import java.nio.charset.CharsetDecoder; 26 import java.nio.charset.CharsetEncoder; 27 import java.nio.charset.CoderResult; 28 import java.util.Arrays; 29 import java.util.Locale; 30 import junit.framework.TestCase; 31 32 public class StringTest extends TestCase { 33 public void testIsEmpty() { 34 assertTrue("".isEmpty()); 35 assertFalse("x".isEmpty()); 36 } 37 38 // The evil decoder keeps hold of the CharBuffer it wrote to. 39 private static final class EvilCharsetDecoder extends CharsetDecoder { 40 private static char[] chars; 41 public EvilCharsetDecoder(Charset cs) { 42 super(cs, 1.0f, 1.0f); 43 } 44 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { 45 chars = out.array(); 46 int inLength = in.remaining(); 47 for (int i = 0; i < inLength; ++i) { 48 in.put((byte) 'X'); 49 out.put('Y'); 50 } 51 return CoderResult.UNDERFLOW; 52 } 53 public static void corrupt() { 54 for (int i = 0; i < chars.length; ++i) { 55 chars[i] = '$'; 56 } 57 } 58 } 59 60 // The evil encoder tries to write to the CharBuffer it was given to 61 // read from. 62 private static final class EvilCharsetEncoder extends CharsetEncoder { 63 public EvilCharsetEncoder(Charset cs) { 64 super(cs, 1.0f, 1.0f); 65 } 66 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 67 int inLength = in.remaining(); 68 for (int i = 0; i < inLength; ++i) { 69 in.put('x'); 70 out.put((byte) 'y'); 71 } 72 return CoderResult.UNDERFLOW; 73 } 74 } 75 76 private static final Charset EVIL_CHARSET = new Charset("evil", null) { 77 public boolean contains(Charset charset) { return false; } 78 public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); } 79 public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); } 80 }; 81 82 public void testGetBytes_MaliciousCharset() { 83 try { 84 String s = "hi"; 85 // Check that our encoder can't write to the input CharBuffer 86 // it was given. 87 s.getBytes(EVIL_CHARSET); 88 fail(); // We shouldn't have got here! 89 } catch (ReadOnlyBufferException expected) { 90 // We caught you trying to be naughty! 91 } 92 } 93 94 public void testStringFromCharset() { 95 Charset cs = Charset.forName("UTF-8"); 96 byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'}; 97 assertEquals("hi", new String(bytes, cs)); 98 } 99 100 public void testStringFromCharset_MaliciousCharset() { 101 Charset cs = EVIL_CHARSET; 102 byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'}; 103 final String result = new String(bytes, cs); 104 assertEquals("YY", result); // (Our decoder always outputs 'Y's.) 105 // Check that even if the decoder messes with the output CharBuffer 106 // after we've created a string from it, it doesn't affect the string. 107 EvilCharsetDecoder.corrupt(); 108 assertEquals("YY", result); 109 } 110 111 public void test_getBytes_bad() throws Exception { 112 // Check that we use '?' as the replacement byte for invalid characters. 113 assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII"))); 114 assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII")))); 115 } 116 117 public void test_getBytes_UTF_8() { 118 // We have a fast path implementation of String.getBytes for UTF-8. 119 Charset cs = Charset.forName("UTF-8"); 120 121 // Test the empty string. 122 assertEquals("[]", Arrays.toString("".getBytes(cs))); 123 124 // Test one-byte characters. 125 assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs))); 126 assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs))); 127 assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs))); 128 129 // Test two-byte characters. 130 assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs))); 131 assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs))); 132 assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs))); 133 assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs))); 134 135 // Test three-byte characters. 136 assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs))); 137 assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs))); 138 assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs))); 139 assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs))); 140 141 // Test supplementary characters. 142 // Minimum supplementary character: U+10000 143 assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs))); 144 // Random supplementary character: U+10381 Ugaritic letter beta 145 assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs))); 146 // Maximum supplementary character: U+10FFFF 147 assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs))); 148 // A high surrogate at end of string is an error replaced with '?'. 149 assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs))); 150 // A high surrogate not followed by a low surrogate is an error replaced with '?'. 151 assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs))); 152 } 153 154 public void test_new_String_bad() throws Exception { 155 // Check that we use U+FFFD as the replacement string for invalid bytes. 156 assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII")); 157 assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII"))); 158 } 159 160 /** 161 * Tests a widely assumed performance characteristic of String.substring(): 162 * that it reuses the original's backing array. Although behaviour should be 163 * correct even if this test fails, many applications may suffer 164 * significant performance degradation. 165 */ 166 public void testSubstringSharesBackingArray() throws IllegalAccessException { 167 String abcdefghij = "ABCDEFGHIJ"; 168 String cdefg = abcdefghij.substring(2, 7); 169 assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); 170 } 171 172 /** 173 * Tests a widely assumed performance characteristic of string's copy 174 * constructor: that it ensures the backing array is the same length as the 175 * string. Although behaviour should be correct even if this test fails, 176 * many applications may suffer significant performance degradation. 177 */ 178 public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException { 179 String abcdefghij = "ABCDEFGHIJ"; 180 assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij))); 181 182 String cdefg = abcdefghij.substring(2, 7); 183 assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); 184 assertEquals(5, getBackingArray(new String(cdefg)).length); 185 } 186 187 /** 188 * Uses reflection to return the char[] backing the given string. This 189 * returns the actual backing array; which must not be modified. 190 */ 191 private char[] getBackingArray(String string) throws IllegalAccessException { 192 for (Field f : String.class.getDeclaredFields()) { 193 if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) { 194 f.setAccessible(true); 195 return (char[]) f.get(string); 196 } 197 } 198 throw new UnsupportedOperationException("No chars[] field on String!"); 199 } 200 201 /** 202 * Test that strings interned manually and then later loaded as literals 203 * maintain reference equality. http://b/3098960 204 */ 205 public void testInternBeforeLiteralIsLoaded() throws Exception{ 206 String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern(); 207 String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral") 208 .getDeclaredField("literal").get(null); 209 assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal)); 210 assertSame(programmatic, literal); 211 } 212 213 static class HasLiteral { 214 static String literal = "[5058, 9962, 1563, 5744]"; 215 } 216 217 private static final String LATIN_CAPITAL_I = "I"; 218 private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130"; 219 private static final String LATIN_SMALL_I = "i"; 220 private static final String LATIN_SMALL_DOTLESS_I = "\u0131"; 221 222 private static final String[] LATIN_I_VARIANTS = { 223 LATIN_SMALL_I, 224 LATIN_SMALL_DOTLESS_I, 225 LATIN_CAPITAL_I, 226 LATIN_CAPITAL_I_WITH_DOT_ABOVE, 227 }; 228 229 public void testCaseMapping_tr_TR() { 230 Locale trTR = new Locale("tr", "TR"); 231 assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(trTR)); 232 assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(trTR)); 233 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(trTR)); 234 235 assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(trTR)); 236 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(trTR)); 237 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(trTR)); 238 239 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(trTR)); 240 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(trTR)); 241 } 242 243 public void testCaseMapping_en_US() { 244 Locale enUs = new Locale("en", "US"); 245 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(enUs)); 246 assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(enUs)); 247 assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(enUs)); 248 249 assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(enUs)); 250 assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(enUs)); 251 assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(enUs)); 252 253 assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(enUs)); 254 // http://b/3325799: Android fails this with an extra combining "dot above". 255 assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(enUs)); 256 } 257 258 public void testEqualsIgnoreCase_tr_TR() { 259 testEqualsIgnoreCase(new Locale("tr", "TR")); 260 } 261 262 public void testEqualsIgnoreCase_en_US() { 263 testEqualsIgnoreCase(new Locale("en", "US")); 264 } 265 266 /** 267 * String.equalsIgnoreCase should not depend on the locale. 268 */ 269 private void testEqualsIgnoreCase(Locale locale) { 270 Locale defaultLocale = Locale.getDefault(); 271 Locale.setDefault(locale); 272 try { 273 for (String a : LATIN_I_VARIANTS) { 274 for (String b : LATIN_I_VARIANTS) { 275 if (!a.equalsIgnoreCase(b)) { 276 fail("Expected " + a + " to equal " + b + " in " + locale); 277 } 278 } 279 } 280 } finally { 281 Locale.setDefault(defaultLocale); 282 } 283 } 284 285 public void testRegionMatches_ignoreCase_en_US() { 286 testRegionMatches_ignoreCase(new Locale("en", "US")); 287 } 288 289 public void testRegionMatches_ignoreCase_tr_TR() { 290 testRegionMatches_ignoreCase(new Locale("tr", "TR")); 291 } 292 293 private void testRegionMatches_ignoreCase(Locale locale) { 294 Locale defaultLocale = Locale.getDefault(); 295 Locale.setDefault(locale); 296 try { 297 for (String a : LATIN_I_VARIANTS) { 298 for (String b : LATIN_I_VARIANTS) { 299 if (!a.regionMatches(true, 0, b, 0, b.length())) { 300 fail("Expected " + a + " to equal " + b + " in " + locale); 301 } 302 } 303 } 304 } finally { 305 Locale.setDefault(defaultLocale); 306 } 307 } 308 309 // http://code.google.com/p/android/issues/detail?id=15266 310 public void test_replaceAll() throws Exception { 311 assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1")); 312 } 313 } 314