/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.java.nio.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Tests for {@link CharsetEncoder}: replacement bytes, surrogate-pair handling in UTF-32BE,
 * and the encode/flush call sequence.
 */
public class CharsetEncoderTest extends junit.framework.TestCase {
    /**
     * Checks that a replacement installed with {@code replaceWith} is what actually gets
     * emitted for an unmappable character.
     */
    // None of the harmony or jtreg tests actually check that replaceWith does the right thing!
    public void test_replaceWith() throws Exception {
        Charset ascii = Charset.forName("US-ASCII");
        CharsetEncoder e = ascii.newEncoder();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        // Use the charset constant rather than a by-name lookup.
        e.replaceWith("=".getBytes(StandardCharsets.US_ASCII));
        String input = "hello\u0666world";
        String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
        assertEquals("hello=world", output);
    }

    /**
     * Asserts that a fresh encoder for {@code charset} reports {@code bytes} as its default
     * replacement. The arrays are compared as strings so a failure shows both contents.
     */
    private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
        byte[] result = Charset.forName(charset).newEncoder().replacement();
        assertEquals(Arrays.toString(bytes), Arrays.toString(result));
    }

    /**
     * Asserts that the bytes written to {@code actual} so far (index 0 up to its current
     * position) equal {@code expected}. The arrays are compared as strings so a failure
     * message shows both contents. Requires an array-backed buffer.
     */
    private void assertWrittenBytes(byte[] expected, ByteBuffer actual) {
        byte[] written = Arrays.copyOfRange(actual.array(), 0, actual.position());
        assertEquals(Arrays.toString(expected), Arrays.toString(written));
    }

    // For all the guaranteed built-in charsets, check that we have the right default replacements.

    public void test_defaultReplacementBytesIso_8859_1() throws Exception {
        assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
    }

    public void test_defaultReplacementBytesUs_Ascii() throws Exception {
        assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
    }

    public void test_defaultReplacementBytesUtf_16() throws Exception {
        assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
    }

    public void test_defaultReplacementBytesUtf_16be() throws Exception {
        assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
    }

    public void test_defaultReplacementBytesUtf_16le() throws Exception {
        assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
    }

    public void test_defaultReplacementBytesUtf_8() throws Exception {
        assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
    }

    /** A complete surrogate pair passed in one call is encoded as a single UTF-32BE unit. */
    public void testSurrogatePairAllAtOnce() throws Exception {
        // okay: surrogate pair seen all at once is encoded as U+20b9f.
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(4, bb.position());
        assertWrittenBytes(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);
    }

    /** A lone low surrogate is reported as malformed input of length 1. */
    public void testMalformedSurrogatePair() throws Exception {
        // malformed: low surrogate first is detected as an error.
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
        assertTrue(cr.toString(), cr.isMalformed());
        assertEquals(1, cr.length());
    }

    public void testCharsetEncoderSplitSurrogates_IGNORE() throws Exception {
        testCharsetEncoderSplitSurrogates(CodingErrorAction.IGNORE);
    }

    public void testCharsetEncoderSplitSurrogates_REPORT() throws Exception {
        testCharsetEncoderSplitSurrogates(CodingErrorAction.REPORT);
    }

    public void testCharsetEncoderSplitSurrogates_REPLACE() throws Exception {
        testCharsetEncoderSplitSurrogates(CodingErrorAction.REPLACE);
    }

    /**
     * Feeds the two halves of a surrogate pair to the encoder in separate {@code encode} calls
     * and checks that the pair is still encoded as one UTF-32BE unit, whatever error action
     * {@code cea} is installed.
     */
    private void testCharsetEncoderSplitSurrogates(CodingErrorAction cea) throws Exception {
        // Writing the two halves of the surrogate pair in separate writes should work just fine.
        // This is true of Android and ICU, but not of the RI.

        // On the RI, writing the two halves of the surrogate pair in separate writes
        // is an error because the CharsetEncoder doesn't remember it's half-way through a
        // surrogate pair across the two calls!

        // IGNORE just ignores both characters, REPORT complains that the second is
        // invalid (because it doesn't remember seeing the first), and REPLACE inserts a
        // replacement character U+fffd when it sees the second character (because it too
        // doesn't remember seeing the first).

        // Android just does the right thing.

        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        e.onMalformedInput(cea);
        e.onUnmappableCharacter(cea);
        ByteBuffer bb = ByteBuffer.allocate(128);

        // First half: nothing can be written yet.
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(0, bb.position());

        // Second half: the whole code point is written.
        cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        int expectedPosition = 4;
        assertEquals(expectedPosition, bb.position());
        // On mismatch the failure message carries the bytes actually written.
        assertWrittenBytes(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);

        // Signalling end of input and flushing must not emit anything further.
        cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
    }

    /**
     * Calling {@code flush} before an {@code encode} with {@code endOfInput == true} must throw,
     * and a subsequent correct encode/flush sequence must not write extra bytes.
     */
    public void testFlushWithoutEndOfInput() throws Exception {
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(4, bb.position());
        try {
            e.flush(bb);
            fail("flush() before encode(..., endOfInput=true) should throw");
        } catch (IllegalStateException expected) {
            // You must call encode with endOfInput true before you can flush.
        }

        // We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
        // That would result in flush outputting garbage.
        cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(8, bb.position());
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(8, bb.position());
    }

    // Discards all input. Outputs a single byte 'X' on flush.
    private static final class MockCharset extends Charset {
        static final Charset INSTANCE = new MockCharset();

        private MockCharset() {
            super("MockCharset", new String[0]);
        }

        @Override
        public boolean contains(Charset charset) {
            return false;
        }

        @Override
        public CharsetEncoder newEncoder() {
            return new CharsetEncoder(INSTANCE, 1.f, 1.f) {
                @Override
                protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
                    // Consume everything without producing output.
                    in.position(in.limit());
                    return CoderResult.UNDERFLOW;
                }

                @Override
                protected CoderResult implFlush(ByteBuffer out) {
                    out.put((byte) 'X');
                    return CoderResult.UNDERFLOW;
                }
            };
        }

        @Override
        public CharsetDecoder newDecoder() {
            return new CharsetDecoder(INSTANCE, 1.f, 1.f) {
                @Override
                protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
                    // Consume everything without producing output.
                    in.position(in.limit());
                    return CoderResult.UNDERFLOW;
                }
            };
        }
    }

    // Repeated calls to flush() should not result in repeated calls to implFlush().
    public void testFlushNotCallingImplFlushRepeatedly() {
        CharsetEncoder e = MockCharset.INSTANCE.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(4);
        CoderResult cr = e.encode(CharBuffer.allocate(0), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        // Exactly one 'X' was written; the rest of the buffer is untouched.
        assertEquals(1, bb.position());
        assertEquals((byte) 'X', bb.get(0));
        assertEquals(0x00, bb.get(1));
        assertEquals(0x00, bb.get(2));
        assertEquals(0x00, bb.get(3));
    }

    // http://b/19185235
    public void testFlushWithIncompleteInput() {
        CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
        ByteBuffer output = ByteBuffer.allocate(10);
        CoderResult result = encoder.encode(CharBuffer.wrap("\ud800"), output,
                true /* endOfInput */);
        assertTrue(result.isUnderflow());

        // The dangling high surrogate is reported at flush time and nothing is written.
        result = encoder.flush(output);
        assertTrue(result.isMalformed());
        assertEquals(1, result.length());
        assertEquals(0, output.position());
    }
}