Home | History | Annotate | Download | only in charset
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package libcore.java.nio.charset;
     18 
     19 import java.nio.ByteBuffer;
     20 import java.nio.CharBuffer;
     21 import java.nio.charset.Charset;
     22 import java.nio.charset.CharsetEncoder;
     23 import java.nio.charset.CharsetDecoder;
     24 import java.nio.charset.CoderResult;
     25 import java.nio.charset.CodingErrorAction;
     26 import java.nio.charset.StandardCharsets;
     27 import java.util.Arrays;
     28 
     29 public class CharsetEncoderTest extends junit.framework.TestCase {
     30     // None of the harmony or jtreg tests actually check that replaceWith does the right thing!
     31     public void test_replaceWith() throws Exception {
     32         Charset ascii = Charset.forName("US-ASCII");
     33         CharsetEncoder e = ascii.newEncoder();
     34         e.onMalformedInput(CodingErrorAction.REPLACE);
     35         e.onUnmappableCharacter(CodingErrorAction.REPLACE);
     36         e.replaceWith("=".getBytes("US-ASCII"));
     37         String input = "hello\u0666world";
     38         String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
     39         assertEquals("hello=world", output);
     40     }
     41 
     42     private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
     43         byte[] result = Charset.forName(charset).newEncoder().replacement();
     44         assertEquals(Arrays.toString(bytes), Arrays.toString(result));
     45     }
     46 
     47     // For all the guaranteed built-in charsets, check that we have the right default replacements.
     48     public void test_defaultReplacementBytesIso_8859_1() throws Exception {
     49         assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
     50     }
     51     public void test_defaultReplacementBytesUs_Ascii() throws Exception {
     52         assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
     53     }
     54     public void test_defaultReplacementBytesUtf_16() throws Exception {
     55         assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
     56     }
     57     public void test_defaultReplacementBytesUtf_16be() throws Exception {
     58         assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
     59     }
     60     public void test_defaultReplacementBytesUtf_16le() throws Exception {
     61         assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
     62     }
     63     public void test_defaultReplacementBytesUtf_8() throws Exception {
     64         assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
     65     }
     66 
     67     public void testSurrogatePairAllAtOnce() throws Exception {
     68         // okay: surrogate pair seen all at once is decoded to U+20b9f.
     69         Charset cs = Charset.forName("UTF-32BE");
     70         CharsetEncoder e = cs.newEncoder();
     71         ByteBuffer bb = ByteBuffer.allocate(128);
     72         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
     73         assertEquals(CoderResult.UNDERFLOW, cr);
     74         assertEquals(4, bb.position());
     75         assertEquals((byte) 0x00, bb.get(0));
     76         assertEquals((byte) 0x02, bb.get(1));
     77         assertEquals((byte) 0x0b, bb.get(2));
     78         assertEquals((byte) 0x9f, bb.get(3));
     79     }
     80 
     81     public void testMalformedSurrogatePair() throws Exception {
     82         // malformed: low surrogate first is detected as an error.
     83         Charset cs = Charset.forName("UTF-32BE");
     84         CharsetEncoder e = cs.newEncoder();
     85         ByteBuffer bb = ByteBuffer.allocate(128);
     86         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
     87         assertTrue(cr.toString(), cr.isMalformed());
     88         assertEquals(1, cr.length());
     89     }
     90 
     91     public void testCharsetEncoderSplitSurrogates_IGNORE() throws Exception {
     92         testCharsetEncoderSplitSurrogates(CodingErrorAction.IGNORE);
     93     }
     94 
     95     public void testCharsetEncoderSplitSurrogates_REPORT() throws Exception {
     96         testCharsetEncoderSplitSurrogates(CodingErrorAction.REPORT);
     97     }
     98 
     99     public void testCharsetEncoderSplitSurrogates_REPLACE() throws Exception {
    100         testCharsetEncoderSplitSurrogates(CodingErrorAction.REPLACE);
    101     }
    102 
    103     private void testCharsetEncoderSplitSurrogates(CodingErrorAction cea) throws Exception {
    104         // Writing the two halves of the surrogate pair in separate writes should work just fine.
    105         // This is true of Android and ICU, but not of the RI.
    106 
    107         // On the RI, writing the two halves of the surrogate pair in separate writes
    108         // is an error because the CharsetEncoder doesn't remember it's half-way through a
    109         // surrogate pair across the two calls!
    110 
    111         // IGNORE just ignores both characters, REPORT complains that the second is
    112         // invalid (because it doesn't remember seeing the first), and REPLACE inserts a
    113         // replacement character U+fffd when it sees the second character (because it too
    114         // doesn't remember seeing the first).
    115 
    116         // Android just does the right thing.
    117 
    118         Charset cs = Charset.forName("UTF-32BE");
    119         CharsetEncoder e = cs.newEncoder();
    120         e.onMalformedInput(cea);
    121         e.onUnmappableCharacter(cea);
    122         ByteBuffer bb = ByteBuffer.allocate(128);
    123         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
    124         assertEquals(CoderResult.UNDERFLOW, cr);
    125         assertEquals(0, bb.position());
    126         cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
    127         assertEquals(CoderResult.UNDERFLOW, cr);
    128         int expectedPosition = 4;
    129         assertEquals(expectedPosition, bb.position());
    130         System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position())));
    131         assertEquals((byte) 0x00, bb.get(0));
    132         assertEquals((byte) 0x02, bb.get(1));
    133         assertEquals((byte) 0x0b, bb.get(2));
    134         assertEquals((byte) 0x9f, bb.get(3));
    135         cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
    136         assertEquals(CoderResult.UNDERFLOW, cr);
    137         assertEquals(expectedPosition, bb.position());
    138         cr = e.flush(bb);
    139         assertEquals(CoderResult.UNDERFLOW, cr);
    140         assertEquals(expectedPosition, bb.position());
    141     }
    142 
    143     public void testFlushWithoutEndOfInput() throws Exception {
    144         Charset cs = Charset.forName("UTF-32BE");
    145         CharsetEncoder e = cs.newEncoder();
    146         ByteBuffer bb = ByteBuffer.allocate(128);
    147         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
    148         assertEquals(CoderResult.UNDERFLOW, cr);
    149         assertEquals(4, bb.position());
    150         try {
    151             cr = e.flush(bb);
    152             fail();
    153         } catch (IllegalStateException expected) {
    154             // You must call encode with endOfInput true before you can flush.
    155         }
    156 
    157         // We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
    158         // That would result in flush outputting garbage.
    159         cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
    160         assertEquals(CoderResult.UNDERFLOW, cr);
    161         assertEquals(8, bb.position());
    162         cr = e.flush(bb);
    163         assertEquals(CoderResult.UNDERFLOW, cr);
    164         assertEquals(8, bb.position());
    165     }
    166 
    167     // Discards all input. Outputs a single byte 'X' on flush.
    168     private static final class MockCharset extends Charset {
    169         static final Charset INSTANCE = new MockCharset();
    170 
    171         private MockCharset() {
    172             super("MockCharset", new String[0]);
    173         }
    174 
    175         public boolean contains(Charset charset) {
    176             return false;
    177         }
    178 
    179         public CharsetEncoder newEncoder() {
    180             return new CharsetEncoder(INSTANCE, 1.f, 1.f) {
    181                 protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    182                     in.position(in.limit());
    183                     return CoderResult.UNDERFLOW;
    184                 }
    185 
    186                 protected CoderResult implFlush(ByteBuffer out) {
    187                     out.put((byte) 'X');
    188                     return CoderResult.UNDERFLOW;
    189                 }
    190             };
    191         }
    192 
    193         public CharsetDecoder newDecoder() {
    194             return new CharsetDecoder(INSTANCE, 1.f, 1.f) {
    195                 protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
    196                     in.position(in.limit());
    197                     return CoderResult.UNDERFLOW;
    198                 }
    199             };
    200         }
    201     }
    202 
    203     // Repeated calls to flush() should not result in repeated calls to implFlush().
    204     public void testFlushNotCallingImplFlushRepeatedly() {
    205         CharsetEncoder e = MockCharset.INSTANCE.newEncoder();
    206         ByteBuffer bb = ByteBuffer.allocate(4);
    207         CoderResult cr = e.encode(CharBuffer.allocate(0), bb, true);
    208         assertEquals(CoderResult.UNDERFLOW, cr);
    209         cr = e.flush(bb);
    210         assertEquals(CoderResult.UNDERFLOW, cr);
    211         cr = e.flush(bb);
    212         assertEquals(CoderResult.UNDERFLOW, cr);
    213         assertEquals(1, bb.position());
    214         assertEquals((byte) 'X', bb.get(0));
    215         assertEquals(0x00, bb.get(1));
    216         assertEquals(0x00, bb.get(2));
    217         assertEquals(0x00, bb.get(3));
    218     }
    219 
    220     // http://b/19185235
    221     public void testFlushWithIncompleteInput() {
    222         CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
    223         ByteBuffer output = ByteBuffer.allocate(10);
    224         CoderResult result = encoder.encode(CharBuffer.wrap("\ud800"), output,
    225                 true /* endOfInput */);
    226         assertTrue(result.isUnderflow());
    227 
    228         result = encoder.flush(output);
    229         assertTrue(result.isMalformed());
    230         assertEquals(1, result.length());
    231         assertEquals(0, output.position());
    232     }
    233 }
    234