Home | History | Annotate | Download | only in charset
      1 /**
      2 *******************************************************************************
      3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                  *
      5 *******************************************************************************
      6 *
      7 *******************************************************************************
      8 */
      9 /**
     10  * A JNI interface for ICU converters.
     11  *
     12  *
     13  * @author Ram Viswanadha, IBM
     14  */
     15 package java.nio.charset;
     16 
     17 import dalvik.annotation.optimization.ReachabilitySensitive;
     18 import java.nio.ByteBuffer;
     19 import java.nio.CharBuffer;
     20 import java.util.HashMap;
     21 import java.util.Map;
     22 import libcore.icu.ICU;
     23 import libcore.icu.NativeConverter;
     24 import libcore.util.EmptyArray;
     25 
     26 final class CharsetEncoderICU extends CharsetEncoder {
     27     private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
     28     static {
     29         // ICU has different default replacements to the RI in some cases. There are many
     30         // additional cases, but this covers all the charsets that Java guarantees will be
     31         // available, which is where compatibility seems most important. (The RI even uses
     32         // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that
     33         // byte corresponds to an entirely different character.)
     34         // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it
     35         // can represent it, but this is what the RI does...
     36         byte[] questionMark = new byte[] { (byte) '?' };
     37         DEFAULT_REPLACEMENTS.put("UTF-8",      questionMark);
     38         DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
     39         DEFAULT_REPLACEMENTS.put("US-ASCII",   questionMark);
     40     }
     41 
     42     private static final int INPUT_OFFSET = 0;
     43     private static final int OUTPUT_OFFSET = 1;
     44     private static final int INVALID_CHAR_COUNT = 2;
     45     /*
     46      * data[INPUT_OFFSET]   = on input contains the start of input and on output the number of input chars consumed
     47      * data[OUTPUT_OFFSET]  = on input contains the start of output and on output the number of output bytes written
     48      * data[INVALID_CHARS]  = number of invalid chars
     49      */
     50     private int[] data = new int[3];
     51 
     52     /* handle to the ICU converter that is opened */
     53     @ReachabilitySensitive
     54     private final long converterHandle;
     55 
     56     private char[] input = null;
     57     private byte[] output = null;
     58 
     59     private char[] allocatedInput = null;
     60     private byte[] allocatedOutput = null;
     61 
     62     // These instance variables are always assigned in the methods before being used. This class
     63     // is inherently thread-unsafe so we don't have to worry about synchronization.
     64     private int inEnd;
     65     private int outEnd;
     66 
     67     public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) {
     68         // This complexity is necessary to ensure that even if the constructor, superclass
     69         // constructor, or call to updateCallback throw, we still free the native peer.
     70         long address = 0;
     71         CharsetEncoderICU result;
     72         try {
     73             address = NativeConverter.openConverter(icuCanonicalName);
     74             float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address);
     75             float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address);
     76             byte[] replacement = makeReplacement(icuCanonicalName, address);
     77             result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address);
     78         } catch (Throwable t) {
     79             if (address != 0) {
     80                 NativeConverter.closeConverter(address);
     81             }
     82             throw t;
     83         }
     84         // An exception in registerConverter() will deallocate address:
     85         NativeConverter.registerConverter(result, address);
     86         result.updateCallback();
     87         return result;
     88     }
     89 
     90     private static byte[] makeReplacement(String icuCanonicalName, long address) {
     91         // We have our own map of RI-compatible default replacements (where ICU disagrees)...
     92         byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
     93         if (replacement != null) {
     94             return replacement.clone();
     95         }
     96         // ...but fall back to asking ICU.
     97         return NativeConverter.getSubstitutionBytes(address);
     98     }
     99 
    100     private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) {
    101         super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true);
    102         // Our native peer needs to know what just happened...
    103         this.converterHandle = address;
    104     }
    105 
    106     @Override protected void implReplaceWith(byte[] newReplacement) {
    107         updateCallback();
    108     }
    109 
    110     @Override protected void implOnMalformedInput(CodingErrorAction newAction) {
    111         updateCallback();
    112     }
    113 
    114     @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
    115         updateCallback();
    116     }
    117 
    118     private void updateCallback() {
    119         NativeConverter.setCallbackEncode(converterHandle, this);
    120     }
    121 
    122     @Override protected void implReset() {
    123         NativeConverter.resetCharToByte(converterHandle);
    124         data[INPUT_OFFSET] = 0;
    125         data[OUTPUT_OFFSET] = 0;
    126         data[INVALID_CHAR_COUNT] = 0;
    127         output = null;
    128         input = null;
    129         allocatedInput = null;
    130         allocatedOutput = null;
    131         inEnd = 0;
    132         outEnd = 0;
    133     }
    134 
    135     @Override protected CoderResult implFlush(ByteBuffer out) {
    136         try {
    137             // ICU needs to see an empty input.
    138             input = EmptyArray.CHAR;
    139             inEnd = 0;
    140             data[INPUT_OFFSET] = 0;
    141 
    142             data[OUTPUT_OFFSET] = getArray(out);
    143             data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
    144 
    145             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true);
    146             if (ICU.U_FAILURE(error)) {
    147                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
    148                     return CoderResult.OVERFLOW;
    149                 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) {
    150                     if (data[INVALID_CHAR_COUNT] > 0) {
    151                         return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
    152                     }
    153                 }
    154             }
    155             return CoderResult.UNDERFLOW;
    156         } finally {
    157             setPosition(out);
    158             implReset();
    159         }
    160     }
    161 
    162     @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    163         if (!in.hasRemaining()) {
    164             return CoderResult.UNDERFLOW;
    165         }
    166 
    167         data[INPUT_OFFSET] = getArray(in);
    168         data[OUTPUT_OFFSET]= getArray(out);
    169         data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
    170 
    171         try {
    172             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false);
    173             if (ICU.U_FAILURE(error)) {
    174                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
    175                     return CoderResult.OVERFLOW;
    176                 } else if (error == ICU.U_INVALID_CHAR_FOUND) {
    177                     return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]);
    178                 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) {
    179                     return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
    180                 } else {
    181                     throw new AssertionError(error);
    182                 }
    183             }
    184             // Decoding succeeded: give us more data.
    185             return CoderResult.UNDERFLOW;
    186         } finally {
    187             setPosition(in);
    188             setPosition(out);
    189         }
    190     }
    191 
    192     private int getArray(ByteBuffer out) {
    193         if (out.hasArray()) {
    194             output = out.array();
    195             outEnd = out.arrayOffset() + out.limit();
    196             return out.arrayOffset() + out.position();
    197         } else {
    198             outEnd = out.remaining();
    199             if (allocatedOutput == null || outEnd > allocatedOutput.length) {
    200                 allocatedOutput = new byte[outEnd];
    201             }
    202             // The array's start position is 0
    203             output = allocatedOutput;
    204             return 0;
    205         }
    206     }
    207 
    208     private int getArray(CharBuffer in) {
    209         if (in.hasArray()) {
    210             input = in.array();
    211             inEnd = in.arrayOffset() + in.limit();
    212             return in.arrayOffset() + in.position();
    213         } else {
    214             inEnd = in.remaining();
    215             if (allocatedInput == null || inEnd > allocatedInput.length) {
    216                 allocatedInput = new char[inEnd];
    217             }
    218             // Copy the input buffer into the allocated array.
    219             int pos = in.position();
    220             in.get(allocatedInput, 0, inEnd);
    221             in.position(pos);
    222             // The array's start position is 0
    223             input = allocatedInput;
    224             return 0;
    225         }
    226     }
    227 
    228     private void setPosition(ByteBuffer out) {
    229         if (out.hasArray()) {
    230             out.position(data[OUTPUT_OFFSET] - out.arrayOffset());
    231         } else {
    232             out.put(output, 0, data[OUTPUT_OFFSET]);
    233         }
    234         // release reference to output array, which may not be ours
    235         output = null;
    236     }
    237 
    238     private void setPosition(CharBuffer in) {
    239         int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT];
    240         if (position < 0) {
    241             // The calculated position might be negative if we encountered an
    242             // invalid char that spanned input buffers. We adjust it to 0 in this case.
    243             //
    244             // NOTE: The API doesn't allow us to adjust the position of the previous
    245             // input buffer. (Doing that wouldn't serve any useful purpose anyway.)
    246             position = 0;
    247         }
    248 
    249         in.position(position);
    250         // release reference to input array, which may not be ours
    251         input = null;
    252     }
    253 }
    254