Home | History | Annotate | Download | only in charset
      1 /**
      2 *******************************************************************************
      3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                  *
      5 *******************************************************************************
      6 *
      7 *******************************************************************************
      8 */
      9 /**
     10  * A JNI interface for ICU converters.
     11  *
     12  *
     13  * @author Ram Viswanadha, IBM
     14  */
     15 package java.nio.charset;
     16 
     17 import java.nio.ByteBuffer;
     18 import java.nio.CharBuffer;
     19 import java.util.HashMap;
     20 import java.util.Map;
     21 import libcore.icu.ICU;
     22 import libcore.icu.NativeConverter;
     23 import libcore.util.EmptyArray;
     24 import libcore.util.NativeAllocationRegistry;
     25 
     26 final class CharsetEncoderICU extends CharsetEncoder {
     27     private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>();
     28     static {
     29         // ICU has different default replacements to the RI in some cases. There are many
     30         // additional cases, but this covers all the charsets that Java guarantees will be
     31         // available, which is where compatibility seems most important. (The RI even uses
     32         // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that
     33         // byte corresponds to an entirely different character.)
     34         // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it
     35         // can represent it, but this is what the RI does...
     36         byte[] questionMark = new byte[] { (byte) '?' };
     37         DEFAULT_REPLACEMENTS.put("UTF-8",      questionMark);
     38         DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark);
     39         DEFAULT_REPLACEMENTS.put("US-ASCII",   questionMark);
     40     }
     41 
     42     private static final int INPUT_OFFSET = 0;
     43     private static final int OUTPUT_OFFSET = 1;
     44     private static final int INVALID_CHAR_COUNT = 2;
     45     /*
     46      * data[INPUT_OFFSET]   = on input contains the start of input and on output the number of input chars consumed
     47      * data[OUTPUT_OFFSET]  = on input contains the start of output and on output the number of output bytes written
     48      * data[INVALID_CHARS]  = number of invalid chars
     49      */
     50     private int[] data = new int[3];
     51 
     52     /* handle to the ICU converter that is opened */
     53     private final long converterHandle;
     54 
     55     private char[] input = null;
     56     private byte[] output = null;
     57 
     58     private char[] allocatedInput = null;
     59     private byte[] allocatedOutput = null;
     60 
     61     // These instance variables are always assigned in the methods before being used. This class
     62     // is inherently thread-unsafe so we don't have to worry about synchronization.
     63     private int inEnd;
     64     private int outEnd;
     65 
     66     public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) {
     67         // This complexity is necessary to ensure that even if the constructor, superclass
     68         // constructor, or call to updateCallback throw, we still free the native peer.
     69         long address = 0;
     70         try {
     71             address = NativeConverter.openConverter(icuCanonicalName);
     72             float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address);
     73             float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address);
     74             byte[] replacement = makeReplacement(icuCanonicalName, address);
     75             CharsetEncoderICU result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address);
     76             address = 0; // CharsetEncoderICU has taken ownership; its finalizer will do the free.
     77             return result;
     78         } finally {
     79             if (address != 0) {
     80                 NativeConverter.closeConverter(address);
     81             }
     82         }
     83     }
     84 
     85     private static byte[] makeReplacement(String icuCanonicalName, long address) {
     86         // We have our own map of RI-compatible default replacements (where ICU disagrees)...
     87         byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName);
     88         if (replacement != null) {
     89             return replacement.clone();
     90         }
     91         // ...but fall back to asking ICU.
     92         return NativeConverter.getSubstitutionBytes(address);
     93     }
     94 
     95     private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) {
     96         super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true);
     97         // Our native peer needs to know what just happened...
     98         this.converterHandle = address;
     99         NativeConverter.registerConverter(this, converterHandle);
    100         updateCallback();
    101     }
    102 
    103     @Override protected void implReplaceWith(byte[] newReplacement) {
    104         updateCallback();
    105     }
    106 
    107     @Override protected void implOnMalformedInput(CodingErrorAction newAction) {
    108         updateCallback();
    109     }
    110 
    111     @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
    112         updateCallback();
    113     }
    114 
    115     private void updateCallback() {
    116         NativeConverter.setCallbackEncode(converterHandle, this);
    117     }
    118 
    119     @Override protected void implReset() {
    120         NativeConverter.resetCharToByte(converterHandle);
    121         data[INPUT_OFFSET] = 0;
    122         data[OUTPUT_OFFSET] = 0;
    123         data[INVALID_CHAR_COUNT] = 0;
    124         output = null;
    125         input = null;
    126         allocatedInput = null;
    127         allocatedOutput = null;
    128         inEnd = 0;
    129         outEnd = 0;
    130     }
    131 
    132     @Override protected CoderResult implFlush(ByteBuffer out) {
    133         try {
    134             // ICU needs to see an empty input.
    135             input = EmptyArray.CHAR;
    136             inEnd = 0;
    137             data[INPUT_OFFSET] = 0;
    138 
    139             data[OUTPUT_OFFSET] = getArray(out);
    140             data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
    141 
    142             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true);
    143             if (ICU.U_FAILURE(error)) {
    144                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
    145                     return CoderResult.OVERFLOW;
    146                 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) {
    147                     if (data[INVALID_CHAR_COUNT] > 0) {
    148                         return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
    149                     }
    150                 }
    151             }
    152             return CoderResult.UNDERFLOW;
    153         } finally {
    154             setPosition(out);
    155             implReset();
    156         }
    157     }
    158 
    159     @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    160         if (!in.hasRemaining()) {
    161             return CoderResult.UNDERFLOW;
    162         }
    163 
    164         data[INPUT_OFFSET] = getArray(in);
    165         data[OUTPUT_OFFSET]= getArray(out);
    166         data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors.
    167 
    168         try {
    169             int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false);
    170             if (ICU.U_FAILURE(error)) {
    171                 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) {
    172                     return CoderResult.OVERFLOW;
    173                 } else if (error == ICU.U_INVALID_CHAR_FOUND) {
    174                     return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]);
    175                 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) {
    176                     return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]);
    177                 } else {
    178                     throw new AssertionError(error);
    179                 }
    180             }
    181             // Decoding succeeded: give us more data.
    182             return CoderResult.UNDERFLOW;
    183         } finally {
    184             setPosition(in);
    185             setPosition(out);
    186         }
    187     }
    188 
    189     private int getArray(ByteBuffer out) {
    190         if (out.hasArray()) {
    191             output = out.array();
    192             outEnd = out.arrayOffset() + out.limit();
    193             return out.arrayOffset() + out.position();
    194         } else {
    195             outEnd = out.remaining();
    196             if (allocatedOutput == null || outEnd > allocatedOutput.length) {
    197                 allocatedOutput = new byte[outEnd];
    198             }
    199             // The array's start position is 0
    200             output = allocatedOutput;
    201             return 0;
    202         }
    203     }
    204 
    205     private int getArray(CharBuffer in) {
    206         if (in.hasArray()) {
    207             input = in.array();
    208             inEnd = in.arrayOffset() + in.limit();
    209             return in.arrayOffset() + in.position();
    210         } else {
    211             inEnd = in.remaining();
    212             if (allocatedInput == null || inEnd > allocatedInput.length) {
    213                 allocatedInput = new char[inEnd];
    214             }
    215             // Copy the input buffer into the allocated array.
    216             int pos = in.position();
    217             in.get(allocatedInput, 0, inEnd);
    218             in.position(pos);
    219             // The array's start position is 0
    220             input = allocatedInput;
    221             return 0;
    222         }
    223     }
    224 
    225     private void setPosition(ByteBuffer out) {
    226         if (out.hasArray()) {
    227             out.position(data[OUTPUT_OFFSET] - out.arrayOffset());
    228         } else {
    229             out.put(output, 0, data[OUTPUT_OFFSET]);
    230         }
    231         // release reference to output array, which may not be ours
    232         output = null;
    233     }
    234 
    235     private void setPosition(CharBuffer in) {
    236         int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT];
    237         if (position < 0) {
    238             // The calculated position might be negative if we encountered an
    239             // invalid char that spanned input buffers. We adjust it to 0 in this case.
    240             //
    241             // NOTE: The API doesn't allow us to adjust the position of the previous
    242             // input buffer. (Doing that wouldn't serve any useful purpose anyway.)
    243             position = 0;
    244         }
    245 
    246         in.position(position);
    247         // release reference to input array, which may not be ours
    248         input = null;
    249     }
    250 }
    251