1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 ******************************************************************************* 8 */ 9 /** 10 * A JNI interface for ICU converters. 11 * 12 * 13 * @author Ram Viswanadha, IBM 14 */ 15 package java.nio.charset; 16 17 import dalvik.annotation.optimization.ReachabilitySensitive; 18 import java.nio.ByteBuffer; 19 import java.nio.CharBuffer; 20 import java.util.HashMap; 21 import java.util.Map; 22 import libcore.icu.ICU; 23 import libcore.icu.NativeConverter; 24 import libcore.util.EmptyArray; 25 26 final class CharsetEncoderICU extends CharsetEncoder { 27 private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>(); 28 static { 29 // ICU has different default replacements to the RI in some cases. There are many 30 // additional cases, but this covers all the charsets that Java guarantees will be 31 // available, which is where compatibility seems most important. (The RI even uses 32 // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that 33 // byte corresponds to an entirely different character.) 34 // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it 35 // can represent it, but this is what the RI does... 36 byte[] questionMark = new byte[] { (byte) '?' }; 37 DEFAULT_REPLACEMENTS.put("UTF-8", questionMark); 38 DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark); 39 DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark); 40 } 41 42 private static final int INPUT_OFFSET = 0; 43 private static final int OUTPUT_OFFSET = 1; 44 private static final int INVALID_CHAR_COUNT = 2; 45 /* 46 * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed 47 * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written 48 * data[INVALID_CHARS] = number of invalid chars 49 */ 50 private int[] data = new int[3]; 51 52 /* handle to the ICU converter that is opened */ 53 @ReachabilitySensitive 54 private final long converterHandle; 55 56 private char[] input = null; 57 private byte[] output = null; 58 59 private char[] allocatedInput = null; 60 private byte[] allocatedOutput = null; 61 62 // These instance variables are always assigned in the methods before being used. This class 63 // is inherently thread-unsafe so we don't have to worry about synchronization. 64 private int inEnd; 65 private int outEnd; 66 67 public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) { 68 // This complexity is necessary to ensure that even if the constructor, superclass 69 // constructor, or call to updateCallback throw, we still free the native peer. 70 long address = 0; 71 CharsetEncoderICU result; 72 try { 73 address = NativeConverter.openConverter(icuCanonicalName); 74 float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address); 75 float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address); 76 byte[] replacement = makeReplacement(icuCanonicalName, address); 77 result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address); 78 } catch (Throwable t) { 79 if (address != 0) { 80 NativeConverter.closeConverter(address); 81 } 82 throw t; 83 } 84 // An exception in registerConverter() will deallocate address: 85 NativeConverter.registerConverter(result, address); 86 result.updateCallback(); 87 return result; 88 } 89 90 private static byte[] makeReplacement(String icuCanonicalName, long address) { 91 // We have our own map of RI-compatible default replacements (where ICU disagrees)... 92 byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName); 93 if (replacement != null) { 94 return replacement.clone(); 95 } 96 // ...but fall back to asking ICU. 97 return NativeConverter.getSubstitutionBytes(address); 98 } 99 100 private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) { 101 super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true); 102 // Our native peer needs to know what just happened... 103 this.converterHandle = address; 104 } 105 106 @Override protected void implReplaceWith(byte[] newReplacement) { 107 updateCallback(); 108 } 109 110 @Override protected void implOnMalformedInput(CodingErrorAction newAction) { 111 updateCallback(); 112 } 113 114 @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) { 115 updateCallback(); 116 } 117 118 private void updateCallback() { 119 NativeConverter.setCallbackEncode(converterHandle, this); 120 } 121 122 @Override protected void implReset() { 123 NativeConverter.resetCharToByte(converterHandle); 124 data[INPUT_OFFSET] = 0; 125 data[OUTPUT_OFFSET] = 0; 126 data[INVALID_CHAR_COUNT] = 0; 127 output = null; 128 input = null; 129 allocatedInput = null; 130 allocatedOutput = null; 131 inEnd = 0; 132 outEnd = 0; 133 } 134 135 @Override protected CoderResult implFlush(ByteBuffer out) { 136 try { 137 // ICU needs to see an empty input. 138 input = EmptyArray.CHAR; 139 inEnd = 0; 140 data[INPUT_OFFSET] = 0; 141 142 data[OUTPUT_OFFSET] = getArray(out); 143 data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. 144 145 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true); 146 if (ICU.U_FAILURE(error)) { 147 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 148 return CoderResult.OVERFLOW; 149 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) { 150 if (data[INVALID_CHAR_COUNT] > 0) { 151 return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); 152 } 153 } 154 } 155 return CoderResult.UNDERFLOW; 156 } finally { 157 setPosition(out); 158 implReset(); 159 } 160 } 161 162 @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 163 if (!in.hasRemaining()) { 164 return CoderResult.UNDERFLOW; 165 } 166 167 data[INPUT_OFFSET] = getArray(in); 168 data[OUTPUT_OFFSET]= getArray(out); 169 data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. 170 171 try { 172 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false); 173 if (ICU.U_FAILURE(error)) { 174 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 175 return CoderResult.OVERFLOW; 176 } else if (error == ICU.U_INVALID_CHAR_FOUND) { 177 return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]); 178 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) { 179 return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); 180 } else { 181 throw new AssertionError(error); 182 } 183 } 184 // Decoding succeeded: give us more data. 185 return CoderResult.UNDERFLOW; 186 } finally { 187 setPosition(in); 188 setPosition(out); 189 } 190 } 191 192 private int getArray(ByteBuffer out) { 193 if (out.hasArray()) { 194 output = out.array(); 195 outEnd = out.arrayOffset() + out.limit(); 196 return out.arrayOffset() + out.position(); 197 } else { 198 outEnd = out.remaining(); 199 if (allocatedOutput == null || outEnd > allocatedOutput.length) { 200 allocatedOutput = new byte[outEnd]; 201 } 202 // The array's start position is 0 203 output = allocatedOutput; 204 return 0; 205 } 206 } 207 208 private int getArray(CharBuffer in) { 209 if (in.hasArray()) { 210 input = in.array(); 211 inEnd = in.arrayOffset() + in.limit(); 212 return in.arrayOffset() + in.position(); 213 } else { 214 inEnd = in.remaining(); 215 if (allocatedInput == null || inEnd > allocatedInput.length) { 216 allocatedInput = new char[inEnd]; 217 } 218 // Copy the input buffer into the allocated array. 219 int pos = in.position(); 220 in.get(allocatedInput, 0, inEnd); 221 in.position(pos); 222 // The array's start position is 0 223 input = allocatedInput; 224 return 0; 225 } 226 } 227 228 private void setPosition(ByteBuffer out) { 229 if (out.hasArray()) { 230 out.position(data[OUTPUT_OFFSET] - out.arrayOffset()); 231 } else { 232 out.put(output, 0, data[OUTPUT_OFFSET]); 233 } 234 // release reference to output array, which may not be ours 235 output = null; 236 } 237 238 private void setPosition(CharBuffer in) { 239 int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT]; 240 if (position < 0) { 241 // The calculated position might be negative if we encountered an 242 // invalid char that spanned input buffers. We adjust it to 0 in this case. 243 // 244 // NOTE: The API doesn't allow us to adjust the position of the previous 245 // input buffer. (Doing that wouldn't serve any useful purpose anyway.) 246 position = 0; 247 } 248 249 in.position(position); 250 // release reference to input array, which may not be ours 251 input = null; 252 } 253 } 254