1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 ******************************************************************************* 8 */ 9 /** 10 * A JNI interface for ICU converters. 11 * 12 * 13 * @author Ram Viswanadha, IBM 14 */ 15 package java.nio.charset; 16 17 import java.nio.ByteBuffer; 18 import java.nio.CharBuffer; 19 import java.util.HashMap; 20 import java.util.Map; 21 import libcore.icu.ICU; 22 import libcore.icu.NativeConverter; 23 import libcore.util.EmptyArray; 24 import libcore.util.NativeAllocationRegistry; 25 26 final class CharsetEncoderICU extends CharsetEncoder { 27 private static final Map<String, byte[]> DEFAULT_REPLACEMENTS = new HashMap<String, byte[]>(); 28 static { 29 // ICU has different default replacements to the RI in some cases. There are many 30 // additional cases, but this covers all the charsets that Java guarantees will be 31 // available, which is where compatibility seems most important. (The RI even uses 32 // the byte corresponding to '?' in ASCII as the replacement byte for charsets where that 33 // byte corresponds to an entirely different character.) 34 // It's odd that UTF-8 doesn't use U+FFFD, given that (unlike ISO-8859-1 and US-ASCII) it 35 // can represent it, but this is what the RI does... 36 byte[] questionMark = new byte[] { (byte) '?' }; 37 DEFAULT_REPLACEMENTS.put("UTF-8", questionMark); 38 DEFAULT_REPLACEMENTS.put("ISO-8859-1", questionMark); 39 DEFAULT_REPLACEMENTS.put("US-ASCII", questionMark); 40 } 41 42 private static final int INPUT_OFFSET = 0; 43 private static final int OUTPUT_OFFSET = 1; 44 private static final int INVALID_CHAR_COUNT = 2; 45 /* 46 * data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed 47 * data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written 48 * data[INVALID_CHARS] = number of invalid chars 49 */ 50 private int[] data = new int[3]; 51 52 /* handle to the ICU converter that is opened */ 53 private final long converterHandle; 54 55 private char[] input = null; 56 private byte[] output = null; 57 58 private char[] allocatedInput = null; 59 private byte[] allocatedOutput = null; 60 61 // These instance variables are always assigned in the methods before being used. This class 62 // is inherently thread-unsafe so we don't have to worry about synchronization. 63 private int inEnd; 64 private int outEnd; 65 66 public static CharsetEncoderICU newInstance(Charset cs, String icuCanonicalName) { 67 // This complexity is necessary to ensure that even if the constructor, superclass 68 // constructor, or call to updateCallback throw, we still free the native peer. 69 long address = 0; 70 try { 71 address = NativeConverter.openConverter(icuCanonicalName); 72 float averageBytesPerChar = NativeConverter.getAveBytesPerChar(address); 73 float maxBytesPerChar = NativeConverter.getMaxBytesPerChar(address); 74 byte[] replacement = makeReplacement(icuCanonicalName, address); 75 CharsetEncoderICU result = new CharsetEncoderICU(cs, averageBytesPerChar, maxBytesPerChar, replacement, address); 76 address = 0; // CharsetEncoderICU has taken ownership; its finalizer will do the free. 77 return result; 78 } finally { 79 if (address != 0) { 80 NativeConverter.closeConverter(address); 81 } 82 } 83 } 84 85 private static byte[] makeReplacement(String icuCanonicalName, long address) { 86 // We have our own map of RI-compatible default replacements (where ICU disagrees)... 87 byte[] replacement = DEFAULT_REPLACEMENTS.get(icuCanonicalName); 88 if (replacement != null) { 89 return replacement.clone(); 90 } 91 // ...but fall back to asking ICU. 92 return NativeConverter.getSubstitutionBytes(address); 93 } 94 95 private CharsetEncoderICU(Charset cs, float averageBytesPerChar, float maxBytesPerChar, byte[] replacement, long address) { 96 super(cs, averageBytesPerChar, maxBytesPerChar, replacement, true); 97 // Our native peer needs to know what just happened... 98 this.converterHandle = address; 99 NativeConverter.registerConverter(this, converterHandle); 100 updateCallback(); 101 } 102 103 @Override protected void implReplaceWith(byte[] newReplacement) { 104 updateCallback(); 105 } 106 107 @Override protected void implOnMalformedInput(CodingErrorAction newAction) { 108 updateCallback(); 109 } 110 111 @Override protected void implOnUnmappableCharacter(CodingErrorAction newAction) { 112 updateCallback(); 113 } 114 115 private void updateCallback() { 116 NativeConverter.setCallbackEncode(converterHandle, this); 117 } 118 119 @Override protected void implReset() { 120 NativeConverter.resetCharToByte(converterHandle); 121 data[INPUT_OFFSET] = 0; 122 data[OUTPUT_OFFSET] = 0; 123 data[INVALID_CHAR_COUNT] = 0; 124 output = null; 125 input = null; 126 allocatedInput = null; 127 allocatedOutput = null; 128 inEnd = 0; 129 outEnd = 0; 130 } 131 132 @Override protected CoderResult implFlush(ByteBuffer out) { 133 try { 134 // ICU needs to see an empty input. 135 input = EmptyArray.CHAR; 136 inEnd = 0; 137 data[INPUT_OFFSET] = 0; 138 139 data[OUTPUT_OFFSET] = getArray(out); 140 data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. 141 142 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, true); 143 if (ICU.U_FAILURE(error)) { 144 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 145 return CoderResult.OVERFLOW; 146 } else if (error == ICU.U_TRUNCATED_CHAR_FOUND) { 147 if (data[INVALID_CHAR_COUNT] > 0) { 148 return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); 149 } 150 } 151 } 152 return CoderResult.UNDERFLOW; 153 } finally { 154 setPosition(out); 155 implReset(); 156 } 157 } 158 159 @Override protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { 160 if (!in.hasRemaining()) { 161 return CoderResult.UNDERFLOW; 162 } 163 164 data[INPUT_OFFSET] = getArray(in); 165 data[OUTPUT_OFFSET]= getArray(out); 166 data[INVALID_CHAR_COUNT] = 0; // Make sure we don't see earlier errors. 167 168 try { 169 int error = NativeConverter.encode(converterHandle, input, inEnd, output, outEnd, data, false); 170 if (ICU.U_FAILURE(error)) { 171 if (error == ICU.U_BUFFER_OVERFLOW_ERROR) { 172 return CoderResult.OVERFLOW; 173 } else if (error == ICU.U_INVALID_CHAR_FOUND) { 174 return CoderResult.unmappableForLength(data[INVALID_CHAR_COUNT]); 175 } else if (error == ICU.U_ILLEGAL_CHAR_FOUND) { 176 return CoderResult.malformedForLength(data[INVALID_CHAR_COUNT]); 177 } else { 178 throw new AssertionError(error); 179 } 180 } 181 // Decoding succeeded: give us more data. 182 return CoderResult.UNDERFLOW; 183 } finally { 184 setPosition(in); 185 setPosition(out); 186 } 187 } 188 189 private int getArray(ByteBuffer out) { 190 if (out.hasArray()) { 191 output = out.array(); 192 outEnd = out.arrayOffset() + out.limit(); 193 return out.arrayOffset() + out.position(); 194 } else { 195 outEnd = out.remaining(); 196 if (allocatedOutput == null || outEnd > allocatedOutput.length) { 197 allocatedOutput = new byte[outEnd]; 198 } 199 // The array's start position is 0 200 output = allocatedOutput; 201 return 0; 202 } 203 } 204 205 private int getArray(CharBuffer in) { 206 if (in.hasArray()) { 207 input = in.array(); 208 inEnd = in.arrayOffset() + in.limit(); 209 return in.arrayOffset() + in.position(); 210 } else { 211 inEnd = in.remaining(); 212 if (allocatedInput == null || inEnd > allocatedInput.length) { 213 allocatedInput = new char[inEnd]; 214 } 215 // Copy the input buffer into the allocated array. 216 int pos = in.position(); 217 in.get(allocatedInput, 0, inEnd); 218 in.position(pos); 219 // The array's start position is 0 220 input = allocatedInput; 221 return 0; 222 } 223 } 224 225 private void setPosition(ByteBuffer out) { 226 if (out.hasArray()) { 227 out.position(data[OUTPUT_OFFSET] - out.arrayOffset()); 228 } else { 229 out.put(output, 0, data[OUTPUT_OFFSET]); 230 } 231 // release reference to output array, which may not be ours 232 output = null; 233 } 234 235 private void setPosition(CharBuffer in) { 236 int position = in.position() + data[INPUT_OFFSET] - data[INVALID_CHAR_COUNT]; 237 if (position < 0) { 238 // The calculated position might be negative if we encountered an 239 // invalid char that spanned input buffers. We adjust it to 0 in this case. 240 // 241 // NOTE: The API doesn't allow us to adjust the position of the previous 242 // input buffer. (Doing that wouldn't serve any useful purpose anyway.) 243 position = 0; 244 } 245 246 in.position(position); 247 // release reference to input array, which may not be ours 248 input = null; 249 } 250 } 251