1 /** 2 ******************************************************************************* 3 * Copyright (C) 2006-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.charset; 10 11 import java.nio.ByteBuffer; 12 import java.nio.CharBuffer; 13 import java.nio.IntBuffer; 14 import java.nio.charset.CharsetDecoder; 15 import java.nio.charset.CharsetEncoder; 16 import java.nio.charset.CoderResult; 17 18 import com.ibm.icu.text.UTF16; 19 import com.ibm.icu.text.UnicodeSet; 20 21 class CharsetASCII extends CharsetICU { 22 protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a }; 23 24 public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) { 25 super(icuCanonicalName, javaCanonicalName, aliases); 26 maxBytesPerChar = 1; 27 minBytesPerChar = 1; 28 maxCharsPerByte = 1; 29 } 30 31 class CharsetDecoderASCII extends CharsetDecoderICU { 32 33 public CharsetDecoderASCII(CharsetICU cs) { 34 super(cs); 35 } 36 37 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, 38 boolean flush) { 39 if (!source.hasRemaining()) { 40 /* no input, nothing to do */ 41 return CoderResult.UNDERFLOW; 42 } 43 if (!target.hasRemaining()) { 44 /* no output available, can't do anything */ 45 return CoderResult.OVERFLOW; 46 } 47 48 CoderResult cr; 49 int oldSource = source.position(); 50 int oldTarget = target.position(); 51 52 if (source.hasArray() && target.hasArray()) { 53 /* optimized loop */ 54 55 /* 56 * extract arrays from the buffers and obtain various constant values that will be 57 * necessary in the core loop 58 */ 59 byte[] sourceArray = source.array(); 60 int sourceOffset = source.arrayOffset(); 61 int sourceIndex = oldSource + sourceOffset; 62 int sourceLength = source.limit() - oldSource; 63 64 char[] targetArray = target.array(); 65 int targetOffset = target.arrayOffset(); 66 int targetIndex = oldTarget + targetOffset; 67 int targetLength = target.limit() - oldTarget; 68 69 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength) 70 + sourceIndex; 71 int offset = targetIndex - sourceIndex; 72 73 /* 74 * perform the core loop... if it returns null, it must be due to an overflow or 75 * underflow 76 */ 77 cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit); 78 if (cr == null) { 79 if (sourceLength <= targetLength) { 80 source.position(oldSource + sourceLength); 81 target.position(oldTarget + sourceLength); 82 cr = CoderResult.UNDERFLOW; 83 } else { 84 source.position(oldSource + targetLength); 85 target.position(oldTarget + targetLength); 86 cr = CoderResult.OVERFLOW; 87 } 88 } 89 } else { 90 /* unoptimized loop */ 91 cr = decodeLoopCoreUnoptimized(source, target); 92 if (cr == CoderResult.OVERFLOW) { 93 /* the target is full */ 94 source.position(source.position() - 1); /* rewind by 1 */ 95 } 96 } 97 98 /* set offsets since the start */ 99 if (offsets != null) { 100 int count = target.position() - oldTarget; 101 int sourceIndex = -1; 102 while (--count >= 0) offsets.put(++sourceIndex); 103 } 104 105 return cr; 106 } 107 108 protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target, 109 byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) { 110 int i, ch = 0; 111 112 /* 113 * perform ascii conversion from the source array to the target array, making sure each 114 * byte in the source is within the correct range 115 */ 116 for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++) 117 targetArray[i + offset] = (char) ch; 118 119 /* 120 * if some byte was not in the correct range, we need to deal with this byte by calling 121 * decodeMalformedOrUnmappable and move the source and target positions to reflect the 122 * early termination of the loop 123 */ 124 if ((ch & 0x80) != 0) { 125 source.position(i + 1); 126 target.position(i + offset); 127 return decodeMalformedOrUnmappable(ch); 128 } else 129 return null; 130 } 131 132 protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) { 133 int ch = 0; 134 135 /* 136 * perform ascii conversion from the source buffer to the target buffer, making sure 137 * each byte in the source is within the correct range 138 */ 139 while (source.hasRemaining()) { 140 ch = source.get() & 0xff; 141 142 if ((ch & 0x80) == 0) { 143 if (target.hasRemaining()) { 144 target.put((char)ch); 145 } else { 146 return CoderResult.OVERFLOW; 147 } 148 } else { 149 /* 150 * if we reach here, it's because a character was not in the correct range, and we need 151 * to deak with this by calling decodeMalformedOrUnmappable 152 */ 153 return decodeMalformedOrUnmappable(ch); 154 } 155 } 156 157 return CoderResult.UNDERFLOW; 158 } 159 160 protected CoderResult decodeMalformedOrUnmappable(int ch) { 161 /* 162 * put the guilty character into toUBytesArray and return a message saying that the 163 * character was malformed and of length 1. 164 */ 165 toUBytesArray[0] = (byte) ch; 166 toULength = 1; 167 return CoderResult.malformedForLength(1); 168 } 169 } 170 171 class CharsetEncoderASCII extends CharsetEncoderICU { 172 173 public CharsetEncoderASCII(CharsetICU cs) { 174 super(cs, fromUSubstitution); 175 implReset(); 176 } 177 178 private final static int NEED_TO_WRITE_BOM = 1; 179 180 protected void implReset() { 181 super.implReset(); 182 fromUnicodeStatus = NEED_TO_WRITE_BOM; 183 } 184 185 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, 186 boolean flush) { 187 if (!source.hasRemaining()) { 188 /* no input, nothing to do */ 189 return CoderResult.UNDERFLOW; 190 } 191 if (!target.hasRemaining()) { 192 /* no output available, can't do anything */ 193 return CoderResult.OVERFLOW; 194 } 195 196 CoderResult cr; 197 int oldSource = source.position(); 198 int oldTarget = target.position(); 199 200 if (fromUChar32 != 0) { 201 /* 202 * if we have a leading character in fromUChar32 that needs to be dealt with, we 203 * need to check for a matching trail character and taking the appropriate action as 204 * dictated by encodeTrail. 205 */ 206 cr = encodeTrail(source, (char) fromUChar32, flush); 207 } else { 208 if (source.hasArray() && target.hasArray()) { 209 /* optimized loop */ 210 211 /* 212 * extract arrays from the buffers and obtain various constant values that will 213 * be necessary in the core loop 214 */ 215 char[] sourceArray = source.array(); 216 int sourceOffset = source.arrayOffset(); 217 int sourceIndex = oldSource + sourceOffset; 218 int sourceLength = source.limit() - oldSource; 219 220 byte[] targetArray = target.array(); 221 int targetOffset = target.arrayOffset(); 222 int targetIndex = oldTarget + targetOffset; 223 int targetLength = target.limit() - oldTarget; 224 225 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength) 226 + sourceIndex; 227 int offset = targetIndex - sourceIndex; 228 229 /* 230 * perform the core loop... if it returns null, it must be due to an overflow or 231 * underflow 232 */ 233 cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush); 234 if (cr == null) { 235 if (sourceLength <= targetLength) { 236 source.position(oldSource + sourceLength); 237 target.position(oldTarget + sourceLength); 238 cr = CoderResult.UNDERFLOW; 239 } else { 240 source.position(oldSource + targetLength); 241 target.position(oldTarget + targetLength); 242 cr = CoderResult.OVERFLOW; 243 } 244 } 245 } else { 246 /* unoptimized loop */ 247 248 cr = encodeLoopCoreUnoptimized(source, target, flush); 249 250 if (cr == CoderResult.OVERFLOW) { 251 source.position(source.position() - 1); /* rewind by 1 */ 252 } 253 } 254 } 255 256 /* set offsets since the start */ 257 if (offsets != null) { 258 int count = target.position() - oldTarget; 259 int sourceIndex = -1; 260 while (--count >= 0) offsets.put(++sourceIndex); 261 } 262 263 return cr; 264 } 265 266 protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target, 267 char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit, 268 boolean flush) { 269 int i, ch = 0; 270 271 /* 272 * perform ascii conversion from the source array to the target array, making sure each 273 * char in the source is within the correct range 274 */ 275 for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++) 276 targetArray[i + offset] = (byte) ch; 277 278 /* 279 * if some byte was not in the correct range, we need to deal with this byte by calling 280 * encodeMalformedOrUnmappable and move the source and target positions to reflect the 281 * early termination of the loop 282 */ 283 if ((ch & 0xff80) != 0) { 284 source.position((i + 1) - source.arrayOffset()); 285 target.position(i + offset); 286 return encodeMalformedOrUnmappable(source, ch, flush); 287 } else 288 return null; 289 } 290 291 protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) { 292 int ch; 293 294 /* 295 * perform ascii conversion from the source buffer to the target buffer, making sure 296 * each char in the source is within the correct range 297 */ 298 while (source.hasRemaining()) { 299 ch = (int) source.get(); 300 301 if ((ch & 0xff80) == 0) { 302 if (target.hasRemaining()) { 303 target.put((byte) ch); 304 } else { 305 return CoderResult.OVERFLOW; 306 } 307 } else { 308 /* 309 * if we reach here, it's because a character was not in the correct range, and we need 310 * to deak with this by calling encodeMalformedOrUnmappable. 311 */ 312 return encodeMalformedOrUnmappable(source, ch, flush); 313 } 314 } 315 316 return CoderResult.UNDERFLOW; 317 } 318 319 protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) { 320 /* 321 * if the character is a lead surrogate, we need to call encodeTrail to attempt to match 322 * it up with a trail surrogate. if not, the character is unmappable. 323 */ 324 return (UTF16.isSurrogate((char) ch)) 325 ? encodeTrail(source, (char) ch, flush) 326 : CoderResult.unmappableForLength(1); 327 } 328 329 private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) { 330 /* 331 * ASCII doesn't support characters in the BMP, so if handleSurrogates returns null, 332 * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable. 333 */ 334 CoderResult cr = handleSurrogates(source, lead); 335 if (cr != null) { 336 return cr; 337 } else { 338 //source.position(source.position() - 2); 339 return CoderResult.unmappableForLength(2); 340 } 341 } 342 343 } 344 345 public CharsetDecoder newDecoder() { 346 return new CharsetDecoderASCII(this); 347 } 348 349 public CharsetEncoder newEncoder() { 350 return new CharsetEncoderASCII(this); 351 } 352 353 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ 354 setFillIn.add(0,0x7f); 355 } 356 } 357