// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/**
 *******************************************************************************
 * Copyright (C) 2006-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *******************************************************************************
 */
package com.ibm.icu.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Charset whose decoder accepts only bytes 0x00..0x7F and whose encoder accepts only chars
 * U+0000..U+007F; every accepted unit is copied through unchanged, one byte per char.
 */
class CharsetASCII extends CharsetICU {
    /** Substitution bytes handed to the encoder's superclass constructor: a single 0x1A. */
    protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };

    /**
     * Creates the charset and fixes its size ratios at exactly one byte per char.
     *
     * @param icuCanonicalName  forwarded unchanged to the superclass constructor
     * @param javaCanonicalName forwarded unchanged to the superclass constructor
     * @param aliases           forwarded unchanged to the superclass constructor
     */
    public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
        super(icuCanonicalName, javaCanonicalName, aliases);
        maxBytesPerChar = 1;
        minBytesPerChar = 1;
        maxCharsPerByte = 1;
    }

    /** Decoder that turns bytes below 0x80 into chars and reports any other byte as malformed. */
    class CharsetDecoderASCII extends CharsetDecoderICU {

        public CharsetDecoderASCII(CharsetICU cs) {
            super(cs);
        }

        /**
         * Decodes as many bytes as fit into {@code target}, stopping at the first byte with
         * the high bit set.
         *
         * @param source  bytes to decode; its position is advanced past the consumed bytes
         * @param target  receives the decoded chars; its position is advanced accordingly
         * @param offsets if non-null, receives the values 0, 1, 2, ... - one for each char
         *                written during this call
         * @param flush   not used by this implementation
         * @return {@code UNDERFLOW} when the source is exhausted, {@code OVERFLOW} when the
         *         target is full while input remains, or a malformed-input result of length 1
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
                boolean flush) {
            if (!source.hasRemaining()) {
                /* no input, nothing to do */
                return CoderResult.UNDERFLOW;
            }
            if (!target.hasRemaining()) {
                /* no output available, can't do anything */
                return CoderResult.OVERFLOW;
            }

            CoderResult cr;
            int oldSource = source.position();
            int oldTarget = target.position();

            if (source.hasArray() && target.hasArray()) {
                /* optimized loop */

                /*
                 * extract arrays from the buffers and obtain various constant values that will be
                 * necessary in the core loop
                 */
                byte[] sourceArray = source.array();
                int sourceOffset = source.arrayOffset();
                int sourceIndex = oldSource + sourceOffset;
                int sourceLength = source.limit() - oldSource;

                char[] targetArray = target.array();
                int targetOffset = target.arrayOffset();
                int targetIndex = oldTarget + targetOffset;
                int targetLength = target.limit() - oldTarget;

                /* first source-array index that must NOT be converted in this call */
                int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
                        + sourceIndex;
                /* distance from a source-array index to the matching target-array index */
                int offset = targetIndex - sourceIndex;

                /*
                 * perform the core loop... if it returns null, it must be due to an overflow or
                 * underflow
                 */
                cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray,
                        sourceIndex, offset, limit);
                if (cr == null) {
                    if (sourceLength <= targetLength) {
                        source.position(oldSource + sourceLength);
                        target.position(oldTarget + sourceLength);
                        cr = CoderResult.UNDERFLOW;
                    } else {
                        source.position(oldSource + targetLength);
                        target.position(oldTarget + targetLength);
                        cr = CoderResult.OVERFLOW;
                    }
                }
            } else {
                /* unoptimized loop */
                cr = decodeLoopCoreUnoptimized(source, target);
                if (cr == CoderResult.OVERFLOW) {
                    /* the target is full: un-read the byte that could not be stored */
                    source.position(source.position() - 1);
                }
            }

            /* set offsets since the start */
            if (offsets != null) {
                int count = target.position() - oldTarget;
                int sourceIndex = -1;
                while (--count >= 0) {
                    offsets.put(++sourceIndex);
                }
            }

            return cr;
        }

        /**
         * Array-based core loop. Copies bytes from {@code sourceArray} to {@code targetArray}
         * until {@code limit} is reached or a byte with the high bit set is found.
         *
         * @param source      buffer backed by {@code sourceArray}; repositioned only on error
         * @param target      buffer backed by {@code targetArray}; repositioned only on error
         * @param sourceArray backing array of {@code source}
         * @param targetArray backing array of {@code target}
         * @param oldSource   index into {@code sourceArray} of the first byte to convert
         * @param offset      value added to a source-array index to get the target-array index
         * @param limit       exclusive upper bound, as an index into {@code sourceArray}
         * @return {@code null} if every byte below {@code limit} was converted (the caller then
         *         sets the buffer positions), otherwise the malformed-input result
         */
        protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
                byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
            int i, ch = 0;

            /*
             * perform ascii conversion from the source array to the target array, making sure each
             * byte in the source is within the correct range
             */
            for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++) {
                targetArray[i + offset] = (char) ch;
            }

            /*
             * if some byte was not in the correct range, we need to deal with this byte by calling
             * decodeMalformedOrUnmappable and move the source and target positions to reflect the
             * early termination of the loop
             */
            if ((ch & 0x80) != 0) {
                /*
                 * i and i + offset are indices into the backing arrays, so the array offsets
                 * must be removed again to obtain buffer positions; otherwise buffers with a
                 * non-zero arrayOffset() (e.g. slices) end up at the wrong position.
                 */
                source.position(i + 1 - source.arrayOffset());
                target.position(i + offset - target.arrayOffset());
                return decodeMalformedOrUnmappable(ch);
            }
            return null;
        }

        /**
         * Buffer-based core loop used when either buffer has no accessible backing array.
         *
         * @return {@code UNDERFLOW} when the source is exhausted, {@code OVERFLOW} when a byte
         *         was read but the target had no room for it (that byte stays consumed; the
         *         caller rewinds), or the malformed-input result for a byte above 0x7F
         */
        protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) {
            int ch = 0;

            /*
             * perform ascii conversion from the source buffer to the target buffer, making sure
             * each byte in the source is within the correct range
             */
            while (source.hasRemaining()) {
                ch = source.get() & 0xff;

                if ((ch & 0x80) == 0) {
                    if (target.hasRemaining()) {
                        target.put((char) ch);
                    } else {
                        return CoderResult.OVERFLOW;
                    }
                } else {
                    /*
                     * if we reach here, it's because a character was not in the correct range,
                     * and we need to deal with this by calling decodeMalformedOrUnmappable
                     */
                    return decodeMalformedOrUnmappable(ch);
                }
            }

            return CoderResult.UNDERFLOW;
        }

        /**
         * Records the offending byte and reports it as malformed input of length 1.
         *
         * @param ch the byte value (0x80..0xFF) that could not be decoded
         * @return {@code CoderResult.malformedForLength(1)}
         */
        protected CoderResult decodeMalformedOrUnmappable(int ch) {
            /*
             * put the guilty character into toUBytesArray and return a message saying that the
             * character was malformed and of length 1.
             */
            toUBytesArray[0] = (byte) ch;
            toULength = 1;
            return CoderResult.malformedForLength(1);
        }
    }

    /** Encoder that turns chars below U+0080 into bytes and reports anything else as an error. */
    class CharsetEncoderASCII extends CharsetEncoderICU {

        public CharsetEncoderASCII(CharsetICU cs) {
            super(cs, fromUSubstitution);
            implReset();
        }

        /** Value stored into {@code fromUnicodeStatus} on every reset. */
        private static final int NEED_TO_WRITE_BOM = 1;

        @Override
        protected void implReset() {
            super.implReset();
            fromUnicodeStatus = NEED_TO_WRITE_BOM;
        }

        /**
         * Encodes as many chars as fit into {@code target}, stopping at the first char that is
         * outside U+0000..U+007F.
         *
         * @param source  chars to encode; its position is advanced past the consumed chars
         * @param target  receives the encoded bytes; its position is advanced accordingly
         * @param offsets if non-null, receives the values 0, 1, 2, ... - one for each byte
         *                written during this call
         * @param flush   forwarded to the surrogate handling
         * @return {@code UNDERFLOW}, {@code OVERFLOW}, or the result produced for the first
         *         char that could not be encoded
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
                boolean flush) {
            if (!source.hasRemaining()) {
                /* no input, nothing to do */
                return CoderResult.UNDERFLOW;
            }
            if (!target.hasRemaining()) {
                /* no output available, can't do anything */
                return CoderResult.OVERFLOW;
            }

            CoderResult cr;
            int oldSource = source.position();
            int oldTarget = target.position();

            if (fromUChar32 != 0) {
                /*
                 * if we have a leading character in fromUChar32 that needs to be dealt with, we
                 * need to check for a matching trail character and take the appropriate action
                 * as dictated by encodeTrail.
                 */
                cr = encodeTrail(source, (char) fromUChar32, flush);
            } else if (source.hasArray() && target.hasArray()) {
                /* optimized loop */

                /*
                 * extract arrays from the buffers and obtain various constant values that will
                 * be necessary in the core loop
                 */
                char[] sourceArray = source.array();
                int sourceOffset = source.arrayOffset();
                int sourceIndex = oldSource + sourceOffset;
                int sourceLength = source.limit() - oldSource;

                byte[] targetArray = target.array();
                int targetOffset = target.arrayOffset();
                int targetIndex = oldTarget + targetOffset;
                int targetLength = target.limit() - oldTarget;

                /* first source-array index that must NOT be converted in this call */
                int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
                        + sourceIndex;
                /* distance from a source-array index to the matching target-array index */
                int offset = targetIndex - sourceIndex;

                /*
                 * perform the core loop... if it returns null, it must be due to an overflow or
                 * underflow
                 */
                cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray,
                        sourceIndex, offset, limit, flush);
                if (cr == null) {
                    if (sourceLength <= targetLength) {
                        source.position(oldSource + sourceLength);
                        target.position(oldTarget + sourceLength);
                        cr = CoderResult.UNDERFLOW;
                    } else {
                        source.position(oldSource + targetLength);
                        target.position(oldTarget + targetLength);
                        cr = CoderResult.OVERFLOW;
                    }
                }
            } else {
                /* unoptimized loop */
                cr = encodeLoopCoreUnoptimized(source, target, flush);

                if (cr == CoderResult.OVERFLOW) {
                    /* the target is full: un-read the char that could not be stored */
                    source.position(source.position() - 1);
                }
            }

            /* set offsets since the start */
            if (offsets != null) {
                int count = target.position() - oldTarget;
                int sourceIndex = -1;
                while (--count >= 0) {
                    offsets.put(++sourceIndex);
                }
            }

            return cr;
        }

        /**
         * Array-based core loop. Copies chars from {@code sourceArray} to {@code targetArray}
         * until {@code limit} is reached or a char above U+007F is found.
         *
         * @param source      buffer backed by {@code sourceArray}; repositioned only on error
         * @param target      buffer backed by {@code targetArray}; repositioned only on error
         * @param sourceArray backing array of {@code source}
         * @param targetArray backing array of {@code target}
         * @param oldSource   index into {@code sourceArray} of the first char to convert
         * @param offset      value added to a source-array index to get the target-array index
         * @param limit       exclusive upper bound, as an index into {@code sourceArray}
         * @param flush       forwarded to {@link #encodeMalformedOrUnmappable}
         * @return {@code null} if every char below {@code limit} was converted (the caller then
         *         sets the buffer positions), otherwise the result for the offending char
         */
        protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
                char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
                boolean flush) {
            int i, ch = 0;

            /*
             * perform ascii conversion from the source array to the target array, making sure each
             * char in the source is within the correct range
             */
            for (i = oldSource; i < limit && (((ch = sourceArray[i]) & 0xff80) == 0); i++) {
                targetArray[i + offset] = (byte) ch;
            }

            /*
             * if some char was not in the correct range, we need to deal with this char by calling
             * encodeMalformedOrUnmappable and move the source and target positions to reflect the
             * early termination of the loop
             */
            if ((ch & 0xff80) != 0) {
                /*
                 * i and i + offset are indices into the backing arrays, so the array offsets
                 * must be removed again to obtain buffer positions; otherwise buffers with a
                 * non-zero arrayOffset() (e.g. slices) end up at the wrong position.
                 */
                source.position((i + 1) - source.arrayOffset());
                target.position(i + offset - target.arrayOffset());
                return encodeMalformedOrUnmappable(source, ch, flush);
            }
            return null;
        }

        /**
         * Buffer-based core loop used when either buffer has no accessible backing array.
         *
         * @return {@code UNDERFLOW} when the source is exhausted, {@code OVERFLOW} when a char
         *         was read but the target had no room for it (that char stays consumed; the
         *         caller rewinds), or the result for a char above U+007F
         */
        protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
                boolean flush) {
            int ch;

            /*
             * perform ascii conversion from the source buffer to the target buffer, making sure
             * each char in the source is within the correct range
             */
            while (source.hasRemaining()) {
                ch = source.get();

                if ((ch & 0xff80) == 0) {
                    if (target.hasRemaining()) {
                        target.put((byte) ch);
                    } else {
                        return CoderResult.OVERFLOW;
                    }
                } else {
                    /*
                     * if we reach here, it's because a character was not in the correct range,
                     * and we need to deal with this by calling encodeMalformedOrUnmappable.
                     */
                    return encodeMalformedOrUnmappable(source, ch, flush);
                }
            }

            return CoderResult.UNDERFLOW;
        }

        /**
         * Produces the result for a char that is not in U+0000..U+007F.
         *
         * @param source buffer positioned just after {@code ch}
         * @param ch     the char that could not be encoded
         * @param flush  forwarded to the surrogate handling
         * @return the outcome of the surrogate handling when {@code ch} is a surrogate code
         *         unit, otherwise {@code CoderResult.unmappableForLength(1)}
         */
        protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch,
                boolean flush) {
            /*
             * if the character is a surrogate, we need to call encodeTrail to attempt to match
             * it up with its other half. if not, the character is unmappable.
             */
            return (UTF16.isSurrogate((char) ch))
                    ? encodeTrail(source, (char) ch, flush)
                    : CoderResult.unmappableForLength(1);
        }

        /**
         * Delegates surrogate handling to {@code handleSurrogates} and maps its outcome.
         *
         * @return the non-null result of {@code handleSurrogates}, or
         *         {@code CoderResult.unmappableForLength(2)} when it returns {@code null}
         */
        private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
            /*
             * ASCII cannot represent supplementary code points, so if handleSurrogates returns
             * null, we leave fromUChar32 alone (it should store a new codepoint) and call it
             * unmappable.
             * NOTE(review): handleSurrogates is defined outside this file; presumably null
             * means a complete surrogate pair was consumed - confirm against its definition.
             */
            CoderResult cr = handleSurrogates(source, lead);
            if (cr != null) {
                return cr;
            }
            return CoderResult.unmappableForLength(2);
        }

    }

    @Override
    public CharsetDecoder newDecoder() {
        return new CharsetDecoderASCII(this);
    }

    @Override
    public CharsetEncoder newEncoder() {
        return new CharsetEncoderASCII(this);
    }

    /**
     * Adds the code points U+0000..U+007F to {@code setFillIn}.
     *
     * @param setFillIn set that receives the range
     * @param which     not used by this implementation
     */
    @Override
    void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
        setFillIn.add(0, 0x7f);
    }
}