Home | History | Annotate | Download | only in charset
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 2006-2011, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  *
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.charset;
     10 
     11 import java.nio.ByteBuffer;
     12 import java.nio.CharBuffer;
     13 import java.nio.IntBuffer;
     14 import java.nio.charset.CharsetDecoder;
     15 import java.nio.charset.CharsetEncoder;
     16 import java.nio.charset.CoderResult;
     17 
     18 import com.ibm.icu.text.UTF16;
     19 import com.ibm.icu.text.UnicodeSet;
     20 
     21 class CharsetASCII extends CharsetICU {
     22     protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };
     23 
     24     public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
     25         super(icuCanonicalName, javaCanonicalName, aliases);
     26         maxBytesPerChar = 1;
     27         minBytesPerChar = 1;
     28         maxCharsPerByte = 1;
     29     }
     30 
     31     class CharsetDecoderASCII extends CharsetDecoderICU {
     32 
     33         public CharsetDecoderASCII(CharsetICU cs) {
     34             super(cs);
     35         }
     36 
     37         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
     38                 boolean flush) {
     39             if (!source.hasRemaining()) {
     40                 /* no input, nothing to do */
     41                 return CoderResult.UNDERFLOW;
     42             }
     43             if (!target.hasRemaining()) {
     44                 /* no output available, can't do anything */
     45                 return CoderResult.OVERFLOW;
     46             }
     47 
     48             CoderResult cr;
     49             int oldSource = source.position();
     50             int oldTarget = target.position();
     51 
     52             if (source.hasArray() && target.hasArray()) {
     53                 /* optimized loop */
     54 
     55                 /*
     56                  * extract arrays from the buffers and obtain various constant values that will be
     57                  * necessary in the core loop
     58                  */
     59                 byte[] sourceArray = source.array();
     60                 int sourceOffset = source.arrayOffset();
     61                 int sourceIndex = oldSource + sourceOffset;
     62                 int sourceLength = source.limit() - oldSource;
     63 
     64                 char[] targetArray = target.array();
     65                 int targetOffset = target.arrayOffset();
     66                 int targetIndex = oldTarget + targetOffset;
     67                 int targetLength = target.limit() - oldTarget;
     68 
     69                 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
     70                         + sourceIndex;
     71                 int offset = targetIndex - sourceIndex;
     72 
     73                 /*
     74                  * perform the core loop... if it returns null, it must be due to an overflow or
     75                  * underflow
     76                  */
     77                 cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit);
     78                 if (cr == null) {
     79                     if (sourceLength <= targetLength) {
     80                         source.position(oldSource + sourceLength);
     81                         target.position(oldTarget + sourceLength);
     82                         cr = CoderResult.UNDERFLOW;
     83                     } else {
     84                         source.position(oldSource + targetLength);
     85                         target.position(oldTarget + targetLength);
     86                         cr = CoderResult.OVERFLOW;
     87                     }
     88                 }
     89             } else {
     90                 /* unoptimized loop */
     91                 cr = decodeLoopCoreUnoptimized(source, target);
     92                 if (cr == CoderResult.OVERFLOW) {
     93                     /* the target is full */
     94                     source.position(source.position() - 1); /* rewind by 1 */
     95                 }
     96             }
     97 
     98             /* set offsets since the start */
     99             if (offsets != null) {
    100                 int count = target.position() - oldTarget;
    101                 int sourceIndex = -1;
    102                 while (--count >= 0) offsets.put(++sourceIndex);
    103             }
    104 
    105             return cr;
    106         }
    107 
    108         protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
    109                 byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
    110             int i, ch = 0;
    111 
    112             /*
    113              * perform ascii conversion from the source array to the target array, making sure each
    114              * byte in the source is within the correct range
    115              */
    116             for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)
    117                 targetArray[i + offset] = (char) ch;
    118 
    119             /*
    120              * if some byte was not in the correct range, we need to deal with this byte by calling
    121              * decodeMalformedOrUnmappable and move the source and target positions to reflect the
    122              * early termination of the loop
    123              */
    124             if ((ch & 0x80) != 0) {
    125                 source.position(i + 1);
    126                 target.position(i + offset);
    127                 return decodeMalformedOrUnmappable(ch);
    128             } else
    129                 return null;
    130         }
    131 
    132         protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) {
    133             int ch = 0;
    134 
    135             /*
    136              * perform ascii conversion from the source buffer to the target buffer, making sure
    137              * each byte in the source is within the correct range
    138              */
    139             while (source.hasRemaining()) {
    140                 ch = source.get() & 0xff;
    141 
    142                 if ((ch & 0x80) == 0) {
    143                     if (target.hasRemaining()) {
    144                         target.put((char)ch);
    145                     } else {
    146                         return CoderResult.OVERFLOW;
    147                     }
    148                 } else {
    149                     /*
    150                      * if we reach here, it's because a character was not in the correct range, and we need
    151                      * to deak with this by calling decodeMalformedOrUnmappable
    152                      */
    153                     return decodeMalformedOrUnmappable(ch);
    154                 }
    155             }
    156 
    157             return CoderResult.UNDERFLOW;
    158         }
    159 
    160         protected CoderResult decodeMalformedOrUnmappable(int ch) {
    161             /*
    162              * put the guilty character into toUBytesArray and return a message saying that the
    163              * character was malformed and of length 1.
    164              */
    165             toUBytesArray[0] = (byte) ch;
    166             toULength = 1;
    167             return CoderResult.malformedForLength(1);
    168         }
    169     }
    170 
    171     class CharsetEncoderASCII extends CharsetEncoderICU {
    172 
    173         public CharsetEncoderASCII(CharsetICU cs) {
    174             super(cs, fromUSubstitution);
    175             implReset();
    176         }
    177 
    178         private final static int NEED_TO_WRITE_BOM = 1;
    179 
    180         protected void implReset() {
    181             super.implReset();
    182             fromUnicodeStatus = NEED_TO_WRITE_BOM;
    183         }
    184 
    185         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
    186                 boolean flush) {
    187             if (!source.hasRemaining()) {
    188                 /* no input, nothing to do */
    189                 return CoderResult.UNDERFLOW;
    190             }
    191             if (!target.hasRemaining()) {
    192                 /* no output available, can't do anything */
    193                 return CoderResult.OVERFLOW;
    194             }
    195 
    196             CoderResult cr;
    197             int oldSource = source.position();
    198             int oldTarget = target.position();
    199 
    200             if (fromUChar32 != 0) {
    201                 /*
    202                  * if we have a leading character in fromUChar32 that needs to be dealt with, we
    203                  * need to check for a matching trail character and taking the appropriate action as
    204                  * dictated by encodeTrail.
    205                  */
    206                 cr = encodeTrail(source, (char) fromUChar32, flush);
    207             } else {
    208                 if (source.hasArray() && target.hasArray()) {
    209                     /* optimized loop */
    210 
    211                     /*
    212                      * extract arrays from the buffers and obtain various constant values that will
    213                      * be necessary in the core loop
    214                      */
    215                     char[] sourceArray = source.array();
    216                     int sourceOffset = source.arrayOffset();
    217                     int sourceIndex = oldSource + sourceOffset;
    218                     int sourceLength = source.limit() - oldSource;
    219 
    220                     byte[] targetArray = target.array();
    221                     int targetOffset = target.arrayOffset();
    222                     int targetIndex = oldTarget + targetOffset;
    223                     int targetLength = target.limit() - oldTarget;
    224 
    225                     int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
    226                             + sourceIndex;
    227                     int offset = targetIndex - sourceIndex;
    228 
    229                     /*
    230                      * perform the core loop... if it returns null, it must be due to an overflow or
    231                      * underflow
    232                      */
    233                     cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush);
    234                     if (cr == null) {
    235                         if (sourceLength <= targetLength) {
    236                             source.position(oldSource + sourceLength);
    237                             target.position(oldTarget + sourceLength);
    238                             cr = CoderResult.UNDERFLOW;
    239                         } else {
    240                             source.position(oldSource + targetLength);
    241                             target.position(oldTarget + targetLength);
    242                             cr = CoderResult.OVERFLOW;
    243                         }
    244                     }
    245                 } else {
    246                     /* unoptimized loop */
    247 
    248                     cr = encodeLoopCoreUnoptimized(source, target, flush);
    249 
    250                     if (cr == CoderResult.OVERFLOW) {
    251                         source.position(source.position() - 1); /* rewind by 1 */
    252                     }
    253                 }
    254             }
    255 
    256             /* set offsets since the start */
    257             if (offsets != null) {
    258                 int count = target.position() - oldTarget;
    259                 int sourceIndex = -1;
    260                 while (--count >= 0) offsets.put(++sourceIndex);
    261             }
    262 
    263             return cr;
    264         }
    265 
    266         protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
    267                 char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
    268                 boolean flush) {
    269             int i, ch = 0;
    270 
    271             /*
    272              * perform ascii conversion from the source array to the target array, making sure each
    273              * char in the source is within the correct range
    274              */
    275             for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)
    276                 targetArray[i + offset] = (byte) ch;
    277 
    278             /*
    279              * if some byte was not in the correct range, we need to deal with this byte by calling
    280              * encodeMalformedOrUnmappable and move the source and target positions to reflect the
    281              * early termination of the loop
    282              */
    283             if ((ch & 0xff80) != 0) {
    284                 source.position((i + 1) - source.arrayOffset());
    285                 target.position(i + offset);
    286                 return encodeMalformedOrUnmappable(source, ch, flush);
    287             } else
    288                 return null;
    289         }
    290 
    291         protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) {
    292             int ch;
    293 
    294             /*
    295              * perform ascii conversion from the source buffer to the target buffer, making sure
    296              * each char in the source is within the correct range
    297              */
    298             while (source.hasRemaining()) {
    299                 ch = (int) source.get();
    300 
    301                 if ((ch & 0xff80) == 0) {
    302                     if (target.hasRemaining()) {
    303                         target.put((byte) ch);
    304                     } else {
    305                         return CoderResult.OVERFLOW;
    306                     }
    307                 } else {
    308                     /*
    309                      * if we reach here, it's because a character was not in the correct range, and we need
    310                      * to deak with this by calling encodeMalformedOrUnmappable.
    311                      */
    312                     return encodeMalformedOrUnmappable(source, ch, flush);
    313                 }
    314             }
    315 
    316             return CoderResult.UNDERFLOW;
    317         }
    318 
    319         protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {
    320             /*
    321              * if the character is a lead surrogate, we need to call encodeTrail to attempt to match
    322              * it up with a trail surrogate. if not, the character is unmappable.
    323              */
    324             return (UTF16.isSurrogate((char) ch))
    325                     ? encodeTrail(source, (char) ch, flush)
    326                     : CoderResult.unmappableForLength(1);
    327         }
    328 
    329         private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
    330             /*
    331              * ASCII doesn't support characters in the BMP, so if handleSurrogates returns null,
    332              * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
    333              */
    334             CoderResult cr = handleSurrogates(source, lead);
    335             if (cr != null) {
    336                 return cr;
    337             } else {
    338                 //source.position(source.position() - 2);
    339                 return CoderResult.unmappableForLength(2);
    340             }
    341         }
    342 
    343     }
    344 
    345     public CharsetDecoder newDecoder() {
    346         return new CharsetDecoderASCII(this);
    347     }
    348 
    349     public CharsetEncoder newEncoder() {
    350         return new CharsetEncoderASCII(this);
    351     }
    352 
    353     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
    354         setFillIn.add(0,0x7f);
    355      }
    356 }
    357