Home | History | Annotate | Download | only in charset
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4  *******************************************************************************
      5  * Copyright (C) 2006-2011, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  *
      9  *******************************************************************************
     10  */
     11 package com.ibm.icu.charset;
     12 
     13 import java.nio.ByteBuffer;
     14 import java.nio.CharBuffer;
     15 import java.nio.IntBuffer;
     16 import java.nio.charset.CharsetDecoder;
     17 import java.nio.charset.CharsetEncoder;
     18 import java.nio.charset.CoderResult;
     19 
     20 import com.ibm.icu.text.UTF16;
     21 import com.ibm.icu.text.UnicodeSet;
     22 
     23 class CharsetASCII extends CharsetICU {
     24     protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };
     25 
     26     public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
     27         super(icuCanonicalName, javaCanonicalName, aliases);
     28         maxBytesPerChar = 1;
     29         minBytesPerChar = 1;
     30         maxCharsPerByte = 1;
     31     }
     32 
     33     class CharsetDecoderASCII extends CharsetDecoderICU {
     34 
     35         public CharsetDecoderASCII(CharsetICU cs) {
     36             super(cs);
     37         }
     38 
     39         @Override
     40         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
     41                 boolean flush) {
     42             if (!source.hasRemaining()) {
     43                 /* no input, nothing to do */
     44                 return CoderResult.UNDERFLOW;
     45             }
     46             if (!target.hasRemaining()) {
     47                 /* no output available, can't do anything */
     48                 return CoderResult.OVERFLOW;
     49             }
     50 
     51             CoderResult cr;
     52             int oldSource = source.position();
     53             int oldTarget = target.position();
     54 
     55             if (source.hasArray() && target.hasArray()) {
     56                 /* optimized loop */
     57 
     58                 /*
     59                  * extract arrays from the buffers and obtain various constant values that will be
     60                  * necessary in the core loop
     61                  */
     62                 byte[] sourceArray = source.array();
     63                 int sourceOffset = source.arrayOffset();
     64                 int sourceIndex = oldSource + sourceOffset;
     65                 int sourceLength = source.limit() - oldSource;
     66 
     67                 char[] targetArray = target.array();
     68                 int targetOffset = target.arrayOffset();
     69                 int targetIndex = oldTarget + targetOffset;
     70                 int targetLength = target.limit() - oldTarget;
     71 
     72                 int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
     73                         + sourceIndex;
     74                 int offset = targetIndex - sourceIndex;
     75 
     76                 /*
     77                  * perform the core loop... if it returns null, it must be due to an overflow or
     78                  * underflow
     79                  */
     80                 cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit);
     81                 if (cr == null) {
     82                     if (sourceLength <= targetLength) {
     83                         source.position(oldSource + sourceLength);
     84                         target.position(oldTarget + sourceLength);
     85                         cr = CoderResult.UNDERFLOW;
     86                     } else {
     87                         source.position(oldSource + targetLength);
     88                         target.position(oldTarget + targetLength);
     89                         cr = CoderResult.OVERFLOW;
     90                     }
     91                 }
     92             } else {
     93                 /* unoptimized loop */
     94                 cr = decodeLoopCoreUnoptimized(source, target);
     95                 if (cr == CoderResult.OVERFLOW) {
     96                     /* the target is full */
     97                     source.position(source.position() - 1); /* rewind by 1 */
     98                 }
     99             }
    100 
    101             /* set offsets since the start */
    102             if (offsets != null) {
    103                 int count = target.position() - oldTarget;
    104                 int sourceIndex = -1;
    105                 while (--count >= 0) offsets.put(++sourceIndex);
    106             }
    107 
    108             return cr;
    109         }
    110 
    111         protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
    112                 byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
    113             int i, ch = 0;
    114 
    115             /*
    116              * perform ascii conversion from the source array to the target array, making sure each
    117              * byte in the source is within the correct range
    118              */
    119             for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)
    120                 targetArray[i + offset] = (char) ch;
    121 
    122             /*
    123              * if some byte was not in the correct range, we need to deal with this byte by calling
    124              * decodeMalformedOrUnmappable and move the source and target positions to reflect the
    125              * early termination of the loop
    126              */
    127             if ((ch & 0x80) != 0) {
    128                 source.position(i + 1);
    129                 target.position(i + offset);
    130                 return decodeMalformedOrUnmappable(ch);
    131             } else
    132                 return null;
    133         }
    134 
    135         protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) {
    136             int ch = 0;
    137 
    138             /*
    139              * perform ascii conversion from the source buffer to the target buffer, making sure
    140              * each byte in the source is within the correct range
    141              */
    142             while (source.hasRemaining()) {
    143                 ch = source.get() & 0xff;
    144 
    145                 if ((ch & 0x80) == 0) {
    146                     if (target.hasRemaining()) {
    147                         target.put((char)ch);
    148                     } else {
    149                         return CoderResult.OVERFLOW;
    150                     }
    151                 } else {
    152                     /*
    153                      * if we reach here, it's because a character was not in the correct range, and we need
    154                      * to deak with this by calling decodeMalformedOrUnmappable
    155                      */
    156                     return decodeMalformedOrUnmappable(ch);
    157                 }
    158             }
    159 
    160             return CoderResult.UNDERFLOW;
    161         }
    162 
    163         protected CoderResult decodeMalformedOrUnmappable(int ch) {
    164             /*
    165              * put the guilty character into toUBytesArray and return a message saying that the
    166              * character was malformed and of length 1.
    167              */
    168             toUBytesArray[0] = (byte) ch;
    169             toULength = 1;
    170             return CoderResult.malformedForLength(1);
    171         }
    172     }
    173 
    174     class CharsetEncoderASCII extends CharsetEncoderICU {
    175 
    176         public CharsetEncoderASCII(CharsetICU cs) {
    177             super(cs, fromUSubstitution);
    178             implReset();
    179         }
    180 
    181         private final static int NEED_TO_WRITE_BOM = 1;
    182 
    183         @Override
    184         protected void implReset() {
    185             super.implReset();
    186             fromUnicodeStatus = NEED_TO_WRITE_BOM;
    187         }
    188 
    189         @Override
    190         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
    191                 boolean flush) {
    192             if (!source.hasRemaining()) {
    193                 /* no input, nothing to do */
    194                 return CoderResult.UNDERFLOW;
    195             }
    196             if (!target.hasRemaining()) {
    197                 /* no output available, can't do anything */
    198                 return CoderResult.OVERFLOW;
    199             }
    200 
    201             CoderResult cr;
    202             int oldSource = source.position();
    203             int oldTarget = target.position();
    204 
    205             if (fromUChar32 != 0) {
    206                 /*
    207                  * if we have a leading character in fromUChar32 that needs to be dealt with, we
    208                  * need to check for a matching trail character and taking the appropriate action as
    209                  * dictated by encodeTrail.
    210                  */
    211                 cr = encodeTrail(source, (char) fromUChar32, flush);
    212             } else {
    213                 if (source.hasArray() && target.hasArray()) {
    214                     /* optimized loop */
    215 
    216                     /*
    217                      * extract arrays from the buffers and obtain various constant values that will
    218                      * be necessary in the core loop
    219                      */
    220                     char[] sourceArray = source.array();
    221                     int sourceOffset = source.arrayOffset();
    222                     int sourceIndex = oldSource + sourceOffset;
    223                     int sourceLength = source.limit() - oldSource;
    224 
    225                     byte[] targetArray = target.array();
    226                     int targetOffset = target.arrayOffset();
    227                     int targetIndex = oldTarget + targetOffset;
    228                     int targetLength = target.limit() - oldTarget;
    229 
    230                     int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
    231                             + sourceIndex;
    232                     int offset = targetIndex - sourceIndex;
    233 
    234                     /*
    235                      * perform the core loop... if it returns null, it must be due to an overflow or
    236                      * underflow
    237                      */
    238                     cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush);
    239                     if (cr == null) {
    240                         if (sourceLength <= targetLength) {
    241                             source.position(oldSource + sourceLength);
    242                             target.position(oldTarget + sourceLength);
    243                             cr = CoderResult.UNDERFLOW;
    244                         } else {
    245                             source.position(oldSource + targetLength);
    246                             target.position(oldTarget + targetLength);
    247                             cr = CoderResult.OVERFLOW;
    248                         }
    249                     }
    250                 } else {
    251                     /* unoptimized loop */
    252 
    253                     cr = encodeLoopCoreUnoptimized(source, target, flush);
    254 
    255                     if (cr == CoderResult.OVERFLOW) {
    256                         source.position(source.position() - 1); /* rewind by 1 */
    257                     }
    258                 }
    259             }
    260 
    261             /* set offsets since the start */
    262             if (offsets != null) {
    263                 int count = target.position() - oldTarget;
    264                 int sourceIndex = -1;
    265                 while (--count >= 0) offsets.put(++sourceIndex);
    266             }
    267 
    268             return cr;
    269         }
    270 
    271         protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
    272                 char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
    273                 boolean flush) {
    274             int i, ch = 0;
    275 
    276             /*
    277              * perform ascii conversion from the source array to the target array, making sure each
    278              * char in the source is within the correct range
    279              */
    280             for (i = oldSource; i < limit && (((ch = sourceArray[i]) & 0xff80) == 0); i++)
    281                 targetArray[i + offset] = (byte) ch;
    282 
    283             /*
    284              * if some byte was not in the correct range, we need to deal with this byte by calling
    285              * encodeMalformedOrUnmappable and move the source and target positions to reflect the
    286              * early termination of the loop
    287              */
    288             if ((ch & 0xff80) != 0) {
    289                 source.position((i + 1) - source.arrayOffset());
    290                 target.position(i + offset);
    291                 return encodeMalformedOrUnmappable(source, ch, flush);
    292             } else
    293                 return null;
    294         }
    295 
    296         protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) {
    297             int ch;
    298 
    299             /*
    300              * perform ascii conversion from the source buffer to the target buffer, making sure
    301              * each char in the source is within the correct range
    302              */
    303             while (source.hasRemaining()) {
    304                 ch = source.get();
    305 
    306                 if ((ch & 0xff80) == 0) {
    307                     if (target.hasRemaining()) {
    308                         target.put((byte) ch);
    309                     } else {
    310                         return CoderResult.OVERFLOW;
    311                     }
    312                 } else {
    313                     /*
    314                      * if we reach here, it's because a character was not in the correct range, and we need
    315                      * to deak with this by calling encodeMalformedOrUnmappable.
    316                      */
    317                     return encodeMalformedOrUnmappable(source, ch, flush);
    318                 }
    319             }
    320 
    321             return CoderResult.UNDERFLOW;
    322         }
    323 
    324         protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {
    325             /*
    326              * if the character is a lead surrogate, we need to call encodeTrail to attempt to match
    327              * it up with a trail surrogate. if not, the character is unmappable.
    328              */
    329             return (UTF16.isSurrogate((char) ch))
    330                     ? encodeTrail(source, (char) ch, flush)
    331                     : CoderResult.unmappableForLength(1);
    332         }
    333 
    334         private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
    335             /*
    336              * ASCII doesn't support characters in the BMP, so if handleSurrogates returns null,
    337              * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
    338              */
    339             CoderResult cr = handleSurrogates(source, lead);
    340             if (cr != null) {
    341                 return cr;
    342             } else {
    343                 //source.position(source.position() - 2);
    344                 return CoderResult.unmappableForLength(2);
    345             }
    346         }
    347 
    348     }
    349 
    350     @Override
    351     public CharsetDecoder newDecoder() {
    352         return new CharsetDecoderASCII(this);
    353     }
    354 
    355     @Override
    356     public CharsetEncoder newEncoder() {
    357         return new CharsetEncoderASCII(this);
    358     }
    359 
    360     @Override
    361     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
    362         setFillIn.add(0,0x7f);
    363      }
    364 }
    365