Home | History | Annotate | Download | only in charset
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2007-2011, International Business Machines Corporation and         *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.charset;
     10 
     11 import java.nio.ByteBuffer;
     12 import java.nio.CharBuffer;
     13 import java.nio.IntBuffer;
     14 import java.nio.charset.CharsetDecoder;
     15 import java.nio.charset.CharsetEncoder;
     16 import java.nio.charset.CoderResult;
     17 
     18 import com.ibm.icu.text.UnicodeSet;
     19 
     20 /**
     21  * @author Michael Ow
     22  *
     23  */
     24 class CharsetUTF7 extends CharsetICU {
    /** ICU canonical name that selects the IMAP mailbox-name variant; compared against in the constructor. */
    private final static String IMAP_NAME="IMAP-mailbox-name";
    /** True when this charset was constructed under {@link #IMAP_NAME}; read by the helpers, decoder and encoder. */
    private boolean useIMAP;
    /** Substitution bytes passed to the encoder's superclass constructor: a single 0x3F ('?'). */
    protected byte[] fromUSubstitution=new byte[]{0x3F};
     28 
     29     public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
     30         super(icuCanonicalName, javaCanonicalName, aliases);
     31         maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */
     32         minBytesPerChar=1;
     33         maxCharsPerByte=1;
     34 
     35         useIMAP=false;
     36 
     37         if (icuCanonicalName.equals(IMAP_NAME)) {
     38             useIMAP=true;
     39         }
     40     }
     41 
     42     //private static boolean inSetD(char c) {
     43     //    return (
     44     //            (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */
     45     //            (char)(c - 48) < 10 ||                        /* digits */
     46     //            (char)(c - 39) < 3 ||                          /* ' () */
     47     //            (char)(c - 44) < 4 ||                          /* ,-./ */
     48     //            (c==58) || (c==63)            /* :? */
     49     //            );
     50     //}
     51 
     52     //private static boolean inSetO(char c) {
     53     //    return (
     54     //            (char)(c - 33) < 6 ||                           /* !"#$%& */
     55     //            (char)(c - 59) < 4 ||                           /* ;<=> */
     56     //            (char)(c - 93) < 4 ||                           /* ]^_` */
     57     //            (char)(c - 123) < 3 ||                         /* {|} */
      58     //            (c==42) || (c==64) || (c==91)  /* *@[ */
     59     //            );
     60     //}
     61 
     62     private static boolean isCRLFTAB(char c) {
     63         return (
     64                 (c==13) || (c==10) || (c==9)
     65                 );
     66     }
     67 
     68     //private static boolean isCRLFSPTAB(char c) {
     69     //   return (
     70     //            (c==32) || (c==13) || (c==10) || (c==9)
     71     //            );
     72     //}
     73 
    /* ASCII codes of the characters that get special treatment in UTF-7 and in IMAP mailbox names. */
    private static final byte PLUS=43;          /* '+' : switches to Unicode mode when not in IMAP mode */
    private static final byte MINUS=45;         /* '-' : terminates a base64 run */
    private static final byte BACKSLASH=92;     /* '\' : rejected by isLegal() when not in IMAP mode */
    //private static final byte TILDE=126;
    private static final byte AMPERSAND=0x26;   /* '&' : switches to Unicode mode in IMAP mode */
    private static final byte COMMA=0x2c;       /* ',' : IMAP base64 digit 63 */
    private static final byte SLASH=0x2f;       /* '/' : not a base64 digit in IMAP mode */
     81 
     82     // legal byte values: all US-ASCII graphic characters 0x20..0x7e
     83     private static boolean isLegal(char c, boolean useIMAP) {
     84         if (useIMAP) {
     85             return (
     86                     (0x20 <= c) && (c <= 0x7e)
     87                     );
     88         } else {
     89             return (
     90                     ((char)(c - 32) < 94 && (c != BACKSLASH)) || isCRLFTAB(c)
     91                     );
     92         }
     93     }
     94 
     95     // directly encode all of printable ASCII 0x20..0x7e except '&' 0x26
     96     private static boolean inSetDIMAP(char c) {
     97         return (
     98                 (isLegal(c, true) && c != AMPERSAND)
     99                 );
    100     }
    101 
    102     private static byte TO_BASE64_IMAP(int n) {
    103         return (n < 63 ? TO_BASE_64[n] : COMMA);
    104     }
    105 
    106     private static byte FROM_BASE64_IMAP(char c) {
    107         return (c==COMMA ? 63 : c==SLASH ? -1 : FROM_BASE_64[c]);
    108     }
    109 
    /*
     * encode directly sets D and O and CR LF SP TAB
     *
     * Indexed by ASCII code 0..127; an entry of 1 means the encoder writes the character as-is.
     * Within 0x20..0x7f the zero entries are '+' (0x2b), '\' (0x5c), '~' (0x7e) and DEL (0x7f).
     * encodeLoop selects this table when the encoder status value is below 0x10000000.
     */
    private static final byte ENCODE_DIRECTLY_MAXIMUM[] =
    {
     /*0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: TAB, LF, CR */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x20: all but '+' */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30 */

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50: all but '\' */

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0  /* 0x70: all but '~' and DEL */
    };
    126 
    /*
     * encode directly set D and CR LF SP TAB but not set O
     *
     * Indexed by ASCII code 0..127; an entry of 1 means the encoder writes the character as-is.
     * encodeLoop selects this table when the encoder status value is not below 0x10000000.
     */
    private static final byte ENCODE_DIRECTLY_RESTRICTED[] =
    {
     /*0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f*/
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: TAB, LF, CR */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */

        1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, /* 0x20: SP ' ( ) , - . / */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x30: digits : ? */

        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40: A-O */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50: P-Z */

        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60: a-o */
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  /* 0x70: p-z */
    };
    143 
    /*
     * Base64 alphabet: maps a 6-bit value (0..63) to the ASCII code of its digit.
     * TO_BASE64_IMAP() uses entries 0..62 of this table and substitutes ',' for value 63.
     */
    private static final byte TO_BASE_64[] =
    {
       /* A-Z */
       65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
       78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
       /* a-z */
       97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
       110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
       /* 0-9 */
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
       /* +/ */
       43, 47
    };
    157 
    /*
     * Reverse base64 table, indexed by ASCII code 0..127 (callers must range-check first).
     *   0..63  the 6-bit value of a base64 digit
     *   -1     a legal character that is not a base64 digit
     *   -2     the minus sign, which terminates a base64 run
     *   -3     an illegal character
     */
    private static final byte FROM_BASE_64[] =
    {
       /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
       -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
       -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
       /* general punctuation with + and / and a special value (-2) for - */
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
       /* digits */
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
       /* A-Z */
       -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
       15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
       /* a-z*/
       -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
       41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
    };
    174 
    /**
     * Decoder for UTF-7 and, when the enclosing charset's {@code useIMAP} flag is set, for the
     * IMAP mailbox-name variant.
     *
     * The decoder is resumable. Between calls its state lives in {@code toUnicodeStatus}
     * (bit 24 = direct-mode flag, bits 16..23 = base64Counter, low 16 bits = leftover bits)
     * and in {@code toULength} / {@code toUBytesArray} (bytes of the sequence in progress).
     */
    class CharsetDecoderUTF7 extends CharsetDecoderICU {
        /**
         * Creates the decoder and puts it into its initial (direct mode) state.
         *
         * @param cs the charset this decoder belongs to, forwarded to the superclass
         */
        public CharsetDecoderUTF7(CharsetICU cs) {
            super(cs);
            implReset();
        }

        /**
         * Resets the decoder: after the superclass reset, keeps the top four bits of
         * {@code toUnicodeStatus}, sets bit 24 (direct mode) and clears everything below it
         * (base64Counter and leftover bits).
         */
        @Override
        protected void implReset() {
            super.implReset();
            toUnicodeStatus=(toUnicodeStatus & 0xf0000000) | 0x1000000;
        }

        /**
         * Decodes bytes from {@code source} into UTF-16 code units in {@code target}.
         *
         * Direct mode copies legal ASCII bytes through unchanged until it meets the shift
         * character ('+', or '&' for IMAP). Unicode mode collects base64 digits, six bits at
         * a time, and emits a code unit each time sixteen bits are available.
         *
         * @param source  input bytes; its position is advanced past consumed bytes
         * @param target  output buffer for decoded code units
         * @param offsets optional buffer receiving a source index per emitted code unit;
         *                may be {@code null}
         * @param flush   {@code true} when this is the end of the input; enables the
         *                end-of-stream checks after the main loop
         * @return UNDERFLOW when the input is exhausted, OVERFLOW when {@code target} is full,
         *         or a malformed-input result
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult cr=CoderResult.UNDERFLOW;
            byte base64Value;    /* FROM_BASE_64 classification of the current byte */
            byte base64Counter;  /* -1 right after the shift character, else 0..7 position in the base64 group */
            byte inDirectMode;   /* 1 = direct mode, 0 = Unicode (base64) mode */
            char bits;           /* base64 bits collected but not yet emitted */
            int byteIndex;       /* number of bytes currently held in toUBytesArray */
            int sourceIndex, nextSourceIndex;

            int length;

            char b;              /* current input byte, widened to char */
            char c;              /* code unit assembled from base64 bits */

            /* starts at the buffer position and is incremented for every byte read below */
            int sourceArrayIndex=source.position();

            //get the state of the machine state
            {
            int status=toUnicodeStatus;
            inDirectMode=(byte)((status >> 24) & 1);
            base64Counter=(byte)(status >> 16);
            bits=(char)status;
            }
            byteIndex=toULength;
            /* sourceIndex=-1 if the current character began in the previous buffer */
            sourceIndex=byteIndex==0 ? 0 : -1;
            nextSourceIndex=0;

            /* each pass handles one mode; "continue directMode" re-dispatches after a mode switch */
            directMode:  while (true) {
                if (inDirectMode==1) {
                    /*
                     * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
                     * with their US-ASCII byte values.
                     * Backslash and Tilde and most control characters are not allowed in UTF-7.
                     * A plus sign starts Unicode (or "escape") Mode.
                     * An ampersand starts Unicode Mode for IMAP.
                     *
                     * In Direct Mode, only the sourceIndex is used.
                     */
                    byteIndex=0;
                    length=source.remaining();
                    //targetCapacity=target.remaining();
                    //Commented out because length of source may be larger than target when it comes to bytes
                    /*if (useIMAP && length > targetCapacity) {
                        length=targetCapacity;
                    }*/
                    while (length > 0) {
                        b=(char)(source.get());
                        sourceArrayIndex++;
                        if (!isLegal(b, useIMAP)) {
                            /* illegal byte: remember it as the offending sequence and report */
                            toUBytesArray[0]=(byte)b;
                            byteIndex=1;
                            /* NOTE(review): the length passed is the running source position, not a sequence length -- confirm intent */
                            cr=CoderResult.malformedForLength(sourceArrayIndex);
                            break;
                        } else if ((!useIMAP && b!=PLUS) || (useIMAP && b!=AMPERSAND)) {
                            // write directly encoded character
                            if (target.hasRemaining()) { // Check to make sure that there is room in target.
                                target.put(b);
                                if (offsets!= null) {
                                    offsets.put(sourceIndex++);
                                }
                            } else {  // Get out and set the CoderResult.
                                /* the byte is already consumed, so park the character in the error buffer */
                                charErrorBufferArray[charErrorBufferLength++] = b;
                                cr = CoderResult.OVERFLOW;
                                break;
                            }
                        } else { /* PLUS or (AMPERSAND in IMAP)*/
                            /* switch to Unicode mode */
                            nextSourceIndex=++sourceIndex;
                            inDirectMode=0;
                            byteIndex=0;
                            bits=0;
                            base64Counter=-1;
                            continue directMode;
                        }
                        --length;
                    }//end of while
                    /* NOTE(review): this also replaces a malformed result set above when the target happens to be full -- confirm */
                    if (source.hasRemaining() && target.position() >= target.limit()) {
                        /* target is full */
                        cr=CoderResult.OVERFLOW;
                    }
                    break directMode;
                } else { /* Unicode Mode*/
                    /*
                     * In Unicode Mode, UTF-16BE is base64-encoded.
                     * The base64 sequence ends with any character that is not in the base64 alphabet.
                     * A terminating minus sign is consumed.
                     *
                     * In Unicode Mode, the sourceIndex has the index to the start of the current
                     * base64 bytes, while nextSourceIndex is precisely parallel to source,
                     * keeping the index to the following byte.
                     *
                     * NOTE(review): nextSourceIndex is not advanced anywhere in this loop; it is only
                     * set when direct mode sees the shift character. Offsets derived from it may
                     * therefore not track the source -- confirm.
                     */
                    while(source.hasRemaining()) {
                        if (target.hasRemaining()) {
                            b=(char)source.get();
                            sourceArrayIndex++;
                            toUBytesArray[byteIndex++]=(byte)b;
                            base64Value = -3; /* initialize as illegal */
                            /* the b>=126 test short-circuits, so FROM_BASE_64 is only indexed with values below 126 */
                            if ((!useIMAP && (b>=126 || (base64Value=FROM_BASE_64[b])==-3 || base64Value==-1)) || (useIMAP && b>0x7e)) {
                                /* either
                                 * base64Value==-1 for any legal character except base64 and minus sign, or
                                 * base64Value==-3 for illegal characters:
                                 * 1. In either case, leave Unicode mode.
                                 * 2.1. If we ended with an incomplete UChar or none after the +, then
                                 *      generate an error for the preceding erroneous sequence and deal with
                                 *      the current (possibly illegal) character next time through.
                                 * 2.2. Else the current char comes after a complete UChar, which was already
                                 *      pushed to the output buf, so:
                                 * 2.2.1. If the current char is legal, just save it for processing next time.
                                 *        It may be for example, a plus which we need to deal with in direct mode.
                                 * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
                                 */
                                inDirectMode=1;

                                if(base64Counter==-1) {
                                    /* illegal: + immediately followed by something other than base64 or minus sign */
                                    /* include the plus sign in the reported sequence, but not the subsequent char */
                                    source.position(source.position() -1);
                                    toUBytesArray[0]=PLUS;
                                    byteIndex=1;
                                    cr=CoderResult.malformedForLength(sourceArrayIndex);
                                    break directMode;
                                } else if(bits!=0) {
                                    /* bits are illegally left over, a UChar is incomplete */
                                    /* don't include current char (legal or illegal) in error seq */
                                    source.position(source.position() -1);
                                    --byteIndex;
                                    cr=CoderResult.malformedForLength(sourceArrayIndex);
                                    break directMode;
                                } else {
                                    /* previous UChar was complete */
                                    if(base64Value==-3) {
                                        /* current character is illegal, deal with it here */
                                        cr=CoderResult.malformedForLength(sourceArrayIndex);
                                        break directMode;
                                    } else {
                                        /* un-read the current character in case it is a plus sign */
                                        source.position(source.position() -1);
                                        sourceIndex=nextSourceIndex-1;
                                        continue directMode;
                                    }
                                }
                            } else if ((!useIMAP && (base64Value=FROM_BASE_64[b])>=0) || (useIMAP && (base64Value=FROM_BASE64_IMAP(b))>=0)) {
                                /* collect base64 bytes */
                                /*
                                 * Each digit adds 6 bits. A code unit is emitted at counters 2, 5 and 7,
                                 * i.e. after the 3rd, 6th and 8th digit of a group; 2, 4 and 0 bits
                                 * respectively are carried over in "bits".
                                 */
                                switch (base64Counter) {
                                case -1: /* -1 is immediately after the + */
                                case 0:
                                    bits=(char)base64Value;
                                    base64Counter=1;
                                    break;
                                case 1:
                                case 3:
                                case 4:
                                case 6:
                                    bits=(char)((bits<<6) | base64Value);
                                    ++base64Counter;
                                    break;
                                case 2:
                                    c=(char)((bits<<4) | (base64Value>>2));
                                    /* IMAP: a code unit in 0x20..0x7e arriving via base64 is treated as malformed */
                                    if (useIMAP && isLegal(c, useIMAP)) {
                                        // illegal
                                        inDirectMode=1;
                                        cr=CoderResult.malformedForLength(sourceArrayIndex);
                                        // goto endloop;
                                        break directMode;
                                    }
                                    target.put(c);
                                    if (offsets != null) {
                                        offsets.put(sourceIndex);
                                        sourceIndex=nextSourceIndex - 1;
                                    }
                                    toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */
                                    byteIndex=1;
                                    bits=(char)(base64Value&3);
                                    base64Counter=3;
                                    break;
                                case 5:
                                    c=(char)((bits<<2) | (base64Value>>4));
                                    /* IMAP: a code unit in 0x20..0x7e arriving via base64 is treated as malformed */
                                    if(useIMAP && isLegal(c, useIMAP)) {
                                        // illegal
                                        inDirectMode=1;
                                        cr=CoderResult.malformedForLength(sourceArrayIndex);
                                        // goto endloop;
                                        break directMode;
                                    }
                                    target.put(c);
                                    if (offsets != null) {
                                        offsets.put(sourceIndex);
                                        sourceIndex=nextSourceIndex - 1;
                                    }
                                    toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */
                                    byteIndex=1;
                                    bits=(char)(base64Value&15);
                                    base64Counter=6;
                                    break;
                                case 7:
                                    c=(char)((bits<<6) | base64Value);
                                    /* IMAP: a code unit in 0x20..0x7e arriving via base64 is treated as malformed */
                                    if (useIMAP && isLegal(c, useIMAP)) {
                                        // illegal
                                        inDirectMode=1;
                                        cr=CoderResult.malformedForLength(sourceArrayIndex);
                                        // goto endloop;
                                        break directMode;
                                    }
                                    target.put(c);
                                    if (offsets != null) {
                                        offsets.put(sourceIndex);
                                        sourceIndex=nextSourceIndex;
                                    }
                                    /* group complete: nothing is carried over */
                                    byteIndex=0;
                                    bits=0;
                                    base64Counter=0;
                                    break;
                                //default:
                                    /* will never occur */
                                    //break;
                                }//end of switch
                            } else if (!useIMAP || (useIMAP && base64Value==-2)) {
                                /* minus sign terminates the base64 sequence */
                                inDirectMode=1;
                                if (base64Counter==-1) {
                                    /* +- i.e. a minus immediately following a plus */
                                    /* the pair stands for the shift character itself ('&' for IMAP) */
                                    target.put(useIMAP ? (char)AMPERSAND : (char)PLUS);
                                    if (offsets != null) {
                                        offsets.put(sourceIndex - 1);
                                    }
                                } else {
                                    /* absorb the minus and leave the Unicode Mode */
                                    if (bits!=0 || (useIMAP && base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
                                        /*bits are illegally left over, a unicode character is incomplete */
                                        cr=CoderResult.malformedForLength(sourceArrayIndex);
                                        break;
                                    }
                                }
                                sourceIndex=nextSourceIndex;
                                continue directMode;
                            } else if (useIMAP) {
                                if (base64Counter==-1) {
                                    // illegal: & immediately followed by something other than base64 or minus sign
                                    // include the ampersand in the reported sequence
                                    --sourceIndex;
                                    toUBytesArray[0]=AMPERSAND;
                                    toUBytesArray[1]=(byte)b;
                                    byteIndex=2;
                                }
                                /* base64Value==-3 for illegal characters */
                                /* illegal */
                                inDirectMode=1;
                                cr=CoderResult.malformedForLength(sourceArrayIndex);
                                break;
                            }
                        } else {
                            /* target is full */
                            cr=CoderResult.OVERFLOW;
                            break;
                        }
                    } //end of while
                    break directMode;
                }
            }//end of direct mode label
            /* end-of-stream handling differs per flavour */
            if (useIMAP) {
                /* IMAP: input that ends while still in Unicode mode is reported as malformed */
                if (!cr.isError() && inDirectMode==0 && flush && byteIndex==0 && !source.hasRemaining()) {
                    if (base64Counter==-1) {
                        /* & at the very end of the input */
                        /* make the ampersand the reported sequence */
                        toUBytesArray[0]=AMPERSAND;
                        byteIndex=1;
                    }
                    /* else if (base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
                    inDirectMode=1;
                    /* NOTE(review): sourceIndex can be 0 or -1 at this point; verify the length passed here is always positive */
                    cr=CoderResult.malformedForLength(sourceIndex);
                }

            } else {
                if (!cr.isError() && flush && !source.hasRemaining() && bits  ==0) {
                    /*
                     * if we are in Unicode Mode, then the byteIndex might not be 0,
                     * but that is ok if bits == 0
                     * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
                     * (not true for IMAP-mailbox-name where we must end in direct mode)
                     */
                    if (!cr.isOverflow()) {
                        byteIndex=0;
                    }
                }
            }
            /* set the converter state */
            /* same packing that is unpacked at the top of this method */
            toUnicodeStatus=(inDirectMode<<24 | ((base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | bits);
            toULength=byteIndex;

            return cr;
        }
    }
    481 
    482     class CharsetEncoderUTF7 extends CharsetEncoderICU {
    483         public CharsetEncoderUTF7(CharsetICU cs) {
    484             super(cs, fromUSubstitution);
    485             implReset();
    486         }
    487 
    488         @Override
    489         protected void implReset() {
    490             super.implReset();
    491             fromUnicodeStatus=(fromUnicodeStatus & 0xf0000000) | 0x1000000;
    492         }
    493 
    494         @Override
    495         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
    496             CoderResult cr=CoderResult.UNDERFLOW;
    497             byte inDirectMode;
    498             byte encodeDirectly[];
    499             int status;
    500 
    501             int length, targetCapacity, sourceIndex;
    502 
    503             byte base64Counter;
    504             char bits;
    505             char c;
    506             char b;
    507             /* get the state machine state */
    508             {
    509                 status=fromUnicodeStatus;
    510                 encodeDirectly=(((long)status) < 0x10000000) ? ENCODE_DIRECTLY_MAXIMUM : ENCODE_DIRECTLY_RESTRICTED;
    511                 inDirectMode=(byte)((status >> 24) & 1);
    512                 base64Counter=(byte)(status >> 16);
    513                 bits=(char)((byte)status);
    514             }
    515             /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
    516             sourceIndex=0;
    517 
    518             directMode: while(true) {
    519             if(inDirectMode==1) {
    520                 length=source.remaining();
    521                 targetCapacity=target.remaining();
    522                 if(length > targetCapacity) {
    523                     length=targetCapacity;
    524                 }
    525                 while (length > 0) {
    526                     c=source.get();
    527                     /* UTF7: currently always encode CR LF SP TAB directly */
    528                     /* IMAP: encode 0x20..0x7e except '&' directly */
    529                     if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && inSetDIMAP(c))) {
    530                         /* encode directly */
    531                         target.put((byte)c);
    532                         if (offsets != null) {
    533                             offsets.put(sourceIndex++);
    534                         }
    535                     } else if ((!useIMAP && c==PLUS) || (useIMAP && c==AMPERSAND)) {
    536                         /* IMAP: output &- for & */
    537                         /* UTF-7: output +- for + */
    538                         target.put(useIMAP ? AMPERSAND : PLUS);
    539                         if (target.hasRemaining()) {
    540                             target.put(MINUS);
    541                             if (offsets != null) {
    542                                 offsets.put(sourceIndex);
    543                                 offsets.put(sourceIndex++);
    544                             }
    545                             /* realign length and targetCapacity */
    546                             continue directMode;
    547                         } else {
    548                             if (offsets != null) {
    549                                 offsets.put(sourceIndex++);
    550                             }
    551                             errorBuffer[0]=MINUS;
    552                             errorBufferLength=1;
    553                             cr=CoderResult.OVERFLOW;
    554                             break;
    555                         }
    556                     } else {
    557                         /* un-read this character and switch to unicode mode */
    558                         source.position(source.position() - 1);
    559                         target.put(useIMAP ? AMPERSAND : PLUS);
    560                         if (offsets != null) {
    561                             offsets.put(sourceIndex);
    562                         }
    563                         inDirectMode=0;
    564                         base64Counter=0;
    565                         continue directMode;
    566                     }
    567                     --length;
    568                 } //end of while
    569                 if (source.hasRemaining() && !target.hasRemaining()) {
    570                     /* target is full */
    571                     cr=CoderResult.OVERFLOW;
    572                 }
    573                 break directMode;
    574             } else {
    575                 /* Unicode Mode */
    576                 while (source.hasRemaining()) {
    577                     if (target.hasRemaining()) {
    578                         c=source.get();
    579                         if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) {
    580                             /* encode directly */
    581                             inDirectMode=1;
    582 
    583                             /* trick: back out this character to make this easier */
    584                             source.position(source.position() - 1);
    585 
    586                             /* terminate the base64 sequence */
    587                             if (base64Counter!=0) {
    588                                 /* write remaining bits for the previous character */
    589                                 target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);
    590                                 if (offsets!=null) {
    591                                     offsets.put(sourceIndex-1);
    592                                 }
    593                             }
    594                             if (FROM_BASE_64[c]!=-1 || useIMAP) {
    595                                 /* need to terminate with a minus */
    596                                 if (target.hasRemaining()) {
    597                                     target.put(MINUS);
    598                                     if (offsets!=null) {
    599                                         offsets.put(sourceIndex-1);
    600                                     }
    601                                 } else {
    602                                     errorBuffer[0]=MINUS;
    603                                     errorBufferLength=1;
    604                                     cr=CoderResult.OVERFLOW;
    605                                     break;
    606                                 }
    607                             }
    608                             continue directMode;
    609                         } else {
    610                             /*
    611                              * base64 this character:
    612                              * Output 2 or 3 base64 bytes for the remaining bits of the previous character
    613                              * and the bits of this character, each implicitly in UTF-16BE.
    614                              *
    615                              * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
    616                              * character to the next.  The actual 2 or 4 bits are shifted to the left edge
    617                              * of the 6-bit field 5..0 to make the termination of the base64 sequence easier.
    618                              */
    619                             switch (base64Counter) {
    620                             case 0:
    621                                 b=(char)(c>>10);
    622                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    623                                 if (target.hasRemaining()) {
    624                                     b=(char)((c>>4)&0x3f);
    625                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    626                                     if (offsets!=null) {
    627                                         offsets.put(sourceIndex);
    628                                         offsets.put(sourceIndex++);
    629                                     }
    630                                 } else {
    631                                     if (offsets!=null) {
    632                                         offsets.put(sourceIndex++);
    633                                     }
    634                                     b=(char)((c>>4)&0x3f);
    635                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    636                                     errorBufferLength=1;
    637                                     cr=CoderResult.OVERFLOW;
    638                                 }
    639                                 bits=(char)((c&15)<<2);
    640                                 base64Counter=1;
    641                                 break;
    642                             case 1:
    643                                 b=(char)(bits|(c>>14));
    644                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    645                                 if (target.hasRemaining()) {
    646                                     b=(char)((c>>8)&0x3f);
    647                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    648                                     if (target.hasRemaining()) {
    649                                         b=(char)((c>>2)&0x3f);
    650                                         target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    651                                         if (offsets!=null) {
    652                                             offsets.put(sourceIndex);
    653                                             offsets.put(sourceIndex);
    654                                             offsets.put(sourceIndex++);
    655                                         }
    656                                     } else {
    657                                         if (offsets!=null) {
    658                                             offsets.put(sourceIndex);
    659                                             offsets.put(sourceIndex++);
    660                                         }
    661                                         b=(char)((c>>2)&0x3f);
    662                                         errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    663                                         errorBufferLength=1;
    664                                         cr=CoderResult.OVERFLOW;
    665                                     }
    666                                 } else {
    667                                     if (offsets!=null) {
    668                                         offsets.put(sourceIndex++);
    669                                     }
    670                                     b=(char)((c>>8)&0x3f);
    671                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    672                                     b=(char)((c>>2)&0x3f);
    673                                     errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    674                                     errorBufferLength=2;
    675                                     cr=CoderResult.OVERFLOW;
    676                                 }
    677                                 bits=(char)((c&3)<<4);
    678                                 base64Counter=2;
    679                                 break;
    680                             case 2:
    681                                 b=(char)(bits|(c>>12));
    682                                 target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    683                                 if (target.hasRemaining()) {
    684                                     b=(char)((c>>6)&0x3f);
    685                                     target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    686                                     if (target.hasRemaining()) {
    687                                         b=(char)(c&0x3f);
    688                                         target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
    689                                         if (offsets!=null) {
    690                                             offsets.put(sourceIndex);
    691                                             offsets.put(sourceIndex);
    692                                             offsets.put(sourceIndex++);
    693                                         }
    694                                     } else {
    695                                         if (offsets!=null) {
    696                                             offsets.put(sourceIndex);
    697                                             offsets.put(sourceIndex++);
    698                                         }
    699                                         b=(char)(c&0x3f);
    700                                         errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    701                                         errorBufferLength=1;
    702                                         cr=CoderResult.OVERFLOW;
    703                                     }
    704                                 } else {
    705                                     if (offsets!=null) {
    706                                         offsets.put(sourceIndex++);
    707                                     }
    708                                     b=(char)((c>>6)&0x3f);
    709                                     errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    710                                     b=(char)(c&0x3f);
    711                                     errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
    712                                     errorBufferLength=2;
    713                                     cr=CoderResult.OVERFLOW;
    714                                 }
    715                                 bits=0;
    716                                 base64Counter=0;
    717                                 break;
    718                            //default:
    719                                /* will never occur */
    720                                //break;
    721                            } //end of switch
    722                         }
    723                     } else {
    724                         /* target is full */
    725                         cr=CoderResult.OVERFLOW;
    726                         break;
    727                     }
    728                 } //end of while
    729                 break directMode;
    730             }
    731             } //end of directMode label
    732 
    733             if (flush && !source.hasRemaining()) {
    734                 /* flush remaining bits to the target */
    735                 if (inDirectMode==0) {
    736                     if (base64Counter!=0) {
    737                         if (target.hasRemaining()) {
    738                             target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);
    739                             if (offsets!=null) {
    740                                 offsets.put(sourceIndex - 1);
    741                             }
    742                         } else {
    743                             errorBuffer[errorBufferLength++]=useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits];
    744                             cr=CoderResult.OVERFLOW;
    745                         }
    746                     }
    747 
    748                     /* need to terminate with a minus */
    749                     if (target.hasRemaining()) {
    750                         target.put(MINUS);
    751                         if (offsets!=null) {
    752                             offsets.put(sourceIndex - 1);
    753                         }
    754                     } else {
    755                         errorBuffer[errorBufferLength++]=MINUS;
    756                         cr=CoderResult.OVERFLOW;
    757                     }
    758                 }
    759                 /*reset the state for the next conversion */
    760                 fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */
    761             } else {
    762                 /* set the converter state back */
    763                 fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | ((base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (bits));
    764             }
    765 
    766             return cr;
    767         }
    768     }
    769 
    770     @Override
    771     public CharsetDecoder newDecoder() {
    772         return new CharsetDecoderUTF7(this);
    773     }
    774 
    775     @Override
    776     public CharsetEncoder newEncoder() {
    777         return new CharsetEncoderUTF7(this);
    778     }
    779 
    780     @Override
    781     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
    782         getCompleteUnicodeSet(setFillIn);
    783     }
    784 }
    785