Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2002-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv_u7.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2002jul01
     12 *   created by: Markus W. Scherer
     13 *
     14 *   UTF-7 converter implementation. Used to be in ucnv_utf.c.
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_CONVERSION
     20 
     21 #include "unicode/ucnv.h"
     22 #include "ucnv_bld.h"
     23 #include "ucnv_cnv.h"
     24 
     25 /* UTF-7 -------------------------------------------------------------------- */
     26 
/*
 * UTF-7 is a stateful encoding of Unicode.
 * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
 * It was intended for use in Internet email systems, using in its bytewise
 * encoding only a subset of 7-bit US-ASCII.
 * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
 * occasionally used.
 *
 * When converting Unicode to UTF-7, the RFC allows some US-ASCII
 * characters to be encoded either directly or in base64. In particular,
 * the characters in set O as defined in the RFC (see below) may be encoded
 * directly but are not allowed in, e.g., email headers.
 * By default, the ICU UTF-7 converter encodes set O directly.
 * By choosing the option "version=1", set O will be escaped instead.
 * For example:
 *     utf7Converter=ucnv_open("UTF-7,version=1");
 *
 * For details about email headers see RFC 2047.
 */
     46 
     47 /*
     48  * Tests for US-ASCII characters belonging to character classes
     49  * defined in UTF-7.
     50  *
     51  * Set D (directly encoded characters) consists of the following
     52  * characters: the upper and lower case letters A through Z
     53  * and a through z, the 10 digits 0-9, and the following nine special
     54  * characters (note that "+" and "=" are omitted):
     55  *     '(),-./:?
     56  *
     57  * Set O (optional direct characters) consists of the following
     58  * characters (note that "\" and "~" are omitted):
     59  *     !"#$%&*;<=>@[]^_`{|}
     60  *
     61  * According to the rules in RFC 2152, the byte values for the following
     62  * US-ASCII characters are not used in UTF-7 and are therefore illegal:
     63  * - all C0 control codes except for CR LF TAB
     64  * - BACKSLASH
     65  * - TILDE
     66  * - DEL
     67  * - all codes beyond US-ASCII, i.e. all >127
     68  */
     69 #define inSetD(c) \
     70     ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \
     71      (uint8_t)((c)-48)<10 ||    /* digits */ \
     72      (uint8_t)((c)-39)<3 ||     /* '() */ \
     73      (uint8_t)((c)-44)<4 ||     /* ,-./ */ \
     74      (c)==58 || (c)==63         /* :? */ \
     75     )
     76 
     77 #define inSetO(c) \
     78     ((uint8_t)((c)-33)<6 ||         /* !"#$%& */ \
     79      (uint8_t)((c)-59)<4 ||         /* ;<=> */ \
     80      (uint8_t)((c)-93)<4 ||         /* ]^_` */ \
     81      (uint8_t)((c)-123)<3 ||        /* {|} */ \
     82      (c)==42 || (c)==64 || (c)==91  /* *@[ */ \
     83     )
     84 
     85 #define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9)
     86 #define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9)
     87 
     88 #define PLUS  43
     89 #define MINUS 45
     90 #define BACKSLASH 92
     91 #define TILDE 126
     92 
     93 /* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */
     94 #define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
     95 
/*
 * encode directly sets D and O and CR LF SP TAB
 *
 * Indexed by US-ASCII code point (0..127); a nonzero entry means that
 * _UTF7FromUnicodeWithOffsets writes the character as a single byte instead
 * of base64-encoding it. This table is the one chosen when the version bits
 * of fromUnicodeStatus are 0.
 * Entries that are 0: every C0 control except TAB LF CR, PLUS (0x2b),
 * BACKSLASH (0x5c), TILDE (0x7e) and DEL (0x7f).
 */
static const UBool encodeDirectlyMaximum[128]={
 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: TAB LF CR only */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x20: all but PLUS */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30 */

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50: all but BACKSLASH */

    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0  /* 0x70: all but TILDE and DEL */
};
    111 
/*
 * encode directly set D and CR LF SP TAB but not set O
 *
 * Same indexing and meaning as encodeDirectlyMaximum. This table is the one
 * chosen by _UTF7FromUnicodeWithOffsets when the version bits of
 * fromUnicodeStatus are nonzero ("version=1"), so the set O characters
 * are 0 here and get base64-encoded.
 */
static const UBool encodeDirectlyRestricted[128]={
 /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: TAB LF CR only */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */

    1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, /* 0x20: SP '() ,-./ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x30: 0-9 : ? */

    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40: A-O */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50: P-Z */

    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60: a-o */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  /* 0x70: p-z */
};
    127 
    128 static const uint8_t
    129 toBase64[64]={
    130     /* A-Z */
    131     65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
    132     78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
    133     /* a-z */
    134     97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    135     110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
    136     /* 0-9 */
    137     48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
    138     /* +/ */
    139     43, 47
    140 };
    141 
    142 static const int8_t
    143 fromBase64[128]={
    144     /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
    145     -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
    146     -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
    147 
    148     /* general punctuation with + and / and a special value (-2) for - */
    149     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
    150     /* digits */
    151     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    152 
    153     /* A-Z */
    154     -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    155     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
    156 
    157     /* a-z */
    158     -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    159     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
    160 };
    161 
    162 /*
    163  * converter status values:
    164  *
    165  * toUnicodeStatus:
    166  *     24 inDirectMode (boolean)
    167  * 23..16 base64Counter (-1..7)
    168  * 15..0  bits (up to 14 bits incoming base64)
    169  *
    170  * fromUnicodeStatus:
    171  * 31..28 version (0: set O direct  1: set O escaped)
    172  *     24 inDirectMode (boolean)
    173  * 23..16 base64Counter (0..2)
    174  *  7..0  bits (6 bits outgoing base64)
    175  *
    176  */
    177 
    178 static void
    179 _UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
    180     if(choice<=UCNV_RESET_TO_UNICODE) {
    181         /* reset toUnicode */
    182         cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
    183         cnv->toULength=0;
    184     }
    185     if(choice!=UCNV_RESET_TO_UNICODE) {
    186         /* reset fromUnicode */
    187         cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
    188     }
    189 }
    190 
    191 static void
    192 _UTF7Open(UConverter *cnv,
    193           UConverterLoadArgs *pArgs,
    194           UErrorCode *pErrorCode) {
    195     if(UCNV_GET_VERSION(cnv)<=1) {
    196         /* TODO(markus): Should just use cnv->options rather than copying the version number. */
    197         cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
    198         _UTF7Reset(cnv, UCNV_RESET_BOTH);
    199     } else {
    200         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    201     }
    202 }
    203 
/*
 * Convert UTF-7 bytes from pArgs->source into UTF-16 code units in
 * pArgs->target, and, if pArgs->offsets is not NULL, write one source index
 * per output code unit.
 *
 * The decoder state (direct/Unicode mode, base64 counter, collected bits) is
 * unpacked from cnv->toUnicodeStatus on entry and packed back on exit, and
 * the bytes of an unfinished or erroneous sequence are kept in
 * cnv->toUBytes with their count in cnv->toULength, so a conversion can be
 * continued across calls.
 *
 * On return, pArgs->source, pArgs->target and pArgs->offsets are advanced.
 * *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for illegal input and to
 * U_BUFFER_OVERFLOW_ERROR when input remains but the target is full.
 */
static void
_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
    UConverter *cnv;
    const uint8_t *source, *sourceLimit;
    UChar *target;
    const UChar *targetLimit;
    int32_t *offsets;

    /* bytes of the current (possibly incomplete or illegal) sequence, and how many are stored */
    uint8_t *bytes;
    uint8_t byteIndex;

    int32_t length, targetCapacity;

    /* UTF-7 state */
    uint16_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    /* classification of the current byte from fromBase64[] */
    int8_t base64Value;

    int32_t sourceIndex, nextSourceIndex;

    uint8_t b;
    /* set up the local pointers */
    cnv=pArgs->converter;

    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;
    /* get the state machine state */
    {
        /* layout: bit 24 inDirectMode, bits 23..16 base64Counter, bits 15..0 bits */
        uint32_t status=cnv->toUnicodeStatus;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint16_t)status;
    }
    bytes=cnv->toUBytes;
    byteIndex=cnv->toULength;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex=byteIndex==0 ? 0 : -1;
    nextSourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
         * with their US-ASCII byte values.
         * Backslash and Tilde and most control characters are not allowed in UTF-7.
         * A plus sign starts Unicode (or "escape") Mode.
         *
         * In Direct Mode, only the sourceIndex is used.
         */
        byteIndex=0;
        /*
         * Each directly encoded byte yields exactly one UChar, so the loop
         * can run for min(remaining input, remaining output) iterations
         * without checking the target limit each time.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            b=*source++;
            if(!isLegalUTF7(b)) {
                /* illegal: report just this byte */
                bytes[0]=b;
                byteIndex=1;
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            } else if(b!=PLUS) {
                /* write directly encoded character */
                *target++=b;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else /* PLUS */ {
                /* switch to Unicode mode; both indexes now point to the byte after the plus */
                nextSourceIndex=++sourceIndex;
                inDirectMode=FALSE;
                byteIndex=0;
                bits=0;
                base64Counter=-1;
                goto unicodeMode;
            }
            --length;
        }
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        /*
         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
         * The base64 sequence ends with any character that is not in the base64 alphabet.
         * A terminating minus sign is consumed.
         *
         * In Unicode Mode, the sourceIndex has the index to the start of the current
         * base64 bytes, while nextSourceIndex is precisely parallel to source,
         * keeping the index to the following byte.
         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
         */
        while(source<sourceLimit) {
            if(target<targetLimit) {
                bytes[byteIndex++]=b=*source++;
                ++nextSourceIndex;
                if(b>=126) {
                    /*
                     * illegal - test other illegal US-ASCII values by base64Value==-3
                     * (this check also keeps b within the 128 entries of fromBase64[])
                     */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                } else if((base64Value=fromBase64[b])>=0) {
                    /*
                     * collect base64 bytes into UChars
                     *
                     * base64Counter is the position within a group of 8 base64
                     * digits (48 bits = 3 UChars); a UChar is completed at
                     * positions 2, 5 and 7.
                     */
                    switch(base64Counter) {
                    case -1: /* -1 is immediately after the + */
                    case 0:
                        /* first digit of a group: start collecting */
                        bits=base64Value;
                        base64Counter=1;
                        break;
                    case 1:
                    case 3:
                    case 4:
                    case 6:
                        /* not enough bits for a UChar yet: append 6 more */
                        bits=(uint16_t)((bits<<6)|base64Value);
                        ++base64Counter;
                        break;
                    case 2:
                        /* 12 collected bits + top 4 bits of this digit; keep its low 2 bits */
                        *target++=(UChar)((bits<<4)|(base64Value>>2));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            /* the next UChar starts inside this byte */
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&3);
                        base64Counter=3;
                        break;
                    case 5:
                        /* 14 collected bits + top 2 bits of this digit; keep its low 4 bits */
                        *target++=(UChar)((bits<<2)|(base64Value>>4));
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            /* the next UChar starts inside this byte */
                            sourceIndex=nextSourceIndex-1;
                        }
                        bytes[0]=b; /* keep this byte in case an error occurs */
                        byteIndex=1;
                        bits=(uint16_t)(base64Value&15);
                        base64Counter=6;
                        break;
                    case 7:
                        /* 10 collected bits + all 6 bits of this digit; the group is complete */
                        *target++=(UChar)((bits<<6)|base64Value);
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex;
                            sourceIndex=nextSourceIndex;
                        }
                        byteIndex=0;
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                } else if(base64Value==-2) {
                    /* minus sign terminates the base64 sequence */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* +- i.e. a minus immediately following a plus */
                        *target++=PLUS;
                        if(offsets!=NULL) {
                            /* the offset of the plus sign itself */
                            *offsets++=sourceIndex-1;
                        }
                    } else {
                        /* absorb the minus and leave the Unicode Mode */
                        if(bits!=0) {
                            /* bits are illegally left over, a UChar is incomplete */
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                            break;
                        }
                    }
                    sourceIndex=nextSourceIndex;
                    goto directMode;
                } else if(base64Value==-1) /* for any legal character except base64 and minus sign */ {
                    /* leave the Unicode Mode */
                    inDirectMode=TRUE;
                    if(base64Counter==-1) {
                        /* illegal: + immediately followed by something other than base64 or minus sign */
                        /* include the plus sign in the reported sequence */
                        --sourceIndex;
                        bytes[0]=PLUS;
                        bytes[1]=b;
                        byteIndex=2;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    } else if(bits==0) {
                        /* un-read the character in case it is a plus sign */
                        --source;
                        sourceIndex=nextSourceIndex-1;
                        goto directMode;
                    } else {
                        /* bits are illegally left over, a UChar is incomplete */
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        break;
                    }
                } else /* base64Value==-3 for illegal characters */ {
                    /* illegal */
                    inDirectMode=TRUE;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
        /*
         * if we are in Unicode mode, then the byteIndex might not be 0,
         * but that is ok if bits==0
         * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
         * (not true for IMAP-mailbox-name where we must end in direct mode)
         */
        byteIndex=0;
    }

    /* set the converter state back into UConverter */
    /* base64Counter may be -1, so it is narrowed to 8 bits before being shifted into bits 23..16 */
    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
    cnv->toULength=byteIndex;

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
    return;
}
    442 
/*
 * Convert UTF-16 code units from pArgs->source into UTF-7 bytes in
 * pArgs->target, and, if pArgs->offsets is not NULL, write one source index
 * per output byte.
 *
 * The encoder state (direct/Unicode mode, base64 counter, left-over bits) is
 * unpacked from cnv->fromUnicodeStatus on entry. On exit it is either packed
 * back, or, when pArgs->flush is set and all input was consumed, reset to the
 * initial state. The version bits 31..28 are preserved in both cases and
 * select which characters are written directly.
 *
 * Output bytes that belong to an already consumed code unit but do not fit
 * into the target are stored in cnv->charErrorBuffer (count in
 * cnv->charErrorBufferLength) and *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): presumably the caller writes these bytes out before the next
 * conversion call - confirm against the converter framework.
 *
 * On return, pArgs->source, pArgs->target and pArgs->offsets are advanced.
 */
static void
_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit;
    uint8_t *target, *targetLimit;
    int32_t *offsets;

    int32_t length, targetCapacity, sourceIndex;
    UChar c;

    /* UTF-7 state */
    const UBool *encodeDirectly;
    uint8_t bits;
    int8_t base64Counter;
    UBool inDirectMode;

    /* get the converter */
    cnv=pArgs->converter;

    /* set up the local pointers */
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetLimit=(uint8_t *)pArgs->targetLimit;
    offsets=pArgs->offsets;

    /* get the state machine state */
    {
        /* layout: bits 31..28 version, bit 24 inDirectMode, bits 23..16 base64Counter, bits 7..0 bits */
        uint32_t status=cnv->fromUnicodeStatus;
        /* version 0 (all four version bits clear) encodes set O directly, any other version escapes it */
        encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
        inDirectMode=(UBool)((status>>24)&1);
        base64Counter=(int8_t)(status>>16);
        bits=(uint8_t)status;
    }

    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
    sourceIndex=0;

    if(inDirectMode) {
directMode:
        /*
         * A directly encoded character takes exactly one byte, so the loop
         * can run for min(remaining input, remaining output) iterations.
         * The branches that write a different number of bytes leave the loop
         * and re-enter at a label.
         */
        length=(int32_t)(sourceLimit-source);
        targetCapacity=(int32_t)(targetLimit-target);
        if(length>targetCapacity) {
            length=targetCapacity;
        }
        while(length>0) {
            c=*source++;
            /* currently always encode CR LF SP TAB directly */
            if(c<=127 && encodeDirectly[c]) {
                /* encode directly */
                *target++=(uint8_t)c;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex++;
                }
            } else if(c==PLUS) {
                /* output +- for + */
                *target++=PLUS;
                if(target<targetLimit) {
                    *target++=MINUS;
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex;
                        *offsets++=sourceIndex++;
                    }
                    /* realign length and targetCapacity */
                    goto directMode;
                } else {
                    /* no room for the minus: hand it over via the error buffer */
                    if(offsets!=NULL) {
                        *offsets++=sourceIndex++;
                    }
                    cnv->charErrorBuffer[0]=MINUS;
                    cnv->charErrorBufferLength=1;
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            } else {
                /* un-read this character and switch to Unicode Mode */
                --source;
                *target++=PLUS;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex;
                }
                inDirectMode=FALSE;
                base64Counter=0;
                goto unicodeMode;
            }
            --length;
        }
        if(source<sourceLimit && target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
    } else {
unicodeMode:
        while(source<sourceLimit) {
            if(target<targetLimit) {
                c=*source++;
                if(c<=127 && encodeDirectly[c]) {
                    /* encode directly */
                    inDirectMode=TRUE;

                    /* trick: back out this character to make this easier */
                    --source;

                    /* terminate the base64 sequence */
                    if(base64Counter!=0) {
                        /* write remaining bits for the previous character */
                        *target++=toBase64[bits];
                        if(offsets!=NULL) {
                            *offsets++=sourceIndex-1;
                        }
                    }
                    /*
                     * fromBase64[c]==-1 means c is neither a base64 digit nor a
                     * minus sign, so c itself ends the base64 sequence and no
                     * explicit terminator is written
                     */
                    if(fromBase64[c]!=-1) {
                        /* need to terminate with a minus */
                        if(target<targetLimit) {
                            *target++=MINUS;
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex-1;
                            }
                        } else {
                            cnv->charErrorBuffer[0]=MINUS;
                            cnv->charErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            break;
                        }
                    }
                    goto directMode;
                } else {
                    /*
                     * base64 this character:
                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
                     * and the bits of this character, each implicitly in UTF-16BE.
                     *
                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
                     *
                     * In each case, the first byte is written unconditionally
                     * (the loop already checked target<targetLimit), and any byte
                     * that does not fit goes into charErrorBuffer.
                     */
                    switch(base64Counter) {
                    case 0:
                        /* no bits pending: write c bits 15..10 and 9..4, keep bits 3..0 */
                        *target++=toBase64[c>>10];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>4)&0x3f];
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex;
                                *offsets++=sourceIndex++;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
                            cnv->charErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&15)<<2);
                        base64Counter=1;
                        break;
                    case 1:
                        /* 4 bits pending: write them with c bits 15..14, then 13..8 and 7..2, keep bits 1..0 */
                        *target++=toBase64[bits|(c>>14)];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>8)&0x3f];
                            if(target<targetLimit) {
                                *target++=toBase64[(c>>2)&0x3f];
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
                            cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=(uint8_t)((c&3)<<4);
                        base64Counter=2;
                        break;
                    case 2:
                        /* 2 bits pending: write them with c bits 15..12, then 11..6 and 5..0, nothing is kept */
                        *target++=toBase64[bits|(c>>12)];
                        if(target<targetLimit) {
                            *target++=toBase64[(c>>6)&0x3f];
                            if(target<targetLimit) {
                                *target++=toBase64[c&0x3f];
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                            } else {
                                if(offsets!=NULL) {
                                    *offsets++=sourceIndex;
                                    *offsets++=sourceIndex++;
                                }
                                cnv->charErrorBuffer[0]=toBase64[c&0x3f];
                                cnv->charErrorBufferLength=1;
                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            }
                        } else {
                            if(offsets!=NULL) {
                                *offsets++=sourceIndex++;
                            }
                            cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
                            cnv->charErrorBuffer[1]=toBase64[c&0x3f];
                            cnv->charErrorBufferLength=2;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                        }
                        bits=0;
                        base64Counter=0;
                        break;
                    default:
                        /* will never occur */
                        break;
                    }
                }
            } else {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    if(pArgs->flush && source>=sourceLimit) {
        /*
         * flush remaining bits to the target
         * (only the pending base64 digit is written; no terminating minus
         * sign is added at the end of the stream)
         */
        if(!inDirectMode && base64Counter!=0) {
            if(target<targetLimit) {
                *target++=toBase64[bits];
                if(offsets!=NULL) {
                    *offsets++=sourceIndex-1;
                }
            } else {
                /* append after any bytes that the loop above already put into the error buffer */
                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
        }
        /* reset the state for the next conversion */
        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
    } else {
        /* set the converter state back into UConverter */
        cnv->fromUnicodeStatus=
            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
    }

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
    return;
}
    705 
    706 static const char *
    707 _UTF7GetName(const UConverter *cnv) {
    708     switch(cnv->fromUnicodeStatus>>28) {
    709     case 1:
    710         return "UTF-7,version=1";
    711     default:
    712         return "UTF-7";
    713     }
    714 }
    715 
    /*
     * Implementation table for the UTF-7 converter.
     * The initializers are positional. The slot roles noted on individual lines
     * are inferred from the names of the functions placed there and from the
     * existing writeSub() remark; the UConverterImpl declaration is not visible
     * in this file - NOTE(review): confirm against its header.
     */
    716 static const UConverterImpl _UTF7Impl={
    717     UCNV_UTF7,
    718
    719     NULL,
    720     NULL,
    721
    722     _UTF7Open,
    723     NULL,
    724     _UTF7Reset,
    725
    726     _UTF7ToUnicodeWithOffsets, /* the same function fills both to-Unicode slots */
    727     _UTF7ToUnicodeWithOffsets,
    728     _UTF7FromUnicodeWithOffsets, /* the same function fills both from-Unicode slots */
    729     _UTF7FromUnicodeWithOffsets,
    730     NULL,
    731
    732     NULL,
    733     _UTF7GetName, /* name depends on the version option, see _UTF7GetName() */
    734     NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
    735     NULL,
    736     ucnv_getCompleteUnicodeSet
    737 };
    738 
    /*
     * Static (constant) description of the UTF-7 converter: structure size,
     * converter name, and further positional values.
     * NOTE(review): the meaning of the unlabeled values (e.g. "1, 4", presumably
     * minimum/maximum bytes per character, and the two FALSE flags) comes from the
     * UConverterStaticData declaration, which is not visible here - confirm there.
     */
    739 static const UConverterStaticData _UTF7StaticData={
    740     sizeof(UConverterStaticData),
    741     "UTF-7",
    742     0, /* TODO CCSID for UTF-7 */
    743     UCNV_IBM, UCNV_UTF7,
    744     1, 4,
    745     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
    746     FALSE, FALSE,
    747     0,
    748     0,
    749     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    750 };
    751 
    /*
     * Shared data object for the UTF-7 converter (non-static, so visible to
     * other translation units). It ties together the static description
     * (_UTF7StaticData) and the implementation table (_UTF7Impl).
     * NOTE(review): the all-ones second value is presumably a reference counter
     * that marks this object as never released - confirm against the
     * UConverterSharedData declaration.
     */
    752 const UConverterSharedData _UTF7Data={
    753     sizeof(UConverterSharedData), ~((uint32_t)0),
    754     NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
    755     0
    756 };
    757 
    758 /* IMAP mailbox name encoding ----------------------------------------------- */
    759 
    760 /*
    761  * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
    762  * http://www.ietf.org/rfc/rfc2060.txt
    763  *
    764  * 5.1.3.  Mailbox International Naming Convention
    765  *
    766  * By convention, international mailbox names are specified using a
    767  * modified version of the UTF-7 encoding described in [UTF-7].  The
    768  * purpose of these modifications is to correct the following problems
    769  * with UTF-7:
    770  *
    771  *    1) UTF-7 uses the "+" character for shifting; this conflicts with
    772  *       the common use of "+" in mailbox names, in particular USENET
    773  *       newsgroup names.
    774  *
    775  *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
    776  *       conflicts with the use of "/" as a popular hierarchy delimiter.
    777  *
    778  *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
    779  *       the use of "\" as a popular hierarchy delimiter.
    780  *
    781  *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
    782  *       the use of "~" in some servers as a home directory indicator.
    783  *
    784  *    5) UTF-7 permits multiple alternate forms to represent the same
    785  *       string; in particular, printable US-ASCII chararacters can be
    786  *       represented in encoded form.
    787  *
    788  * In modified UTF-7, printable US-ASCII characters except for "&"
    789  * represent themselves; that is, characters with octet values 0x20-0x25
    790  * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
    791  * octet sequence "&-".
    792  *
    793  * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
    794  * Unicode 16-bit octets) are represented in modified BASE64, with a
    795  * further modification from [UTF-7] that "," is used instead of "/".
    796  * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
    797  * character which can represent itself.
    798  *
    799  * "&" is used to shift to modified BASE64 and "-" to shift back to US-
    800  * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
    801  * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
    802  * ").
    803  *
    804  * For example, here is a mailbox name which mixes English, Japanese,
    805  * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
    806  */
    807 
    808 /*
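    809  * Tests for US-ASCII characters belonging to character classes
    810  * defined in UTF-7 (RFC 2152). The sets and the list of illegal bytes below
         * describe plain UTF-7 and are kept for reference only: the IMAP macros that
         * follow treat every byte 0x20..0x7e as legal, including BACKSLASH and TILDE.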
    811  *
    812  * Set D (directly encoded characters) consists of the following
    813  * characters: the upper and lower case letters A through Z
    814  * and a through z, the 10 digits 0-9, and the following nine special
    815  * characters (note that "+" and "=" are omitted):
    816  *     '(),-./:?
    817  *
    818  * Set O (optional direct characters) consists of the following
    819  * characters (note that "\" and "~" are omitted):
    820  *     !"#$%&*;<=>@[]^_`{|}
    821  *
    822  * According to the rules in RFC 2152, the byte values for the following
    823  * US-ASCII characters are not used in UTF-7 and are therefore illegal:
    824  * - all C0 control codes except for CR LF TAB
    825  * - BACKSLASH
    826  * - TILDE
    827  * - DEL
    828  * - all codes beyond US-ASCII, i.e. all >127
    829  */
    830 
    /*
     * IMAP mailbox name encoding uses '&' (not '+') to start a base64 sequence
     * and ',' (not '/') as the base64 digit for the value 63.
     */
    #define AMPERSAND 0x26
    #define COMMA 0x2c
    #define SLASH 0x2f

    /*
     * Note: the function-like macros below evaluate their argument more than
     * once; do not pass expressions with side effects.
     * Every use of the argument is parenthesized so that expression arguments
     * (e.g. a|b) are evaluated as a unit.
     */

    /* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
    #define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

    /* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */
    #define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)

    /* 6-bit value to base64 character: values below 63 come from toBase64[], 63 becomes ',' */
    #define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[(n)] : COMMA)
    /* base64 character to value: ',' is 63, '/' is rejected with -1, everything else comes from fromBase64[] */
    #define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[(c)])
    844 
    845 /*
    846  * converter status values:
    847  *
    848  * toUnicodeStatus:
    849  *     24 inDirectMode (boolean)
    850  * 23..16 base64Counter (-1..7)
    851  * 15..0  bits (up to 14 bits incoming base64)
    852  *
    853  * fromUnicodeStatus:
    854  *     24 inDirectMode (boolean)
    855  * 23..16 base64Counter (0..2)
    856  *  7..0  bits (6 bits outgoing base64)
    857  *
    858  * ignore bits 31..25
    859  */
    860 
    /*
     * Convert IMAP mailbox name bytes (modified UTF-7, see the RFC 2060 excerpt
     * earlier in this file) from pArgs->source into UChars at pArgs->target.
     *
     * The state (inDirectMode, base64Counter, bits) is read from
     * cnv->toUnicodeStatus on entry and written back on exit; the bytes of the
     * sequence currently being decoded are kept in cnv->toUBytes/toULength.
     * If pArgs->offsets is not NULL, one source index is stored per output UChar.
     *
     * Error codes set here:
     * - U_ILLEGAL_CHAR_FOUND: a byte outside 0x20..0x7e, a non-base64 byte inside
     *   a base64 run, a base64 run that decodes to a printable US-ASCII code unit,
     *   or leftover bits/padding when the run is closed by '-'
     * - U_BUFFER_OVERFLOW_ERROR: the target is full while input remains
     * - U_TRUNCATED_CHAR_FOUND: flush at the end of the input while still in
     *   Unicode mode with no pending bytes
     *
     * On return, pArgs->source, pArgs->target and pArgs->offsets are updated.
     */
    861 static void
    862 _IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
    863                           UErrorCode *pErrorCode) {
    864     UConverter *cnv;
    865     const uint8_t *source, *sourceLimit;
    866     UChar *target;
    867     const UChar *targetLimit;
    868     int32_t *offsets;
    869
    870     uint8_t *bytes;
    871     uint8_t byteIndex;
    872
    873     int32_t length, targetCapacity;
    874
    875     /* UTF-7 state */
    876     uint16_t bits;
    877     int8_t base64Counter;
    878     UBool inDirectMode;
    879
    880     int8_t base64Value;
    881
    882     int32_t sourceIndex, nextSourceIndex;
    883
    884     UChar c;
    885     uint8_t b;
    886
    887     /* set up the local pointers */
    888     cnv=pArgs->converter;
    889
    890     source=(const uint8_t *)pArgs->source;
    891     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    892     target=pArgs->target;
    893     targetLimit=pArgs->targetLimit;
    894     offsets=pArgs->offsets;
    895     /* get the state machine state */
    896     {
    897         uint32_t status=cnv->toUnicodeStatus;
    898         inDirectMode=(UBool)((status>>24)&1);
    899         base64Counter=(int8_t)(status>>16);
    900         bits=(uint16_t)status;
    901     }
    902     bytes=cnv->toUBytes;
    903     byteIndex=cnv->toULength;
    904
    905     /* sourceIndex=-1 if the current character began in the previous buffer */
    906     sourceIndex=byteIndex==0 ? 0 : -1;
    907     nextSourceIndex=0;
    908
    909     if(inDirectMode) {
    910 directMode:
    911         /*
    912          * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
    913          * with their US-ASCII byte values.
    914          * An ampersand starts Unicode (or "escape") Mode.
    915          *
    916          * In Direct Mode, only the sourceIndex is used.
    917          */
    918         byteIndex=0;
    919         length=(int32_t)(sourceLimit-source);
    920         targetCapacity=(int32_t)(targetLimit-target);
    921         if(length>targetCapacity) {
    922             length=targetCapacity;
    923         }
    924         while(length>0) {
    925             b=*source++;
    926             if(!isLegalIMAP(b)) {
    927                 /* illegal */
    928                 bytes[0]=b;
    929                 byteIndex=1;
    930                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    931                 break;
    932             } else if(b!=AMPERSAND) {
    933                 /* write directly encoded character */
    934                 *target++=b;
    935                 if(offsets!=NULL) {
    936                     *offsets++=sourceIndex++;
    937                 }
    938             } else /* AMPERSAND */ {
    939                 /* switch to Unicode mode */
    940                 nextSourceIndex=++sourceIndex;
    941                 inDirectMode=FALSE;
    942                 byteIndex=0;
    943                 bits=0;
    944                 base64Counter=-1;
    945                 goto unicodeMode;
    946             }
    947             --length;
    948         }
    949         if(source<sourceLimit && target>=targetLimit) {
    950             /* target is full */
    951             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    952         }
    953     } else {
    954 unicodeMode:
    955         /*
    956          * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
    957          * The base64 sequence ends with any character that is not in the base64 alphabet.
    958          * A terminating minus sign is consumed.
    959          * US-ASCII must not be base64-ed.
    960          *
    961          * In Unicode Mode, the sourceIndex has the index to the start of the current
    962          * base64 bytes, while nextSourceIndex is precisely parallel to source,
    963          * keeping the index to the following byte.
    964          * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
    965          */
    966         while(source<sourceLimit) {
    967             if(target<targetLimit) {
    968                 bytes[byteIndex++]=b=*source++;
    969                 ++nextSourceIndex;
    970                 if(b>0x7e) {
    971                     /* illegal - test other illegal US-ASCII values by base64Value==-3 */
    972                     inDirectMode=TRUE;
    973                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    974                     break;
    975                 } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
    976                     /* collect base64 bytes into UChars */
    977                     switch(base64Counter) {
    978                     case -1: /* -1 is immediately after the & */
    979                     case 0:
    980                         bits=base64Value;
    981                         base64Counter=1;
    982                         break;
    983                     case 1:
    984                     case 3:
    985                     case 4:
    986                     case 6:
    987                         bits=(uint16_t)((bits<<6)|base64Value);
    988                         ++base64Counter;
    989                         break;
    990                     case 2:
    991                         c=(UChar)((bits<<4)|(base64Value>>2));
    992                         if(isLegalIMAP(c)) {
    993                             /* illegal: printable US-ASCII must not be base64-encoded */
    994                             inDirectMode=TRUE;
    995                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    996                             goto endloop;
    997                         }
    998                         *target++=c;
    999                         if(offsets!=NULL) {
   1000                             *offsets++=sourceIndex;
   1001                             sourceIndex=nextSourceIndex-1;
   1002                         }
   1003                         bytes[0]=b; /* keep this byte in case an error occurs */
   1004                         byteIndex=1;
   1005                         bits=(uint16_t)(base64Value&3);
   1006                         base64Counter=3;
   1007                         break;
   1008                     case 5:
   1009                         c=(UChar)((bits<<2)|(base64Value>>4));
   1010                         if(isLegalIMAP(c)) {
   1011                             /* illegal: printable US-ASCII must not be base64-encoded */
   1012                             inDirectMode=TRUE;
   1013                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1014                             goto endloop;
   1015                         }
   1016                         *target++=c;
   1017                         if(offsets!=NULL) {
   1018                             *offsets++=sourceIndex;
   1019                             sourceIndex=nextSourceIndex-1;
   1020                         }
   1021                         bytes[0]=b; /* keep this byte in case an error occurs */
   1022                         byteIndex=1;
   1023                         bits=(uint16_t)(base64Value&15);
   1024                         base64Counter=6;
   1025                         break;
   1026                     case 7:
   1027                         c=(UChar)((bits<<6)|base64Value);
   1028                         if(isLegalIMAP(c)) {
   1029                             /* illegal: printable US-ASCII must not be base64-encoded */
   1030                             inDirectMode=TRUE;
   1031                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1032                             goto endloop;
   1033                         }
   1034                         *target++=c;
   1035                         if(offsets!=NULL) {
   1036                             *offsets++=sourceIndex;
   1037                             sourceIndex=nextSourceIndex;
   1038                         }
   1039                         byteIndex=0;
   1040                         bits=0;
   1041                         base64Counter=0;
   1042                         break;
   1043                     default:
   1044                         /* will never occur */
   1045                         break;
   1046                     }
   1047                 } else if(base64Value==-2) {
   1048                     /* minus sign terminates the base64 sequence */
   1049                     inDirectMode=TRUE;
   1050                     if(base64Counter==-1) {
   1051                         /* &- i.e. a minus immediately following an ampersand */
   1052                         *target++=AMPERSAND;
   1053                         if(offsets!=NULL) {
   1054                             *offsets++=sourceIndex-1;
   1055                         }
   1056                     } else {
   1057                         /* absorb the minus and leave the Unicode Mode */
   1058                         if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
   1059                             /* bits are illegally left over, a UChar is incomplete */
   1060                             /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
   1061                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1062                             break;
   1063                         }
   1064                     }
   1065                     sourceIndex=nextSourceIndex;
   1066                     goto directMode;
   1067                 } else {
   1068                     if(base64Counter==-1) {
   1069                         /* illegal: & immediately followed by something other than base64 or minus sign */
   1070                         /* include the ampersand in the reported sequence */
   1071                         --sourceIndex;
   1072                         bytes[0]=AMPERSAND;
   1073                         bytes[1]=b;
   1074                         byteIndex=2;
   1075                     }
   1076                     /* base64Value==-1 for characters that are illegal only in Unicode mode */
   1077                     /* base64Value==-3 for illegal characters */
   1078                     /* illegal */
   1079                     inDirectMode=TRUE;
   1080                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1081                     break;
   1082                 }
   1083             } else {
   1084                 /* target is full */
   1085                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1086                 break;
   1087             }
   1088         }
   1089     }
   1090 endloop:
   1091
   1092     /*
   1093      * the end of the input stream and detection of truncated input
   1094      * are handled by the framework, but here we must check if we are in Unicode
   1095      * mode and byteIndex==0 because we must end in direct mode
   1096      *
   1097      * conditions:
   1098      *   successful
   1099      *   in Unicode mode and byteIndex==0
   1100      *   end of input and no truncated input
   1101      */
   1102     if( U_SUCCESS(*pErrorCode) &&
   1103         !inDirectMode && byteIndex==0 &&
   1104         pArgs->flush && source>=sourceLimit
   1105     ) {
   1106         if(base64Counter==-1) {
   1107             /* & at the very end of the input */
   1108             /* make the ampersand the reported sequence */
   1109             bytes[0]=AMPERSAND;
   1110             byteIndex=1;
   1111         }
   1112         /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
   1113
   1114         inDirectMode=TRUE; /* avoid looping */
   1115         *pErrorCode=U_TRUNCATED_CHAR_FOUND;
   1116     }
   1117
   1118     /* set the converter state back into UConverter */
   1119     cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
   1120     cnv->toULength=byteIndex;
   1121
   1122     /* write back the updated pointers */
   1123     pArgs->source=(const char *)source;
   1124     pArgs->target=target;
   1125     pArgs->offsets=offsets;
   1126     return;
   1127 }
   1128 
   1129 static void
   1130 _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
   1131                             UErrorCode *pErrorCode) {
   1132     UConverter *cnv;
   1133     const UChar *source, *sourceLimit;
   1134     uint8_t *target, *targetLimit;
   1135     int32_t *offsets;
   1136 
   1137     int32_t length, targetCapacity, sourceIndex;
   1138     UChar c;
   1139     uint8_t b;
   1140 
   1141     /* UTF-7 state */
   1142     uint8_t bits;
   1143     int8_t base64Counter;
   1144     UBool inDirectMode;
   1145 
   1146     /* set up the local pointers */
   1147     cnv=pArgs->converter;
   1148 
   1149     /* set up the local pointers */
   1150     source=pArgs->source;
   1151     sourceLimit=pArgs->sourceLimit;
   1152     target=(uint8_t *)pArgs->target;
   1153     targetLimit=(uint8_t *)pArgs->targetLimit;
   1154     offsets=pArgs->offsets;
   1155 
   1156     /* get the state machine state */
   1157     {
   1158         uint32_t status=cnv->fromUnicodeStatus;
   1159         inDirectMode=(UBool)((status>>24)&1);
   1160         base64Counter=(int8_t)(status>>16);
   1161         bits=(uint8_t)status;
   1162     }
   1163 
   1164     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
   1165     sourceIndex=0;
   1166 
   1167     if(inDirectMode) {
   1168 directMode:
   1169         length=(int32_t)(sourceLimit-source);
   1170         targetCapacity=(int32_t)(targetLimit-target);
   1171         if(length>targetCapacity) {
   1172             length=targetCapacity;
   1173         }
   1174         while(length>0) {
   1175             c=*source++;
   1176             /* encode 0x20..0x7e except '&' directly */
   1177             if(inSetDIMAP(c)) {
   1178                 /* encode directly */
   1179                 *target++=(uint8_t)c;
   1180                 if(offsets!=NULL) {
   1181                     *offsets++=sourceIndex++;
   1182                 }
   1183             } else if(c==AMPERSAND) {
   1184                 /* output &- for & */
   1185                 *target++=AMPERSAND;
   1186                 if(target<targetLimit) {
   1187                     *target++=MINUS;
   1188                     if(offsets!=NULL) {
   1189                         *offsets++=sourceIndex;
   1190                         *offsets++=sourceIndex++;
   1191                     }
   1192                     /* realign length and targetCapacity */
   1193                     goto directMode;
   1194                 } else {
   1195                     if(offsets!=NULL) {
   1196                         *offsets++=sourceIndex++;
   1197                     }
   1198                     cnv->charErrorBuffer[0]=MINUS;
   1199                     cnv->charErrorBufferLength=1;
   1200                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1201                     break;
   1202                 }
   1203             } else {
   1204                 /* un-read this character and switch to Unicode Mode */
   1205                 --source;
   1206                 *target++=AMPERSAND;
   1207                 if(offsets!=NULL) {
   1208                     *offsets++=sourceIndex;
   1209                 }
   1210                 inDirectMode=FALSE;
   1211                 base64Counter=0;
   1212                 goto unicodeMode;
   1213             }
   1214             --length;
   1215         }
   1216         if(source<sourceLimit && target>=targetLimit) {
   1217             /* target is full */
   1218             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1219         }
   1220     } else {
   1221 unicodeMode:
   1222         while(source<sourceLimit) {
   1223             if(target<targetLimit) {
   1224                 c=*source++;
   1225                 if(isLegalIMAP(c)) {
   1226                     /* encode directly */
   1227                     inDirectMode=TRUE;
   1228 
   1229                     /* trick: back out this character to make this easier */
   1230                     --source;
   1231 
   1232                     /* terminate the base64 sequence */
   1233                     if(base64Counter!=0) {
   1234                         /* write remaining bits for the previous character */
   1235                         *target++=TO_BASE64_IMAP(bits);
   1236                         if(offsets!=NULL) {
   1237                             *offsets++=sourceIndex-1;
   1238                         }
   1239                     }
   1240                     /* need to terminate with a minus */
   1241                     if(target<targetLimit) {
   1242                         *target++=MINUS;
   1243                         if(offsets!=NULL) {
   1244                             *offsets++=sourceIndex-1;
   1245                         }
   1246                     } else {
   1247                         cnv->charErrorBuffer[0]=MINUS;
   1248                         cnv->charErrorBufferLength=1;
   1249                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1250                         break;
   1251                     }
   1252                     goto directMode;
   1253                 } else {
   1254                     /*
   1255                      * base64 this character:
   1256                      * Output 2 or 3 base64 bytes for the remaining bits of the previous character
   1257                      * and the bits of this character, each implicitly in UTF-16BE.
   1258                      *
   1259                      * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
   1260                      * character to the next. The actual 2 or 4 bits are shifted to the left edge
   1261                      * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
   1262                      */
   1263                     switch(base64Counter) {
   1264                     case 0:
   1265                         b=(uint8_t)(c>>10);
   1266                         *target++=TO_BASE64_IMAP(b);
   1267                         if(target<targetLimit) {
   1268                             b=(uint8_t)((c>>4)&0x3f);
   1269                             *target++=TO_BASE64_IMAP(b);
   1270                             if(offsets!=NULL) {
   1271                                 *offsets++=sourceIndex;
   1272                                 *offsets++=sourceIndex++;
   1273                             }
   1274                         } else {
   1275                             if(offsets!=NULL) {
   1276                                 *offsets++=sourceIndex++;
   1277                             }
   1278                             b=(uint8_t)((c>>4)&0x3f);
   1279                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
   1280                             cnv->charErrorBufferLength=1;
   1281                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1282                         }
   1283                         bits=(uint8_t)((c&15)<<2);
   1284                         base64Counter=1;
   1285                         break;
   1286                     case 1:
   1287                         b=(uint8_t)(bits|(c>>14));
   1288                         *target++=TO_BASE64_IMAP(b);
   1289                         if(target<targetLimit) {
   1290                             b=(uint8_t)((c>>8)&0x3f);
   1291                             *target++=TO_BASE64_IMAP(b);
   1292                             if(target<targetLimit) {
   1293                                 b=(uint8_t)((c>>2)&0x3f);
   1294                                 *target++=TO_BASE64_IMAP(b);
   1295                                 if(offsets!=NULL) {
   1296                                     *offsets++=sourceIndex;
   1297                                     *offsets++=sourceIndex;
   1298                                     *offsets++=sourceIndex++;
   1299                                 }
   1300                             } else {
   1301                                 if(offsets!=NULL) {
   1302                                     *offsets++=sourceIndex;
   1303                                     *offsets++=sourceIndex++;
   1304                                 }
   1305                                 b=(uint8_t)((c>>2)&0x3f);
   1306                                 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
   1307                                 cnv->charErrorBufferLength=1;
   1308                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1309                             }
   1310                         } else {
   1311                             if(offsets!=NULL) {
   1312                                 *offsets++=sourceIndex++;
   1313                             }
   1314                             b=(uint8_t)((c>>8)&0x3f);
   1315                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
   1316                             b=(uint8_t)((c>>2)&0x3f);
   1317                             cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
   1318                             cnv->charErrorBufferLength=2;
   1319                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1320                         }
   1321                         bits=(uint8_t)((c&3)<<4);
   1322                         base64Counter=2;
   1323                         break;
   1324                     case 2:
   1325                         b=(uint8_t)(bits|(c>>12));
   1326                         *target++=TO_BASE64_IMAP(b);
   1327                         if(target<targetLimit) {
   1328                             b=(uint8_t)((c>>6)&0x3f);
   1329                             *target++=TO_BASE64_IMAP(b);
   1330                             if(target<targetLimit) {
   1331                                 b=(uint8_t)(c&0x3f);
   1332                                 *target++=TO_BASE64_IMAP(b);
   1333                                 if(offsets!=NULL) {
   1334                                     *offsets++=sourceIndex;
   1335                                     *offsets++=sourceIndex;
   1336                                     *offsets++=sourceIndex++;
   1337                                 }
   1338                             } else {
   1339                                 if(offsets!=NULL) {
   1340                                     *offsets++=sourceIndex;
   1341                                     *offsets++=sourceIndex++;
   1342                                 }
   1343                                 b=(uint8_t)(c&0x3f);
   1344                                 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
   1345                                 cnv->charErrorBufferLength=1;
   1346                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1347                             }
   1348                         } else {
   1349                             if(offsets!=NULL) {
   1350                                 *offsets++=sourceIndex++;
   1351                             }
   1352                             b=(uint8_t)((c>>6)&0x3f);
   1353                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
   1354                             b=(uint8_t)(c&0x3f);
   1355                             cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
   1356                             cnv->charErrorBufferLength=2;
   1357                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1358                         }
   1359                         bits=0;
   1360                         base64Counter=0;
   1361                         break;
   1362                     default:
   1363                         /* will never occur */
   1364                         break;
   1365                     }
   1366                 }
   1367             } else {
   1368                 /* target is full */
   1369                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1370                 break;
   1371             }
   1372         }
   1373     }
   1374 
   1375     if(pArgs->flush && source>=sourceLimit) {
   1376         /* flush remaining bits to the target */
   1377         if(!inDirectMode) {
   1378             if(base64Counter!=0) {
   1379                 if(target<targetLimit) {
   1380                     *target++=TO_BASE64_IMAP(bits);
   1381                     if(offsets!=NULL) {
   1382                         *offsets++=sourceIndex-1;
   1383                     }
   1384                 } else {
   1385                     cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
   1386                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1387                 }
   1388             }
   1389             /* need to terminate with a minus */
   1390             if(target<targetLimit) {
   1391                 *target++=MINUS;
   1392                 if(offsets!=NULL) {
   1393                     *offsets++=sourceIndex-1;
   1394                 }
   1395             } else {
   1396                 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
   1397                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1398             }
   1399         }
   1400         /* reset the state for the next conversion */
   1401         cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
   1402     } else {
   1403         /* set the converter state back into UConverter */
   1404         cnv->fromUnicodeStatus=
   1405             (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
   1406             ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
   1407     }
   1408 
   1409     /* write back the updated pointers */
   1410     pArgs->source=source;
   1411     pArgs->target=(char *)target;
   1412     pArgs->offsets=offsets;
   1413     return;
   1414 }
   1415 
   /*
    * Implementation table for the IMAP mailbox name converter.
    * It reuses _UTF7Open and _UTF7Reset and plugs in the IMAP-specific
    * conversion functions. Unlike _UTF7Impl, the position that holds
    * _UTF7GetName there is NULL here.
    * The initializers are positional; NOTE(review): confirm the slot order
    * against the UConverterImpl declaration, which is not visible in this file.
    */
   1416 static const UConverterImpl _IMAPImpl={
   1417     UCNV_IMAP_MAILBOX,
   1418
   1419     NULL,
   1420     NULL,
   1421
   1422     _UTF7Open,
   1423     NULL,
   1424     _UTF7Reset,
   1425
   1426     _IMAPToUnicodeWithOffsets, /* the same function fills both to-Unicode slots */
   1427     _IMAPToUnicodeWithOffsets,
   1428     _IMAPFromUnicodeWithOffsets, /* the same function fills both from-Unicode slots */
   1429     _IMAPFromUnicodeWithOffsets,
   1430     NULL,
   1431
   1432     NULL,
   1433     NULL,
   1434     NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
   1435     NULL,
   1436     ucnv_getCompleteUnicodeSet
   1437 };
   1438 
   /*
    * Static (constant) description of the IMAP mailbox name converter:
    * structure size, converter name, and further positional values that
    * mirror _UTF7StaticData except for the name and the converter type.
    * NOTE(review): the meaning of the unlabeled values (e.g. "1, 4", presumably
    * minimum/maximum bytes per character) comes from the UConverterStaticData
    * declaration, which is not visible here - confirm there.
    */
   1439 static const UConverterStaticData _IMAPStaticData={
   1440     sizeof(UConverterStaticData),
   1441     "IMAP-mailbox-name",
   1442     0, /* TODO CCSID for IMAP-mailbox-name */
   1443     UCNV_IBM, UCNV_IMAP_MAILBOX,
   1444     1, 4,
   1445     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
   1446     FALSE, FALSE,
   1447     0,
   1448     0,
   1449     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   1450 };
   1451 
   /*
    * Shared data object for the IMAP mailbox name converter (non-static, so
    * visible to other translation units). It ties together the static
    * description (_IMAPStaticData) and the implementation table (_IMAPImpl).
    * NOTE(review): the all-ones second value is presumably a reference counter
    * that marks this object as never released - confirm against the
    * UConverterSharedData declaration.
    */
   1452 const UConverterSharedData _IMAPData={
   1453     sizeof(UConverterSharedData), ~((uint32_t)0),
   1454     NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
   1455     0
   1456 };
   1457 
   1458 #endif
   1459