Home | History | Annotate | Download | only in unicode
      1 /* ================================================================ */
      2 /*
      3 File:   ConvertUTF7.c
      4 Author: David B. Goldsmith
      5 Copyright (C) 1994, 1996 IBM Corporation All rights reserved.
      6 Revisions: Header update only July, 2001.
      7 
      8 This code is copyrighted. Under the copyright laws, this code may not
      9 be copied, in whole or part, without prior written consent of IBM Corporation.
     10 
     11 IBM Corporation grants the right to use this code as long as this ENTIRE
     12 copyright notice is reproduced in the code.  The code is provided
     13 AS-IS, AND IBM CORPORATION DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
     14 IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
     15 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT
     16 WILL IBM CORPORATION BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
     17 WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
     18 INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
     19 LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
     20 IF IBM CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
     21 BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
     22 LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
     23 LIMITATION MAY NOT APPLY TO YOU.
     24 
     25 RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
     26 government is subject to restrictions as set forth in subparagraph
     27 (c)(l)(ii) of the Rights in Technical Data and Computer Software
     28 clause at DFARS 252.227-7013 and FAR 52.227-19.
     29 
     30 This code may be protected by one or more U.S. and International
     31 Patents.
     32 
     33 */
     34 
     35 #include "CVTUTF7.H"
     36 
     37 static char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     38 static short invbase64[128];
     39 
     40 static char direct[] =
     41 	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
     42 static char optional[] = "!\"#$%&*;<=>@[]^_`{|}";
     43 static char spaces[] = " \011\015\012";		/* space, tab, return, line feed */
     44 static char mustshiftsafe[128];
     45 static char mustshiftopt[128];
     46 
     47 static int needtables = 1;
     48 
     49 #define SHIFT_IN '+'
     50 #define SHIFT_OUT '-'
     51 
     52 static void
     53 tabinit()
     54 {
     55 	int i, limit;
     56 
     57 	for (i = 0; i < 128; ++i)
     58 	{
     59 		mustshiftopt[i] = mustshiftsafe[i] = 1;
     60 		invbase64[i] = -1;
     61 	}
     62 	limit = strlen(direct);
     63 	for (i = 0; i < limit; ++i)
     64 		mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0;
     65 	limit = strlen(spaces);
     66 	for (i = 0; i < limit; ++i)
     67 		mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0;
     68 	limit = strlen(optional);
     69 	for (i = 0; i < limit; ++i)
     70 		mustshiftopt[optional[i]] = 0;
     71 	limit = strlen(base64);
     72 	for (i = 0; i < limit; ++i)
     73 		invbase64[base64[i]] = i;
     74 
     75 	needtables = 0;
     76 }
     77 
     78 #define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0
     79 #define BITS_IN_BUFFER bufferbits
     80 #define WRITE_N_BITS(x, n) ((BITbuffer |= ( ((x) & ~(-1L<<(n))) << (32-(n)-bufferbits) ) ), bufferbits += (n) )
     81 #define READ_N_BITS(n) ((buffertemp = (BITbuffer >> (32-(n)))), (BITbuffer <<= (n)), (bufferbits -= (n)), buffertemp)
     82 #define TARGETCHECK  {if (target >= targetEnd) {result = targetExhausted; break;}}
     83 
     84 ConversionResult ConvertUCS2toUTF7(
     85                 UCS2** sourceStart, UCS2* sourceEnd,
     86                 char** targetStart, char* targetEnd,
     87                 int optional, int verbose)
     88 {
     89 	ConversionResult result = ok;
     90 	DECLARE_BIT_BUFFER;
     91 	int shifted = 0, needshift = 0, done = 0;
     92 	register UCS2 *source = *sourceStart;
     93 	register char *target = *targetStart;
     94 	char *mustshift;
     95 
     96 	if (needtables)
     97 		tabinit();
     98 
     99 	if (optional)
    100 		mustshift = mustshiftopt;
    101 	else
    102 		mustshift = mustshiftsafe;
    103 
    104 	do
    105 	{
    106 		register UCS2 r;
    107 
    108 		if (!(done = (source >= sourceEnd)))
    109 			r = *source++;
    110 		needshift = (!done && ((r > 0x7f) || mustshift[r]));
    111 
    112 		if (needshift && !shifted)
    113 		{
    114 			TARGETCHECK;
    115 			*target++ = SHIFT_IN;
    116 			/* Special case handling of the SHIFT_IN character */
    117 			if (r == (UCS2)SHIFT_IN) {
    118 				TARGETCHECK;
    119 				*target++ = SHIFT_OUT;
    120 			}
    121 			else
    122 				shifted = 1;
    123 		}
    124 
    125 		if (shifted)
    126 		{
    127 			/* Either write the character to the bit buffer, or pad
    128 			   the bit buffer out to a full base64 character.
    129 			 */
    130 			if (needshift)
    131 				WRITE_N_BITS(r, 16);
    132 			else
    133 				WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6);
    134 
    135 			/* Flush out as many full base64 characters as possible
    136 			   from the bit buffer.
    137 			 */
    138 			while ((target < targetEnd) && BITS_IN_BUFFER >= 6)
    139 			{
    140 				*target++ = base64[READ_N_BITS(6)];
    141 			}
    142 
    143 			if (BITS_IN_BUFFER >= 6)
    144 				TARGETCHECK;
    145 
    146 			if (!needshift)
    147 			{
    148 				/* Write the explicit shift out character if
    149 				   1) The caller has requested we always do it, or
    150 				   2) The directly encoded character is in the
    151 				   base64 set, or
    152 				   3) The directly encoded character is SHIFT_OUT.
    153 				 */
    154 				if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT)))
    155 				{
    156 					TARGETCHECK;
    157 					*target++ = SHIFT_OUT;
    158 				}
    159 				shifted = 0;
    160 			}
    161 		}
    162 
    163 		/* The character can be directly encoded as ASCII. */
    164 		if (!needshift && !done)
    165 		{
    166 			TARGETCHECK;
    167 			*target++ = (char) r;
    168 		}
    169 
    170 	}
    171 	while (!done);
    172 
    173     *sourceStart = source;
    174     *targetStart = target;
    175     return result;
    176 }
    177 
    178 ConversionResult ConvertUTF7toUCS2(
    179                 char** sourceStart, char* sourceEnd,
    180                 UCS2** targetStart, UCS2* targetEnd)
    181 {
    182 	ConversionResult result = ok;
    183 	DECLARE_BIT_BUFFER;
    184 	int shifted = 0, first = 0, wroteone = 0, base64EOF, base64value, done;
    185 	unsigned int c, prevc;
    186 	unsigned long junk;
    187 	register char *source = *sourceStart;
    188 	register UCS2 *target = *targetStart;
    189 
    190 	if (needtables)
    191 		tabinit();
    192 
    193 	do
    194 	{
    195 		/* read an ASCII character c */
    196 		if (!(done = (source >= sourceEnd)))
    197 			c = *source++;
    198 		if (shifted)
    199 		{
    200 			/* We're done with a base64 string if we hit EOF, it's not a valid
    201 			   ASCII character, or it's not in the base64 set.
    202 			 */
    203 			base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0;
    204 			if (base64EOF)
    205 			{
    206 				shifted = 0;
    207 				/* If the character causing us to drop out was SHIFT_IN or
    208 				   SHIFT_OUT, it may be a special escape for SHIFT_IN. The
    209 				   test for SHIFT_IN is not necessary, but allows an alternate
    210 				   form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This
    211 				   only works for some values of SHIFT_IN.
    212 				 */
    213 				if (!done && (c == SHIFT_IN || c == SHIFT_OUT))
    214 				{
    215 					/* get another character c */
    216 					prevc = c;
    217 					if (!(done = (source >= sourceEnd)))
    218 						c = *source++;
    219 					/* If no base64 characters were encountered, and the
    220 					   character terminating the shift sequence was
    221 					   SHIFT_OUT, then it's a special escape for SHIFT_IN.
    222 					 */
    223 					if (first && prevc == SHIFT_OUT)
    224 					{
    225 						/* write SHIFT_IN unicode */
    226 						TARGETCHECK;
    227 						*target++ = (UCS2)SHIFT_IN;
    228 					}
    229 					else if (!wroteone)
    230 					{
    231 						result = sourceCorrupt;
    232 						/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
    233 					}
    234 				}
    235 				else if (!wroteone)
    236 				{
    237 					result = sourceCorrupt;
    238 					/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
    239 				}
    240 			}
    241 			else
    242 			{
    243 				/* Add another 6 bits of base64 to the bit buffer. */
    244 				WRITE_N_BITS(base64value, 6);
    245 				first = 0;
    246 			}
    247 
    248 			/* Extract as many full 16 bit characters as possible from the
    249 			   bit buffer.
    250 			 */
    251 			while (BITS_IN_BUFFER >= 16 && (target < targetEnd))
    252 			{
    253 				/* write a unicode */
    254 				*target++ = READ_N_BITS(16);
    255 				wroteone = 1;
    256 			}
    257 
    258 			if (BITS_IN_BUFFER >= 16)
    259 				TARGETCHECK;
    260 
    261 			if (base64EOF)
    262 			{
    263 				junk = READ_N_BITS(BITS_IN_BUFFER);
    264 				if (junk)
    265 				{
    266 					result = sourceCorrupt;
    267 					/* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */;
    268 				}
    269 			}
    270 		}
    271 
    272 		if (!shifted && !done)
    273 		{
    274 			if (c == SHIFT_IN)
    275 			{
    276 				shifted = 1;
    277 				first = 1;
    278 				wroteone = 0;
    279 			}
    280 			else
    281 			{
    282 				/* It must be a directly encoded character. */
    283 				if (c > 0x7f)
    284 				{
    285 					result = sourceCorrupt;
    286 					/* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */;
    287 				}
    288 				/* write a unicode */
    289 				TARGETCHECK;
    290 				*target++ = c;
    291 			}
    292 		}
    293 	}
    294 	while (!done);
    295 
    296     *sourceStart = source;
    297     *targetStart = target;
    298     return result;
    299 }
    300