1 /* ================================================================ */ 2 /* 3 File: ConvertUTF7.c 4 Author: David B. Goldsmith 5 Copyright (C) 1994, 1996 IBM Corporation All rights reserved. 6 Revisions: Header update only July, 2001. 7 8 This code is copyrighted. Under the copyright laws, this code may not 9 be copied, in whole or part, without prior written consent of IBM Corporation. 10 11 IBM Corporation grants the right to use this code as long as this ENTIRE 12 copyright notice is reproduced in the code. The code is provided 13 AS-IS, AND IBM CORPORATION DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR 14 IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF 15 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 16 WILL IBM CORPORATION BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, 17 WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS 18 INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY 19 LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN 20 IF IBM CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 21 BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF 22 LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE 23 LIMITATION MAY NOT APPLY TO YOU. 24 25 RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the 26 government is subject to restrictions as set forth in subparagraph 27 (c)(l)(ii) of the Rights in Technical Data and Computer Software 28 clause at DFARS 252.227-7013 and FAR 52.227-19. 29 30 This code may be protected by one or more U.S. and International 31 Patents. 32 33 */ 34 35 #include "CVTUTF7.H" 36 37 static char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 38 static short invbase64[128]; 39 40 static char direct[] = 41 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"; 42 static char optional[] = "!\"#$%&*;<=>@[]^_`{|}"; 43 static char spaces[] = " \011\015\012"; /* space, tab, return, line feed */ 44 static char mustshiftsafe[128]; 45 static char mustshiftopt[128]; 46 47 static int needtables = 1; 48 49 #define SHIFT_IN '+' 50 #define SHIFT_OUT '-' 51 52 static void 53 tabinit() 54 { 55 int i, limit; 56 57 for (i = 0; i < 128; ++i) 58 { 59 mustshiftopt[i] = mustshiftsafe[i] = 1; 60 invbase64[i] = -1; 61 } 62 limit = strlen(direct); 63 for (i = 0; i < limit; ++i) 64 mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0; 65 limit = strlen(spaces); 66 for (i = 0; i < limit; ++i) 67 mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0; 68 limit = strlen(optional); 69 for (i = 0; i < limit; ++i) 70 mustshiftopt[optional[i]] = 0; 71 limit = strlen(base64); 72 for (i = 0; i < limit; ++i) 73 invbase64[base64[i]] = i; 74 75 needtables = 0; 76 } 77 78 #define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0 79 #define BITS_IN_BUFFER bufferbits 80 #define WRITE_N_BITS(x, n) ((BITbuffer |= ( ((x) & ~(-1L<<(n))) << (32-(n)-bufferbits) ) ), bufferbits += (n) ) 81 #define READ_N_BITS(n) ((buffertemp = (BITbuffer >> (32-(n)))), (BITbuffer <<= (n)), (bufferbits -= (n)), buffertemp) 82 #define TARGETCHECK {if (target >= targetEnd) {result = targetExhausted; break;}} 83 84 ConversionResult ConvertUCS2toUTF7( 85 UCS2** sourceStart, UCS2* sourceEnd, 86 char** targetStart, char* targetEnd, 87 int optional, int verbose) 88 { 89 ConversionResult result = ok; 90 DECLARE_BIT_BUFFER; 91 int shifted = 0, needshift = 0, done = 0; 92 register UCS2 *source = *sourceStart; 93 register char *target = *targetStart; 94 char *mustshift; 95 96 if (needtables) 97 tabinit(); 98 99 if (optional) 100 mustshift = mustshiftopt; 101 else 102 mustshift = mustshiftsafe; 103 104 do 105 { 106 register UCS2 r; 107 108 if (!(done = (source >= sourceEnd))) 109 r = *source++; 110 needshift = (!done && ((r > 0x7f) || mustshift[r])); 111 112 if (needshift && !shifted) 113 { 114 TARGETCHECK; 115 *target++ = SHIFT_IN; 116 /* Special case handling of the SHIFT_IN character */ 117 if (r == (UCS2)SHIFT_IN) { 118 TARGETCHECK; 119 *target++ = SHIFT_OUT; 120 } 121 else 122 shifted = 1; 123 } 124 125 if (shifted) 126 { 127 /* Either write the character to the bit buffer, or pad 128 the bit buffer out to a full base64 character. 129 */ 130 if (needshift) 131 WRITE_N_BITS(r, 16); 132 else 133 WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6); 134 135 /* Flush out as many full base64 characters as possible 136 from the bit buffer. 137 */ 138 while ((target < targetEnd) && BITS_IN_BUFFER >= 6) 139 { 140 *target++ = base64[READ_N_BITS(6)]; 141 } 142 143 if (BITS_IN_BUFFER >= 6) 144 TARGETCHECK; 145 146 if (!needshift) 147 { 148 /* Write the explicit shift out character if 149 1) The caller has requested we always do it, or 150 2) The directly encoded character is in the 151 base64 set, or 152 3) The directly encoded character is SHIFT_OUT. 153 */ 154 if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT))) 155 { 156 TARGETCHECK; 157 *target++ = SHIFT_OUT; 158 } 159 shifted = 0; 160 } 161 } 162 163 /* The character can be directly encoded as ASCII. */ 164 if (!needshift && !done) 165 { 166 TARGETCHECK; 167 *target++ = (char) r; 168 } 169 170 } 171 while (!done); 172 173 *sourceStart = source; 174 *targetStart = target; 175 return result; 176 } 177 178 ConversionResult ConvertUTF7toUCS2( 179 char** sourceStart, char* sourceEnd, 180 UCS2** targetStart, UCS2* targetEnd) 181 { 182 ConversionResult result = ok; 183 DECLARE_BIT_BUFFER; 184 int shifted = 0, first = 0, wroteone = 0, base64EOF, base64value, done; 185 unsigned int c, prevc; 186 unsigned long junk; 187 register char *source = *sourceStart; 188 register UCS2 *target = *targetStart; 189 190 if (needtables) 191 tabinit(); 192 193 do 194 { 195 /* read an ASCII character c */ 196 if (!(done = (source >= sourceEnd))) 197 c = *source++; 198 if (shifted) 199 { 200 /* We're done with a base64 string if we hit EOF, it's not a valid 201 ASCII character, or it's not in the base64 set. 202 */ 203 base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0; 204 if (base64EOF) 205 { 206 shifted = 0; 207 /* If the character causing us to drop out was SHIFT_IN or 208 SHIFT_OUT, it may be a special escape for SHIFT_IN. The 209 test for SHIFT_IN is not necessary, but allows an alternate 210 form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This 211 only works for some values of SHIFT_IN. 212 */ 213 if (!done && (c == SHIFT_IN || c == SHIFT_OUT)) 214 { 215 /* get another character c */ 216 prevc = c; 217 if (!(done = (source >= sourceEnd))) 218 c = *source++; 219 /* If no base64 characters were encountered, and the 220 character terminating the shift sequence was 221 SHIFT_OUT, then it's a special escape for SHIFT_IN. 222 */ 223 if (first && prevc == SHIFT_OUT) 224 { 225 /* write SHIFT_IN unicode */ 226 TARGETCHECK; 227 *target++ = (UCS2)SHIFT_IN; 228 } 229 else if (!wroteone) 230 { 231 result = sourceCorrupt; 232 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; 233 } 234 } 235 else if (!wroteone) 236 { 237 result = sourceCorrupt; 238 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; 239 } 240 } 241 else 242 { 243 /* Add another 6 bits of base64 to the bit buffer. */ 244 WRITE_N_BITS(base64value, 6); 245 first = 0; 246 } 247 248 /* Extract as many full 16 bit characters as possible from the 249 bit buffer. 250 */ 251 while (BITS_IN_BUFFER >= 16 && (target < targetEnd)) 252 { 253 /* write a unicode */ 254 *target++ = READ_N_BITS(16); 255 wroteone = 1; 256 } 257 258 if (BITS_IN_BUFFER >= 16) 259 TARGETCHECK; 260 261 if (base64EOF) 262 { 263 junk = READ_N_BITS(BITS_IN_BUFFER); 264 if (junk) 265 { 266 result = sourceCorrupt; 267 /* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */; 268 } 269 } 270 } 271 272 if (!shifted && !done) 273 { 274 if (c == SHIFT_IN) 275 { 276 shifted = 1; 277 first = 1; 278 wroteone = 0; 279 } 280 else 281 { 282 /* It must be a directly encoded character. */ 283 if (c > 0x7f) 284 { 285 result = sourceCorrupt; 286 /* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */; 287 } 288 /* write a unicode */ 289 TARGETCHECK; 290 *target++ = c; 291 } 292 } 293 } 294 while (!done); 295 296 *sourceStart = source; 297 *targetStart = target; 298 return result; 299 } 300