1 /* 2 ***************************************************************************** 3 * 4 * Copyright (C) 1998-2007, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ***************************************************************************** 8 * 9 * ucnv_err.c 10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode 11 * 12 * 13 * Change history: 14 * 15 * 06/29/2000 helena Major rewrite of the callback APIs. 16 */ 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_CONVERSION 21 22 #include "unicode/ucnv_err.h" 23 #include "unicode/ucnv_cb.h" 24 #include "ucnv_cnv.h" 25 #include "cmemory.h" 26 #include "unicode/ucnv.h" 27 #include "ustrfmt.h" 28 29 #define VALUE_STRING_LENGTH 32 30 /*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ 31 #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 32 #define UNICODE_U_CODEPOINT 0x0055 33 #define UNICODE_X_CODEPOINT 0x0058 34 #define UNICODE_RS_CODEPOINT 0x005C 35 #define UNICODE_U_LOW_CODEPOINT 0x0075 36 #define UNICODE_X_LOW_CODEPOINT 0x0078 37 #define UNICODE_AMP_CODEPOINT 0x0026 38 #define UNICODE_HASH_CODEPOINT 0x0023 39 #define UNICODE_SEMICOLON_CODEPOINT 0x003B 40 #define UNICODE_PLUS_CODEPOINT 0x002B 41 #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B 42 #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D 43 #define UNICODE_SPACE_CODEPOINT 0x0020 44 #define UCNV_PRV_ESCAPE_ICU 0 45 #define UCNV_PRV_ESCAPE_C 'C' 46 #define UCNV_PRV_ESCAPE_XML_DEC 'D' 47 #define UCNV_PRV_ESCAPE_XML_HEX 'X' 48 #define UCNV_PRV_ESCAPE_JAVA 'J' 49 #define UCNV_PRV_ESCAPE_UNICODE 'U' 50 #define UCNV_PRV_ESCAPE_CSS2 'S' 51 #define UCNV_PRV_STOP_ON_ILLEGAL 'i' 52 53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ 54 U_CAPI void U_EXPORT2 55 UCNV_FROM_U_CALLBACK_STOP ( 56 const void *context, 57 UConverterFromUnicodeArgs *fromUArgs, 58 const UChar* codeUnits, 59 int32_t length, 60 UChar32 codePoint, 61 UConverterCallbackReason reason, 62 UErrorCode * err) 63 { 64 /* the caller must have set the error code accordingly */ 65 return; 66 } 67 68 69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ 70 U_CAPI void U_EXPORT2 71 UCNV_TO_U_CALLBACK_STOP ( 72 const void *context, 73 UConverterToUnicodeArgs *toUArgs, 74 const char* codePoints, 75 int32_t length, 76 UConverterCallbackReason reason, 77 UErrorCode * err) 78 { 79 /* the caller must have set the error code accordingly */ 80 return; 81 } 82 83 U_CAPI void U_EXPORT2 84 UCNV_FROM_U_CALLBACK_SKIP ( 85 const void *context, 86 UConverterFromUnicodeArgs *fromUArgs, 87 const UChar* codeUnits, 88 int32_t length, 89 UChar32 codePoint, 90 UConverterCallbackReason reason, 91 UErrorCode * err) 92 { 93 if (reason <= UCNV_IRREGULAR) 94 { 95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 96 { 97 *err = U_ZERO_ERROR; 98 } 99 /* else the caller must have set the error code accordingly. */ 100 } 101 /* else ignore the reset, close and clone calls. */ 102 } 103 104 U_CAPI void U_EXPORT2 105 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( 106 const void *context, 107 UConverterFromUnicodeArgs *fromArgs, 108 const UChar* codeUnits, 109 int32_t length, 110 UChar32 codePoint, 111 UConverterCallbackReason reason, 112 UErrorCode * err) 113 { 114 if (reason <= UCNV_IRREGULAR) 115 { 116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 117 { 118 *err = U_ZERO_ERROR; 119 ucnv_cbFromUWriteSub(fromArgs, 0, err); 120 } 121 /* else the caller must have set the error code accordingly. */ 122 } 123 /* else ignore the reset, close and clone calls. */ 124 } 125 126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, 127 *uses a clean copy (resetted) of the converter, to convert that unicode 128 *escape sequence to the target codepage (if conversion failure happens then 129 *we revert to substituting with subchar) 130 */ 131 U_CAPI void U_EXPORT2 132 UCNV_FROM_U_CALLBACK_ESCAPE ( 133 const void *context, 134 UConverterFromUnicodeArgs *fromArgs, 135 const UChar *codeUnits, 136 int32_t length, 137 UChar32 codePoint, 138 UConverterCallbackReason reason, 139 UErrorCode * err) 140 { 141 142 UChar valueString[VALUE_STRING_LENGTH]; 143 int32_t valueStringLength = 0; 144 int32_t i = 0; 145 146 const UChar *myValueSource = NULL; 147 UErrorCode err2 = U_ZERO_ERROR; 148 UConverterFromUCallback original = NULL; 149 const void *originalContext; 150 151 UConverterFromUCallback ignoredCallback = NULL; 152 const void *ignoredContext; 153 154 if (reason > UCNV_IRREGULAR) 155 { 156 return; 157 } 158 159 ucnv_setFromUCallBack (fromArgs->converter, 160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, 161 NULL, 162 &original, 163 &originalContext, 164 &err2); 165 166 if (U_FAILURE (err2)) 167 { 168 *err = err2; 169 return; 170 } 171 if(context==NULL) 172 { 173 while (i < length) 174 { 175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 178 } 179 } 180 else 181 { 182 switch(*((char*)context)) 183 { 184 case UCNV_PRV_ESCAPE_JAVA: 185 while (i < length) 186 { 187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ 189 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 190 } 191 break; 192 193 case UCNV_PRV_ESCAPE_C: 194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 195 196 if(length==2){ 197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 198 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); 199 200 } 201 else{ 202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ 203 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); 204 } 205 break; 206 207 case UCNV_PRV_ESCAPE_XML_DEC: 208 209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 211 if(length==2){ 212 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); 213 } 214 else{ 215 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); 216 } 217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 218 break; 219 220 case UCNV_PRV_ESCAPE_XML_HEX: 221 222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 225 if(length==2){ 226 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); 227 } 228 else{ 229 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); 230 } 231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 232 break; 233 234 case UCNV_PRV_ESCAPE_UNICODE: 235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ 236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ 238 if (length == 2) { 239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); 240 } else { 241 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); 242 } 243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ 244 break; 245 246 case UCNV_PRV_ESCAPE_CSS2: 247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); 249 /* Always add space character, becase the next character might be whitespace, 250 which would erroneously be considered the termination of the escape sequence. */ 251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; 252 break; 253 254 default: 255 while (i < length) 256 { 257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 259 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 260 } 261 } 262 } 263 myValueSource = valueString; 264 265 /* reset the error */ 266 *err = U_ZERO_ERROR; 267 268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); 269 270 ucnv_setFromUCallBack (fromArgs->converter, 271 original, 272 originalContext, 273 &ignoredCallback, 274 &ignoredContext, 275 &err2); 276 if (U_FAILURE (err2)) 277 { 278 *err = err2; 279 return; 280 } 281 282 return; 283 } 284 285 286 287 U_CAPI void U_EXPORT2 288 UCNV_TO_U_CALLBACK_SKIP ( 289 const void *context, 290 UConverterToUnicodeArgs *toArgs, 291 const char* codeUnits, 292 int32_t length, 293 UConverterCallbackReason reason, 294 UErrorCode * err) 295 { 296 if (reason <= UCNV_IRREGULAR) 297 { 298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 299 { 300 *err = U_ZERO_ERROR; 301 } 302 /* else the caller must have set the error code accordingly. */ 303 } 304 /* else ignore the reset, close and clone calls. */ 305 } 306 307 U_CAPI void U_EXPORT2 308 UCNV_TO_U_CALLBACK_SUBSTITUTE ( 309 const void *context, 310 UConverterToUnicodeArgs *toArgs, 311 const char* codeUnits, 312 int32_t length, 313 UConverterCallbackReason reason, 314 UErrorCode * err) 315 { 316 if (reason <= UCNV_IRREGULAR) 317 { 318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 319 { 320 *err = U_ZERO_ERROR; 321 ucnv_cbToUWriteSub(toArgs,0,err); 322 } 323 /* else the caller must have set the error code accordingly. */ 324 } 325 /* else ignore the reset, close and clone calls. */ 326 } 327 328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, 329 *and uses that as the substitution sequence 330 */ 331 U_CAPI void U_EXPORT2 332 UCNV_TO_U_CALLBACK_ESCAPE ( 333 const void *context, 334 UConverterToUnicodeArgs *toArgs, 335 const char* codeUnits, 336 int32_t length, 337 UConverterCallbackReason reason, 338 UErrorCode * err) 339 { 340 UChar uniValueString[VALUE_STRING_LENGTH]; 341 int32_t valueStringLength = 0; 342 int32_t i = 0; 343 344 if (reason > UCNV_IRREGULAR) 345 { 346 return; 347 } 348 349 if(context==NULL) 350 { 351 while (i < length) 352 { 353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ 355 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); 356 } 357 } 358 else 359 { 360 switch(*((char*)context)) 361 { 362 case UCNV_PRV_ESCAPE_XML_DEC: 363 while (i < length) 364 { 365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 367 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); 368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 369 } 370 break; 371 372 case UCNV_PRV_ESCAPE_XML_HEX: 373 while (i < length) 374 { 375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 378 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); 379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 380 } 381 break; 382 case UCNV_PRV_ESCAPE_C: 383 while (i < length) 384 { 385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 387 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); 388 } 389 break; 390 default: 391 while (i < length) 392 { 393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ 395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); 396 valueStringLength += 2; 397 } 398 } 399 } 400 /* reset the error */ 401 *err = U_ZERO_ERROR; 402 403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); 404 } 405 406 #endif 407