1 /* 2 ********************************************************************** 3 * Copyright (C) 1999-2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * 8 * ucnv_err.h: 9 */ 10 11 /** 12 * \file 13 * \brief C UConverter predefined error callbacks 14 * 15 * <h2>Error Behaviour Functions</h2> 16 * Defines some error behaviour functions called by ucnv_{from,to}Unicode 17 * These are provided as part of ICU and many are stable, but they 18 * can also be considered only as an example of what can be done with 19 * callbacks. You may of course write your own. 20 * 21 * If you want to write your own, you may also find the functions from 22 * ucnv_cb.h useful when writing your own callbacks. 23 * 24 * These functions, although public, should NEVER be called directly. 25 * They should be used as parameters to the ucnv_setFromUCallback 26 * and ucnv_setToUCallback functions, to set the behaviour of a converter 27 * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. 28 * 29 * usage example: 'STOP' doesn't need any context, but newContext 30 * could be set to something other than 'NULL' if needed. The available 31 * contexts in this header can modify the default behavior of the callback. 32 * 33 * \code 34 * UErrorCode err = U_ZERO_ERROR; 35 * UConverter *myConverter = ucnv_open("ibm-949", &err); 36 * const void *oldContext; 37 * UConverterFromUCallback oldAction; 38 * 39 * 40 * if (U_SUCCESS(err)) 41 * { 42 * ucnv_setFromUCallBack(myConverter, 43 * UCNV_FROM_U_CALLBACK_STOP, 44 * NULL, 45 * &oldAction, 46 * &oldContext, 47 * &status); 48 * } 49 * \endcode 50 * 51 * The code above tells "myConverter" to stop when it encounters an 52 * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from 53 * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, 54 * and ucnv_setToUCallBack would need to be called in order to change 55 * that behavior too. 56 * 57 * Here is an example with a context: 58 * 59 * \code 60 * UErrorCode err = U_ZERO_ERROR; 61 * UConverter *myConverter = ucnv_open("ibm-949", &err); 62 * const void *oldContext; 63 * UConverterFromUCallback oldAction; 64 * 65 * 66 * if (U_SUCCESS(err)) 67 * { 68 * ucnv_setToUCallBack(myConverter, 69 * UCNV_TO_U_CALLBACK_SUBSTITUTE, 70 * UCNV_SUB_STOP_ON_ILLEGAL, 71 * &oldAction, 72 * &oldContext, 73 * &status); 74 * } 75 * \endcode 76 * 77 * The code above tells "myConverter" to stop when it encounters an 78 * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from 79 * Codepage -> Unicode. Any unmapped and legal characters will be 80 * substituted to be the default substitution character. 81 */ 82 83 #ifndef UCNV_ERR_H 84 #define UCNV_ERR_H 85 86 #include "unicode/utypes.h" 87 88 #if !UCONFIG_NO_CONVERSION 89 90 /** Forward declaring the UConverter structure. @stable ICU 2.0 */ 91 struct UConverter; 92 93 /** @stable ICU 2.0 */ 94 typedef struct UConverter UConverter; 95 96 /** 97 * FROM_U, TO_U context options for sub callback 98 * @stable ICU 2.0 99 */ 100 #define UCNV_SUB_STOP_ON_ILLEGAL "i" 101 102 /** 103 * FROM_U, TO_U context options for skip callback 104 * @stable ICU 2.0 105 */ 106 #define UCNV_SKIP_STOP_ON_ILLEGAL "i" 107 108 /** 109 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 110 * @stable ICU 2.0 111 */ 112 #define UCNV_ESCAPE_ICU NULL 113 /** 114 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) 115 * @stable ICU 2.0 116 */ 117 #define UCNV_ESCAPE_JAVA "J" 118 /** 119 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) 120 * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) 121 * @stable ICU 2.0 122 */ 123 #define UCNV_ESCAPE_C "C" 124 /** 125 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly 126 * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly 127 * @stable ICU 2.0 128 */ 129 #define UCNV_ESCAPE_XML_DEC "D" 130 /** 131 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly 132 * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly 133 * @stable ICU 2.0 134 */ 135 #define UCNV_ESCAPE_XML_HEX "X" 136 /** 137 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) 138 * @stable ICU 2.0 139 */ 140 #define UCNV_ESCAPE_UNICODE "U" 141 142 /** 143 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is, 144 * a backslash, 1..6 hex digits, and a space) 145 * @stable ICU 4.0 146 */ 147 #define UCNV_ESCAPE_CSS2 "S" 148 149 /** 150 * The process condition code to be used with the callbacks. 151 * Codes which are greater than UCNV_IRREGULAR should be 152 * passed on to any chained callbacks. 153 * @stable ICU 2.0 154 */ 155 typedef enum { 156 UCNV_UNASSIGNED = 0, /**< The code point is unassigned. 157 The error code U_INVALID_CHAR_FOUND will be set. */ 158 UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, 159 \\x81\\x2E is illegal in SJIS because \\x2E 160 is not a valid trail byte for the \\x81 161 lead byte. 162 Also, starting with Unicode 3.0.1, non-shortest byte sequences 163 in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) 164 are also illegal, not just irregular. 165 The error code U_ILLEGAL_CHAR_FOUND will be set. */ 166 UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in 167 the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF 168 are irregular UTF-8 byte sequences for single surrogate 169 code points. 170 The error code U_INVALID_CHAR_FOUND will be set. */ 171 UCNV_RESET = 3, /**< The callback is called with this reason when a 172 'reset' has occured. Callback should reset all 173 state. */ 174 UCNV_CLOSE = 4, /**< Called when the converter is closed. The 175 callback should release any allocated memory.*/ 176 UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the 177 converter. the pointer available as the 178 'context' is an alias to the original converters' 179 context pointer. If the context must be owned 180 by the new converter, the callback must clone 181 the data and call ucnv_setFromUCallback 182 (or setToUCallback) with the correct pointer. 183 @stable ICU 2.2 184 */ 185 } UConverterCallbackReason; 186 187 188 /** 189 * The structure for the fromUnicode callback function parameter. 190 * @stable ICU 2.0 191 */ 192 typedef struct { 193 uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ 194 UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ 195 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ 196 const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ 197 const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 198 char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 199 const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 200 int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 201 } UConverterFromUnicodeArgs; 202 203 204 /** 205 * The structure for the toUnicode callback function parameter. 206 * @stable ICU 2.0 207 */ 208 typedef struct { 209 uint16_t size; /**< The size of this struct @stable ICU 2.0 */ 210 UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ 211 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ 212 const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ 213 const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 214 UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 215 const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 216 int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 217 } UConverterToUnicodeArgs; 218 219 220 /** 221 * DO NOT CALL THIS FUNCTION DIRECTLY! 222 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, 223 * returning the error code back to the caller immediately. 224 * 225 * @param context Pointer to the callback's private data 226 * @param fromUArgs Information about the conversion in progress 227 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 228 * @param length Size (in bytes) of the concerned codepage sequence 229 * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. 230 * @param reason Defines the reason the callback was invoked 231 * @param err This should always be set to a failure status prior to calling. 232 * @stable ICU 2.0 233 */ 234 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( 235 const void *context, 236 UConverterFromUnicodeArgs *fromUArgs, 237 const UChar* codeUnits, 238 int32_t length, 239 UChar32 codePoint, 240 UConverterCallbackReason reason, 241 UErrorCode * err); 242 243 244 245 /** 246 * DO NOT CALL THIS FUNCTION DIRECTLY! 247 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, 248 * returning the error code back to the caller immediately. 249 * 250 * @param context Pointer to the callback's private data 251 * @param toUArgs Information about the conversion in progress 252 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 253 * @param length Size (in bytes) of the concerned codepage sequence 254 * @param reason Defines the reason the callback was invoked 255 * @param err This should always be set to a failure status prior to calling. 256 * @stable ICU 2.0 257 */ 258 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( 259 const void *context, 260 UConverterToUnicodeArgs *toUArgs, 261 const char* codeUnits, 262 int32_t length, 263 UConverterCallbackReason reason, 264 UErrorCode * err); 265 266 /** 267 * DO NOT CALL THIS FUNCTION DIRECTLY! 268 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or 269 * skips only UNASSINGED_SEQUENCE depending on the context parameter 270 * simply ignoring those characters. 271 * 272 * @param context The function currently recognizes the callback options: 273 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 274 * returning the error code back to the caller immediately. 275 * NULL: Skips any ILLEGAL_SEQUENCE 276 * @param fromUArgs Information about the conversion in progress 277 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 278 * @param length Size (in bytes) of the concerned codepage sequence 279 * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. 280 * @param reason Defines the reason the callback was invoked 281 * @param err Return value will be set to success if the callback was handled, 282 * otherwise this value will be set to a failure status. 283 * @stable ICU 2.0 284 */ 285 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( 286 const void *context, 287 UConverterFromUnicodeArgs *fromUArgs, 288 const UChar* codeUnits, 289 int32_t length, 290 UChar32 codePoint, 291 UConverterCallbackReason reason, 292 UErrorCode * err); 293 294 /** 295 * DO NOT CALL THIS FUNCTION DIRECTLY! 296 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 297 * UNASSIGNED_SEQUENCE depending on context parameter, with the 298 * current substitution string for the converter. This is the default 299 * callback. 300 * 301 * @param context The function currently recognizes the callback options: 302 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 303 * returning the error code back to the caller immediately. 304 * NULL: Substitutes any ILLEGAL_SEQUENCE 305 * @param fromUArgs Information about the conversion in progress 306 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 307 * @param length Size (in bytes) of the concerned codepage sequence 308 * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. 309 * @param reason Defines the reason the callback was invoked 310 * @param err Return value will be set to success if the callback was handled, 311 * otherwise this value will be set to a failure status. 312 * @see ucnv_setSubstChars 313 * @stable ICU 2.0 314 */ 315 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( 316 const void *context, 317 UConverterFromUnicodeArgs *fromUArgs, 318 const UChar* codeUnits, 319 int32_t length, 320 UChar32 codePoint, 321 UConverterCallbackReason reason, 322 UErrorCode * err); 323 324 /** 325 * DO NOT CALL THIS FUNCTION DIRECTLY! 326 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the 327 * hexadecimal representation of the illegal codepoints 328 * 329 * @param context The function currently recognizes the callback options: 330 * <ul> 331 * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal 332 * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). 333 * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], 334 * it will substitute the illegal sequence with the substitution characters. 335 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 336 * %UD84D%UDC56</li> 337 * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal 338 * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 339 * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], 340 * it will substitute the illegal sequence with the substitution characters. 341 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 342 * \\uD84D\\uDC56</li> 343 * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal 344 * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 345 * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], 346 * it will substitute the illegal sequence with the substitution characters. 347 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 348 * \\U00023456</li> 349 * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal 350 * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly. 351 * In the Event the converter doesn't support the characters {&,#}[0-9], 352 * it will substitute the illegal sequence with the substitution characters. 353 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 354 * &#144470; and Zero padding is ignored.</li> 355 * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal 356 * representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. 357 * In the Event the converter doesn't support the characters {&,#,x}[0-9], 358 * it will substitute the illegal sequence with the substitution characters. 359 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 360 * \htmlonly&#x23456;\endhtmlonly</li> 361 * </ul> 362 * @param fromUArgs Information about the conversion in progress 363 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 364 * @param length Size (in bytes) of the concerned codepage sequence 365 * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. 366 * @param reason Defines the reason the callback was invoked 367 * @param err Return value will be set to success if the callback was handled, 368 * otherwise this value will be set to a failure status. 369 * @stable ICU 2.0 370 */ 371 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( 372 const void *context, 373 UConverterFromUnicodeArgs *fromUArgs, 374 const UChar* codeUnits, 375 int32_t length, 376 UChar32 codePoint, 377 UConverterCallbackReason reason, 378 UErrorCode * err); 379 380 381 /** 382 * DO NOT CALL THIS FUNCTION DIRECTLY! 383 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or 384 * skips only UNASSINGED_SEQUENCE depending on the context parameter 385 * simply ignoring those characters. 386 * 387 * @param context The function currently recognizes the callback options: 388 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 389 * returning the error code back to the caller immediately. 390 * NULL: Skips any ILLEGAL_SEQUENCE 391 * @param toUArgs Information about the conversion in progress 392 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 393 * @param length Size (in bytes) of the concerned codepage sequence 394 * @param reason Defines the reason the callback was invoked 395 * @param err Return value will be set to success if the callback was handled, 396 * otherwise this value will be set to a failure status. 397 * @stable ICU 2.0 398 */ 399 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( 400 const void *context, 401 UConverterToUnicodeArgs *toUArgs, 402 const char* codeUnits, 403 int32_t length, 404 UConverterCallbackReason reason, 405 UErrorCode * err); 406 407 /** 408 * DO NOT CALL THIS FUNCTION DIRECTLY! 409 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or 410 * UNASSIGNED_SEQUENCE depending on context parameter, with the 411 * Unicode substitution character, U+FFFD. 412 * 413 * @param context The function currently recognizes the callback options: 414 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 415 * returning the error code back to the caller immediately. 416 * NULL: Substitutes any ILLEGAL_SEQUENCE 417 * @param toUArgs Information about the conversion in progress 418 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 419 * @param length Size (in bytes) of the concerned codepage sequence 420 * @param reason Defines the reason the callback was invoked 421 * @param err Return value will be set to success if the callback was handled, 422 * otherwise this value will be set to a failure status. 423 * @stable ICU 2.0 424 */ 425 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( 426 const void *context, 427 UConverterToUnicodeArgs *toUArgs, 428 const char* codeUnits, 429 int32_t length, 430 UConverterCallbackReason reason, 431 UErrorCode * err); 432 433 /** 434 * DO NOT CALL THIS FUNCTION DIRECTLY! 435 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the 436 * hexadecimal representation of the illegal bytes 437 * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). 438 * 439 * @param context This function currently recognizes the callback options: 440 * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, 441 * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. 442 * @param toUArgs Information about the conversion in progress 443 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 444 * @param length Size (in bytes) of the concerned codepage sequence 445 * @param reason Defines the reason the callback was invoked 446 * @param err Return value will be set to success if the callback was handled, 447 * otherwise this value will be set to a failure status. 448 * @stable ICU 2.0 449 */ 450 451 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( 452 const void *context, 453 UConverterToUnicodeArgs *toUArgs, 454 const char* codeUnits, 455 int32_t length, 456 UConverterCallbackReason reason, 457 UErrorCode * err); 458 459 #endif 460 461 #endif 462 463 /*UCNV_ERR_H*/ 464