1 /* 2 ********************************************************************** 3 * Copyright (C) 1999-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * ucnv_cnv.h: 8 * Definitions for converter implementations. 9 * 10 * Modification History: 11 * 12 * Date Name Description 13 * 05/09/00 helena Added implementation to handle fallback mappings. 14 * 06/29/2000 helena Major rewrite of the callback APIs. 15 */ 16 17 #ifndef UCNV_CNV_H 18 #define UCNV_CNV_H 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_CONVERSION 23 24 #include "unicode/ucnv.h" 25 #include "unicode/ucnv_err.h" 26 #include "unicode/uset.h" 27 #include "uset_imp.h" 28 29 U_CDECL_BEGIN 30 31 /* this is used in fromUnicode DBCS tables as an "unassigned" marker */ 32 #define missingCharMarker 0xFFFF 33 34 /* 35 * #define missingUCharMarker 0xfffe 36 * 37 * commented out because there are actually two values used in toUnicode tables: 38 * U+fffe "unassigned" 39 * U+ffff "illegal" 40 */ 41 42 /** Forward declaration, see ucnv_bld.h */ 43 struct UConverterSharedData; 44 typedef struct UConverterSharedData UConverterSharedData; 45 46 /* function types for UConverterImpl ---------------------------------------- */ 47 48 /* struct with arguments for UConverterLoad and ucnv_load() */ 49 typedef struct { 50 int32_t size; /* sizeof(UConverterLoadArgs) */ 51 int32_t nestedLoads; /* count nested ucnv_load() calls */ 52 UBool onlyTestIsLoadable; /* input: don't actually load */ 53 UBool reserved0; /* reserved - for good alignment of the pointers */ 54 int16_t reserved; /* reserved - for good alignment of the pointers */ 55 uint32_t options; 56 const char *pkg, *name, *locale; 57 } UConverterLoadArgs; 58 59 #define UCNV_LOAD_ARGS_INITIALIZER \ 60 { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL } 61 62 typedef void (*UConverterLoad) (UConverterSharedData *sharedData, 63 UConverterLoadArgs *pArgs, 64 const uint8_t *raw, UErrorCode *pErrorCode); 65 typedef void (*UConverterUnload) (UConverterSharedData *sharedData); 66 67 typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode); 68 typedef void (*UConverterClose) (UConverter *cnv); 69 70 typedef enum UConverterResetChoice { 71 UCNV_RESET_BOTH, 72 UCNV_RESET_TO_UNICODE, 73 UCNV_RESET_FROM_UNICODE 74 } UConverterResetChoice; 75 76 typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); 77 78 /* 79 * Converter implementation function(s) for ucnv_toUnicode(). 80 * If the toUnicodeWithOffsets function pointer is NULL, 81 * then the toUnicode function will be used and the offsets will be set to -1. 82 * 83 * Must maintain state across buffers. Use toUBytes[toULength] for partial input 84 * sequences; it will be checked in ucnv.c at the end of the input stream 85 * to detect truncated input. 86 * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND. 87 * 88 * The toUnicodeWithOffsets must write exactly as many offset values as target 89 * units. Write offset values of -1 for when the source index corresponding to 90 * the output unit is not known (e.g., the character started in an earlier buffer). 91 * The pArgs->offsets pointer need not be moved forward. 92 * 93 * At function return, either one of the following conditions must be true: 94 * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit 95 * - another error code with toUBytes[toULength] set to the offending input 96 * - no error, and the source is consumed: source==sourceLimit 97 * 98 * The ucnv.c code will handle the end of the input (reset) 99 * (reset, and truncation detection) and callbacks. 100 */ 101 typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); 102 103 /* 104 * Same rules as for UConverterToUnicode. 105 * A lead surrogate is kept in fromUChar32 across buffers, and if an error 106 * occurs, then the offending input code point must be put into fromUChar32 107 * as well. 108 */ 109 typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *); 110 111 /* 112 * Converter implementation function for ucnv_convertEx(), for direct conversion 113 * between two charsets without pivoting through UTF-16. 114 * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. 115 * In addition, 116 * - The toUnicode side must behave and keep state exactly like the 117 * UConverterToUnicode implementation for the same source charset. 118 * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back 119 * to pivoting. When this function is called, the conversion framework makes 120 * sure that this warning is not set on input. 121 * - Continuing a partial match and flushing the toUnicode replay buffer 122 * are handled by pivoting, using the toUnicode and fromUnicode functions. 123 */ 124 typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, 125 UConverterToUnicodeArgs *pToUArgs, 126 UErrorCode *pErrorCode); 127 128 /* 129 * Converter implementation function for ucnv_getNextUChar(). 130 * If the function pointer is NULL, then the toUnicode function will be used. 131 * 132 * Will be called at a character boundary (toULength==0). 133 * May return with 134 * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input 135 * (the return value will be ignored) 136 * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!) 137 * with toUBytes[toULength] set to the offending input 138 * (the return value will be ignored) 139 * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, 140 * to indicate that the ucnv.c code shall call the toUnicode function instead 141 * - return a real code point result 142 * 143 * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed. 144 * 145 * The ucnv.c code will handle the end of the input (reset) 146 * (except for truncation detection!) and callbacks. 147 */ 148 typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *); 149 150 typedef void (*UConverterGetStarters)(const UConverter* converter, 151 UBool starters[256], 152 UErrorCode *pErrorCode); 153 154 /* If this function pointer is null or if the function returns null 155 * the name field in static data struct should be returned by 156 * ucnv_getName() API function 157 */ 158 typedef const char * (*UConverterGetName) (const UConverter *cnv); 159 160 /** 161 * Write the codepage substitution character. 162 * If this function is not set, then ucnv_cbFromUWriteSub() writes 163 * the substitution character from UConverter. 164 * For stateful converters, it is typically necessary to handle this 165 * specificially for the converter in order to properly maintain the state. 166 */ 167 typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode); 168 169 /** 170 * For converter-specific safeClone processing 171 * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes 172 * after the converter is done opening. 173 * If this function is set, then it is called just after a memcpy() of 174 * converter data to the new, empty converter, and is expected to set up 175 * the initial state of the converter. It is not expected to increment the 176 * reference counts of the standard data types such as the shared data. 177 */ 178 typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, 179 void *stackBuffer, 180 int32_t *pBufferSize, 181 UErrorCode *status); 182 183 /** 184 * Filters for some ucnv_getUnicodeSet() implementation code. 185 */ 186 typedef enum UConverterSetFilter { 187 UCNV_SET_FILTER_NONE, 188 UCNV_SET_FILTER_DBCS_ONLY, 189 UCNV_SET_FILTER_2022_CN, 190 UCNV_SET_FILTER_SJIS, 191 UCNV_SET_FILTER_GR94DBCS, 192 UCNV_SET_FILTER_HZ, 193 UCNV_SET_FILTER_COUNT 194 } UConverterSetFilter; 195 196 /** 197 * Fills the set of Unicode code points that can be converted by an ICU converter. 198 * The API function ucnv_getUnicodeSet() clears the USet before calling 199 * the converter's getUnicodeSet() implementation; the converter should only 200 * add the appropriate code points to allow recursive use. 201 * For example, the ISO-2022-JP converter will call each subconverter's 202 * getUnicodeSet() implementation to consecutively add code points to 203 * the same USet, which will result in a union of the sets of all subconverters. 204 * 205 * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. 206 */ 207 typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, 208 const USetAdder *sa, 209 UConverterUnicodeSet which, 210 UErrorCode *pErrorCode); 211 212 UBool CONVERSION_U_SUCCESS (UErrorCode err); 213 214 /** 215 * UConverterImpl contains all the data and functions for a converter type. 216 * Its function pointers work much like a C++ vtable. 217 * Many converter types need to define only a subset of the functions; 218 * when a function pointer is NULL, then a default action will be performed. 219 * 220 * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, 221 * otherwise the converter may crash. 222 * Every converter type that has variable-length codepage sequences should 223 * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for 224 * correct offset handling. 225 * All other functions may or may not be implemented - it depends only on 226 * whether the converter type needs them. 227 * 228 * When open() fails, then close() will be called, if present. 229 */ 230 struct UConverterImpl { 231 UConverterType type; 232 233 UConverterLoad load; 234 UConverterUnload unload; 235 236 UConverterOpen open; 237 UConverterClose close; 238 UConverterReset reset; 239 240 UConverterToUnicode toUnicode; 241 UConverterToUnicode toUnicodeWithOffsets; 242 UConverterFromUnicode fromUnicode; 243 UConverterFromUnicode fromUnicodeWithOffsets; 244 UConverterGetNextUChar getNextUChar; 245 246 UConverterGetStarters getStarters; 247 UConverterGetName getName; 248 UConverterWriteSub writeSub; 249 UConverterSafeClone safeClone; 250 UConverterGetUnicodeSet getUnicodeSet; 251 252 UConverterConvert toUTF8; 253 UConverterConvert fromUTF8; 254 }; 255 256 extern const UConverterSharedData 257 _MBCSData, _Latin1Data, 258 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, 259 _ISO2022Data, 260 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, 261 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, 262 _HZData,_ISCIIData, _SCSUData, _ASCIIData, 263 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; 264 265 U_CDECL_END 266 267 /** Always use fallbacks from codepage to Unicode */ 268 #define TO_U_USE_FALLBACK(useFallback) TRUE 269 #define UCNV_TO_U_USE_FALLBACK(cnv) TRUE 270 271 /** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */ 272 #define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000) 273 #define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) 274 #define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c) 275 276 /** 277 * Magic number for ucnv_getNextUChar(), returned by a 278 * getNextUChar() implementation to indicate to use the converter's toUnicode() 279 * instead of the native function. 280 * @internal 281 */ 282 #define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 283 284 U_CFUNC void 285 ucnv_getCompleteUnicodeSet(const UConverter *cnv, 286 const USetAdder *sa, 287 UConverterUnicodeSet which, 288 UErrorCode *pErrorCode); 289 290 U_CFUNC void 291 ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, 292 const USetAdder *sa, 293 UConverterUnicodeSet which, 294 UErrorCode *pErrorCode); 295 296 U_CFUNC void 297 ucnv_fromUWriteBytes(UConverter *cnv, 298 const char *bytes, int32_t length, 299 char **target, const char *targetLimit, 300 int32_t **offsets, 301 int32_t sourceIndex, 302 UErrorCode *pErrorCode); 303 U_CFUNC void 304 ucnv_toUWriteUChars(UConverter *cnv, 305 const UChar *uchars, int32_t length, 306 UChar **target, const UChar *targetLimit, 307 int32_t **offsets, 308 int32_t sourceIndex, 309 UErrorCode *pErrorCode); 310 311 U_CFUNC void 312 ucnv_toUWriteCodePoint(UConverter *cnv, 313 UChar32 c, 314 UChar **target, const UChar *targetLimit, 315 int32_t **offsets, 316 int32_t sourceIndex, 317 UErrorCode *pErrorCode); 318 319 #endif 320 321 #endif /* UCNV_CNV */ 322