1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2008, International Business Machines 5 * Corporation, Google and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 */ 9 /* 10 * Author : eldawy (at) google.com (Mohamed Eldawy) 11 * ucnvsel.h 12 * 13 * Purpose: To generate a list of encodings capable of handling 14 * a given Unicode text 15 * 16 * Started 09-April-2008 17 */ 18 19 #ifndef __ICU_UCNV_SEL_H__ 20 #define __ICU_UCNV_SEL_H__ 21 22 #include "unicode/uset.h" 23 #include "unicode/utypes.h" 24 #include "unicode/utf16.h" 25 #include "unicode/uenum.h" 26 #include "unicode/ucnv.h" 27 28 /** 29 * \file 30 * 31 * A converter selector is built with a set of encoding/charset names 32 * and given an input string returns the set of names of the 33 * corresponding converters which can convert the string. 34 * 35 * A converter selector can be serialized into a buffer and reopened 36 * from the serialized form. 37 */ 38 39 /** 40 * @{ 41 * The selector data structure 42 */ 43 struct UConverterSelector; 44 typedef struct UConverterSelector UConverterSelector; 45 /** @} */ 46 47 /** 48 * Open a selector. 49 * If converterListSize is 0, build for all available converters. 50 * If excludedCodePoints is NULL, don't exclude any code points. 51 * 52 * @param converterList a pointer to encoding names needed to be involved. 53 * Can be NULL if converterListSize==0. 54 * The list and the names will be cloned, and the caller 55 * retains ownership of the original. 56 * @param converterListSize number of encodings in above list. 57 * If 0, builds a selector for all available converters. 58 * @param excludedCodePoints a set of code points to be excluded from consideration. 59 * That is, excluded code points in a string do not change 60 * the selection result. (They might be handled by a callback.) 61 * Use NULL to exclude nothing. 62 * @param whichSet what converter set to use? Use this to determine whether 63 * to consider only roundtrip mappings or also fallbacks. 64 * @param status an in/out ICU UErrorCode 65 * @return the new selector 66 * 67 * @draft ICU 4.2 68 */ 69 U_CAPI UConverterSelector* U_EXPORT2 70 ucnvsel_open(const char* const* converterList, int32_t converterListSize, 71 const USet* excludedCodePoints, 72 const UConverterUnicodeSet whichSet, UErrorCode* status); 73 74 /** 75 * Closes a selector. 76 * If any Enumerations were returned by ucnv_select*, they become invalid. 77 * They can be closed before or after calling ucnv_closeSelector, 78 * but should never be used after the selector is closed. 79 * 80 * @see ucnv_selectForString 81 * @see ucnv_selectForUTF8 82 * 83 * @param sel selector to close 84 * 85 * @draft ICU 4.2 86 */ 87 U_CAPI void U_EXPORT2 88 ucnvsel_close(UConverterSelector *sel); 89 90 /** 91 * Open a selector from its serialized form. 92 * The buffer must remain valid and unchanged for the lifetime of the selector. 93 * This is much faster than creating a selector from scratch. 94 * Using a serialized form from a different machine (endianness/charset) is supported. 95 * 96 * @param buffer pointer to the serialized form of a converter selector; 97 * must be 32-bit-aligned 98 * @param length the capacity of this buffer (can be equal to or larger than 99 * the actual data length) 100 * @param status an in/out ICU UErrorCode 101 * @return the new selector 102 * 103 * @draft ICU 4.2 104 */ 105 U_CAPI UConverterSelector* U_EXPORT2 106 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); 107 108 /** 109 * Serialize a selector into a linear buffer. 110 * The serialized form is portable to different machines. 111 * 112 * @param sel selector to consider 113 * @param buffer pointer to 32-bit-aligned memory to be filled with the 114 * serialized form of this converter selector 115 * @param bufferCapacity the capacity of this buffer 116 * @param status an in/out ICU UErrorCode 117 * @return the required buffer capacity to hold serialize data (even if the call fails 118 * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) 119 * 120 * @draft ICU 4.2 121 */ 122 U_CAPI int32_t U_EXPORT2 123 ucnvsel_serialize(const UConverterSelector* sel, 124 void* buffer, int32_t bufferCapacity, UErrorCode* status); 125 126 /** 127 * Select converters that can map all characters in a UTF-16 string, 128 * ignoring the excluded code points. 129 * 130 * @param sel a selector 131 * @param s UTF-16 string 132 * @param length length of the string, or -1 if NUL-terminated 133 * @param status an in/out ICU UErrorCode 134 * @return an enumeration containing encoding names. 135 * The returned encoding names and their order will be the same as 136 * supplied when building the selector. 137 * 138 * @draft ICU 4.2 139 */ 140 U_CAPI UEnumeration * U_EXPORT2 141 ucnvsel_selectForString(const UConverterSelector* sel, 142 const UChar *s, int32_t length, UErrorCode *status); 143 144 /** 145 * Select converters that can map all characters in a UTF-8 string, 146 * ignoring the excluded code points. 147 * 148 * @param sel a selector 149 * @param s UTF-8 string 150 * @param length length of the string, or -1 if NUL-terminated 151 * @param status an in/out ICU UErrorCode 152 * @return an enumeration containing encoding names. 153 * The returned encoding names and their order will be the same as 154 * supplied when building the selector. 155 * 156 * @draft ICU 4.2 157 */ 158 U_CAPI UEnumeration * U_EXPORT2 159 ucnvsel_selectForUTF8(const UConverterSelector* sel, 160 const char *s, int32_t length, UErrorCode *status); 161 162 #endif /* __ICU_UCNV_SEL_H__ */ 163