1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2008-2011, International Business Machines 5 * Corporation, Google and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 */ 9 /* 10 * Author : eldawy (at) google.com (Mohamed Eldawy) 11 * ucnvsel.h 12 * 13 * Purpose: To generate a list of encodings capable of handling 14 * a given Unicode text 15 * 16 * Started 09-April-2008 17 */ 18 19 #ifndef __ICU_UCNV_SEL_H__ 20 #define __ICU_UCNV_SEL_H__ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/uset.h" 27 #include "unicode/utf16.h" 28 #include "unicode/uenum.h" 29 #include "unicode/ucnv.h" 30 #include "unicode/localpointer.h" 31 32 /** 33 * \file 34 * 35 * A converter selector is built with a set of encoding/charset names 36 * and given an input string returns the set of names of the 37 * corresponding converters which can convert the string. 38 * 39 * A converter selector can be serialized into a buffer and reopened 40 * from the serialized form. 41 */ 42 43 /** 44 * @{ 45 * The selector data structure 46 */ 47 struct UConverterSelector; 48 typedef struct UConverterSelector UConverterSelector; 49 /** @} */ 50 51 /** 52 * Open a selector. 53 * If converterListSize is 0, build for all available converters. 54 * If excludedCodePoints is NULL, don't exclude any code points. 55 * 56 * @param converterList a pointer to encoding names needed to be involved. 57 * Can be NULL if converterListSize==0. 58 * The list and the names will be cloned, and the caller 59 * retains ownership of the original. 60 * @param converterListSize number of encodings in above list. 61 * If 0, builds a selector for all available converters. 62 * @param excludedCodePoints a set of code points to be excluded from consideration. 63 * That is, excluded code points in a string do not change 64 * the selection result. (They might be handled by a callback.) 65 * Use NULL to exclude nothing. 66 * @param whichSet what converter set to use? Use this to determine whether 67 * to consider only roundtrip mappings or also fallbacks. 68 * @param status an in/out ICU UErrorCode 69 * @return the new selector 70 * 71 * @stable ICU 4.2 72 */ 73 U_STABLE UConverterSelector* U_EXPORT2 74 ucnvsel_open(const char* const* converterList, int32_t converterListSize, 75 const USet* excludedCodePoints, 76 const UConverterUnicodeSet whichSet, UErrorCode* status); 77 78 /** 79 * Closes a selector. 80 * If any Enumerations were returned by ucnv_select*, they become invalid. 81 * They can be closed before or after calling ucnv_closeSelector, 82 * but should never be used after the selector is closed. 83 * 84 * @see ucnv_selectForString 85 * @see ucnv_selectForUTF8 86 * 87 * @param sel selector to close 88 * 89 * @stable ICU 4.2 90 */ 91 U_STABLE void U_EXPORT2 92 ucnvsel_close(UConverterSelector *sel); 93 94 #if U_SHOW_CPLUSPLUS_API 95 96 U_NAMESPACE_BEGIN 97 98 /** 99 * \class LocalUConverterSelectorPointer 100 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). 101 * For most methods see the LocalPointerBase base class. 102 * 103 * @see LocalPointerBase 104 * @see LocalPointer 105 * @stable ICU 4.4 106 */ 107 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close); 108 109 U_NAMESPACE_END 110 111 #endif 112 113 /** 114 * Open a selector from its serialized form. 115 * The buffer must remain valid and unchanged for the lifetime of the selector. 116 * This is much faster than creating a selector from scratch. 117 * Using a serialized form from a different machine (endianness/charset) is supported. 118 * 119 * @param buffer pointer to the serialized form of a converter selector; 120 * must be 32-bit-aligned 121 * @param length the capacity of this buffer (can be equal to or larger than 122 * the actual data length) 123 * @param status an in/out ICU UErrorCode 124 * @return the new selector 125 * 126 * @stable ICU 4.2 127 */ 128 U_STABLE UConverterSelector* U_EXPORT2 129 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); 130 131 /** 132 * Serialize a selector into a linear buffer. 133 * The serialized form is portable to different machines. 134 * 135 * @param sel selector to consider 136 * @param buffer pointer to 32-bit-aligned memory to be filled with the 137 * serialized form of this converter selector 138 * @param bufferCapacity the capacity of this buffer 139 * @param status an in/out ICU UErrorCode 140 * @return the required buffer capacity to hold serialize data (even if the call fails 141 * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) 142 * 143 * @stable ICU 4.2 144 */ 145 U_STABLE int32_t U_EXPORT2 146 ucnvsel_serialize(const UConverterSelector* sel, 147 void* buffer, int32_t bufferCapacity, UErrorCode* status); 148 149 /** 150 * Select converters that can map all characters in a UTF-16 string, 151 * ignoring the excluded code points. 152 * 153 * @param sel a selector 154 * @param s UTF-16 string 155 * @param length length of the string, or -1 if NUL-terminated 156 * @param status an in/out ICU UErrorCode 157 * @return an enumeration containing encoding names. 158 * The returned encoding names and their order will be the same as 159 * supplied when building the selector. 160 * 161 * @stable ICU 4.2 162 */ 163 U_STABLE UEnumeration * U_EXPORT2 164 ucnvsel_selectForString(const UConverterSelector* sel, 165 const UChar *s, int32_t length, UErrorCode *status); 166 167 /** 168 * Select converters that can map all characters in a UTF-8 string, 169 * ignoring the excluded code points. 170 * 171 * @param sel a selector 172 * @param s UTF-8 string 173 * @param length length of the string, or -1 if NUL-terminated 174 * @param status an in/out ICU UErrorCode 175 * @return an enumeration containing encoding names. 176 * The returned encoding names and their order will be the same as 177 * supplied when building the selector. 178 * 179 * @stable ICU 4.2 180 */ 181 U_STABLE UEnumeration * U_EXPORT2 182 ucnvsel_selectForUTF8(const UConverterSelector* sel, 183 const char *s, int32_t length, UErrorCode *status); 184 185 #endif /* !UCONFIG_NO_CONVERSION */ 186 187 #endif /* __ICU_UCNV_SEL_H__ */ 188