Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2008-2011, International Business Machines
      5 *   Corporation, Google and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 */
      9 /*
     10  * Author : eldawy (at) google.com (Mohamed Eldawy)
     11  * ucnvsel.h
     12  *
     13  * Purpose: To generate a list of encodings capable of handling
     14  * a given Unicode text
     15  *
     16  * Started 09-April-2008
     17  */
     18 
     19 #ifndef __ICU_UCNV_SEL_H__
     20 #define __ICU_UCNV_SEL_H__
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_CONVERSION
     25 
     26 #include "unicode/uset.h"
     27 #include "unicode/utf16.h"
     28 #include "unicode/uenum.h"
     29 #include "unicode/ucnv.h"
     30 #include "unicode/localpointer.h"
     31 
     32 /**
     33  * \file
     34  *
     35  * A converter selector is built with a set of encoding/charset names
     36  * and, given an input string, returns the set of names of the
     37  * corresponding converters which can convert the string.
     38  *
     39  * A converter selector can be serialized into a buffer and reopened
     40  * from the serialized form.
     41  */
     42 
     43 /**
     44  * @{
     45  * The selector data structure. Only forward-declared in this header; used through pointers.
     46  */
     47 struct UConverterSelector;
     48 typedef struct UConverterSelector UConverterSelector;
     49 /** @} */
     50 
     51 /**
     52  * Open a selector.
     53  * If converterListSize is 0, build for all available converters.
     54  * If excludedCodePoints is NULL, don't exclude any code points.
     55  *
     56  * @param converterList an array of the encoding names to be considered.
     57  *                      Can be NULL if converterListSize==0.
     58  *                      The list and the names will be cloned, and the caller
     59  *                      retains ownership of the original.
     60  * @param converterListSize number of encodings in the above list.
     61  *                          If 0, builds a selector for all available converters.
     62  * @param excludedCodePoints a set of code points to be excluded from consideration.
     63  *                           That is, excluded code points in a string do not change
     64  *                           the selection result. (They might be handled by a callback.)
     65  *                           Use NULL to exclude nothing.
     66  * @param whichSet which converter set to use; this determines whether
     67  *                 to consider only roundtrip mappings or also fallbacks.
     68  * @param status an in/out ICU UErrorCode
     69  * @return the new selector; close it with ucnvsel_close()
     70  *
     71  * @stable ICU 4.2
     72  */
     73 U_STABLE UConverterSelector* U_EXPORT2
     74 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
     75              const USet* excludedCodePoints,
     76              const UConverterUnicodeSet whichSet, UErrorCode* status);
     77 
     78 /**
     79  * Closes a selector.
     80  * If any Enumerations were returned by ucnvsel_selectFor*, they become invalid.
     81  * They can be closed before or after calling ucnvsel_close,
     82  * but should never be used after the selector is closed.
     83  *
     84  * @see ucnvsel_selectForString
     85  * @see ucnvsel_selectForUTF8
     86  *
     87  * @param sel selector to close
     88  *
     89  * @stable ICU 4.2
     90  */
     91 U_STABLE void U_EXPORT2
     92 ucnvsel_close(UConverterSelector *sel);
     93 
     94 #if U_SHOW_CPLUSPLUS_API
     95 
     96 U_NAMESPACE_BEGIN
     97 
     98 /**
     99  * \class LocalUConverterSelectorPointer
    100  * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
    101  * For most methods see the LocalPointerBase base class.
    102  *
    103  * @see LocalPointerBase
    104  * @see LocalPointer
    105  * @stable ICU 4.4
    106  */
    107 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
    108 
    109 U_NAMESPACE_END
    110 
    111 #endif  /* U_SHOW_CPLUSPLUS_API */
    112 
    113 /**
    114  * Open a selector from its serialized form.
    115  * The buffer must remain valid and unchanged for the lifetime of the selector.
    116  * This is much faster than creating a selector from scratch.
    117  * Using a serialized form from a different machine (endianness/charset) is supported.
    118  *
    119  * @param buffer pointer to the serialized form of a converter selector;
    120  *               must be 32-bit-aligned
    121  * @param length the capacity of this buffer (can be equal to or larger than
    122  *               the actual data length)
    123  * @param status an in/out ICU UErrorCode
    124  * @return the new selector; close it with ucnvsel_close()
    125  *
    126  * @stable ICU 4.2
    127  */
    128 U_STABLE UConverterSelector* U_EXPORT2
    129 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
    130 
    131 /**
    132  * Serialize a selector into a linear buffer.
    133  * The serialized form is portable to different machines.
    134  *
    135  * @param sel selector to consider
    136  * @param buffer pointer to 32-bit-aligned memory to be filled with the
    137  *               serialized form of this converter selector
    138  * @param bufferCapacity the capacity of this buffer
    139  * @param status an in/out ICU UErrorCode
    140  * @return the required buffer capacity to hold the serialized data (even if the call
    141  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
    142  *
    143  * @stable ICU 4.2
    144  */
    145 U_STABLE int32_t U_EXPORT2
    146 ucnvsel_serialize(const UConverterSelector* sel,
    147                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
    148 
    149 /**
    150  * Select converters that can map all characters in a UTF-16 string,
    151  * ignoring the excluded code points.
    152  *
    153  * @param sel a selector
    154  * @param s UTF-16 string
    155  * @param length length of the string, or -1 if NUL-terminated
    156  * @param status an in/out ICU UErrorCode
    157  * @return an enumeration of encoding names; do not use it after sel is closed.
    158  *         The names are returned as they were supplied when building
    159  *         the selector, and in the same order.
    160  *
    161  * @stable ICU 4.2
    162  */
    163 U_STABLE UEnumeration * U_EXPORT2
    164 ucnvsel_selectForString(const UConverterSelector* sel,
    165                         const UChar *s, int32_t length, UErrorCode *status);
    166 
    167 /**
    168  * Select converters that can map all characters in a UTF-8 string,
    169  * ignoring the excluded code points.
    170  *
    171  * @param sel a selector
    172  * @param s UTF-8 string
    173  * @param length length of the string, or -1 if NUL-terminated
    174  * @param status an in/out ICU UErrorCode
    175  * @return an enumeration of encoding names; do not use it after sel is closed.
    176  *         The names are returned as they were supplied when building
    177  *         the selector, and in the same order.
    178  *
    179  * @stable ICU 4.2
    180  */
    181 U_STABLE UEnumeration * U_EXPORT2
    182 ucnvsel_selectForUTF8(const UConverterSelector* sel,
    183                       const char *s, int32_t length, UErrorCode *status);
    184 
    185 #endif  /* !UCONFIG_NO_CONVERSION */
    186 
    187 #endif  /* __ICU_UCNV_SEL_H__ */
    188