Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2008-2010, International Business Machines
      5 *   Corporation, Google and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 */
      9 /*
     10  * Author : eldawy (at) google.com (Mohamed Eldawy)
     11  * ucnvsel.h
     12  *
     13  * Purpose: To generate a list of encodings capable of handling
     14  * a given Unicode text
     15  *
     16  * Started 09-April-2008
     17  */
     18 
     19 #ifndef __ICU_UCNV_SEL_H__
     20 #define __ICU_UCNV_SEL_H__
     21 
     22 #include "unicode/uset.h"
     23 #include "unicode/utypes.h"
     24 #include "unicode/utf16.h"
     25 #include "unicode/uenum.h"
     26 #include "unicode/ucnv.h"
     27 #include "unicode/localpointer.h"
     28 
     29 /**
     30  * \file
     31  *
     32  * A converter selector is built with a set of encoding/charset names
     33  * and, given an input string, returns the names of those
     34  * converters which can convert the string.
     35  *
     36  * A converter selector can be serialized into a buffer and reopened
     37  * from the serialized form.
     38  */
     39 
     40 /**
     41  * @{
     42  * The selector data structure; opaque here (only forward-declared in this header).
     43  */
     44 struct UConverterSelector;
     45 typedef struct UConverterSelector UConverterSelector;
     46 /** @} */
     47 
     48 /**
     49  * Open a selector for a list of converters.
     50  * If converterListSize is 0, build for all available converters.
     51  * If excludedCodePoints is NULL, don't exclude any code points.
     52  *
     53  * @param converterList an array of the encoding names to select among.
     54  *                      Can be NULL if converterListSize==0.
     55  *                      The list and the names will be cloned, and the caller
     56  *                      retains ownership of the original.
     57  * @param converterListSize number of encoding names in converterList.
     58  *                          If 0, builds a selector for all available converters.
     59  * @param excludedCodePoints a set of code points to be excluded from consideration.
     60  *                           That is, excluded code points in a string do not change
     61  *                           the selection result. (They might be handled by a callback.)
     62  *                           Use NULL to exclude nothing.
     63  * @param whichSet which converter set to use: selects whether only roundtrip
     64  *                 mappings are considered, or fallbacks as well.
     65  * @param status an in/out ICU UErrorCode
     66  * @return the new selector; release it with ucnvsel_close()
     67  *
     68  * @stable ICU 4.2
     69  */
     70 U_STABLE UConverterSelector* U_EXPORT2
     71 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
     72              const USet* excludedCodePoints,
     73              const UConverterUnicodeSet whichSet, UErrorCode* status);
     74 
     75 /**
     76  * Closes a selector.
     77  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
     78  * They can be closed before or after calling ucnvsel_close,
     79  * but should never be used after the selector is closed.
     80  *
     81  * @see ucnvsel_selectForString
     82  * @see ucnvsel_selectForUTF8
     83  *
     84  * @param sel selector to close
     85  *
     86  * @stable ICU 4.2
     87  */
     88 U_STABLE void U_EXPORT2
     89 ucnvsel_close(UConverterSelector *sel);
     90 
     91 #if U_SHOW_CPLUSPLUS_API
     92 
     93 U_NAMESPACE_BEGIN
     94 
     95 /**
     96  * \class LocalUConverterSelectorPointer
     97  * C++-only "smart pointer" class, closes a UConverterSelector via ucnvsel_close().
     98  * For most methods see the LocalPointerBase base class.
     99  *
    100  * @see LocalPointerBase
    101  * @see LocalPointer
    102  * @stable ICU 4.4
    103  */
    104 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
    105 
    106 U_NAMESPACE_END
    107 
    108 #endif  /* U_SHOW_CPLUSPLUS_API */
    109 
    110 /**
    111  * Open a selector from its serialized form (see ucnvsel_serialize()).
    112  * The buffer must remain valid and unchanged for the lifetime of the selector.
    113  * This is much faster than creating a selector from scratch.
    114  * Using a serialized form from a different machine (endianness/charset) is supported.
    115  *
    116  * @param buffer pointer to the serialized form of a converter selector;
    117  *               must be 32-bit-aligned
    118  * @param length the capacity of this buffer (can be equal to or larger than
    119  *               the actual data length)
    120  * @param status an in/out ICU UErrorCode
    121  * @return the new selector; release it with ucnvsel_close()
    122  *
    123  * @stable ICU 4.2
    124  */
    125 U_STABLE UConverterSelector* U_EXPORT2
    126 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
    127 
    128 /**
    129  * Serialize a selector into a linear buffer, for ucnvsel_openFromSerialized().
    130  * The serialized form is portable to different machines.
    131  *
    132  * @param sel selector to consider
    133  * @param buffer pointer to 32-bit-aligned memory to be filled with the
    134  *               serialized form of this converter selector
    135  * @param bufferCapacity the capacity of this buffer
    136  * @param status an in/out ICU UErrorCode
    137  * @return the required buffer capacity to hold the serialized data (even if the call
    138  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
    139  *
    140  * @stable ICU 4.2
    141  */
    142 U_STABLE int32_t U_EXPORT2
    143 ucnvsel_serialize(const UConverterSelector* sel,
    144                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
    145 
    146 /**
    147  * Select converters that can map all characters in a UTF-16 string,
    148  * ignoring the excluded code points.
    149  *
    150  * @param sel a selector
    151  * @param s UTF-16 string
    152  * @param length length of the string, or -1 if NUL-terminated
    153  * @param status an in/out ICU UErrorCode
    154  * @return an enumeration containing encoding names, spelled and ordered
    155  *         as supplied when building the selector; it must not be used
    156  *         after the selector is closed.
    157  *
    158  * @stable ICU 4.2
    159  */
    160 U_STABLE UEnumeration * U_EXPORT2
    161 ucnvsel_selectForString(const UConverterSelector* sel,
    162                         const UChar *s, int32_t length, UErrorCode *status);
    163 
    164 /**
    165  * Select converters that can map all characters in a UTF-8 string,
    166  * ignoring the excluded code points.
    167  *
    168  * @param sel a selector
    169  * @param s UTF-8 string
    170  * @param length length of the string, or -1 if NUL-terminated
    171  * @param status an in/out ICU UErrorCode
    172  * @return an enumeration containing encoding names, spelled and ordered
    173  *         as supplied when building the selector; it must not be used
    174  *         after the selector is closed.
    175  *
    176  * @stable ICU 4.2
    177  */
    178 U_STABLE UEnumeration * U_EXPORT2
    179 ucnvsel_selectForUTF8(const UConverterSelector* sel,
    180                       const char *s, int32_t length, UErrorCode *status);
    181 
    182 #endif  /* __ICU_UCNV_SEL_H__ */
    183