Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2008-2011, International Business Machines
      7 *   Corporation, Google and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 */
     11 /*
     12  * Author : eldawy (at) google.com (Mohamed Eldawy)
     13  * ucnvsel.h
     14  *
     15  * Purpose: To generate a list of encodings capable of handling
     16  * a given Unicode text
     17  *
     18  * Started 09-April-2008
     19  */
     20 
     21 #ifndef __ICU_UCNV_SEL_H__
     22 #define __ICU_UCNV_SEL_H__
     23 
     24 #include "unicode/utypes.h"
     25 
     26 #if !UCONFIG_NO_CONVERSION
     27 
     28 #include "unicode/uset.h"
     29 #include "unicode/utf16.h"
     30 #include "unicode/uenum.h"
     31 #include "unicode/ucnv.h"
     32 #include "unicode/localpointer.h"
     33 
     34 /**
     35  * \file
     36  *
     37  * A converter selector is built with a set of encoding/charset names
     38  * and, given an input string, returns the names of those
     39  * converters which can convert the string.
     40  *
     41  * A converter selector can be serialized into a buffer and reopened
     42  * from the serialized form.
     43  */
     44 
     45 /**
     46  * @{
     47  * The selector data structure (an incomplete type here; used through pointers)
     48  */
     49 struct UConverterSelector;
     50 typedef struct UConverterSelector UConverterSelector;
     51 /** @} */
     52 
     53 /**
     54  * Open a selector for a list of converters.
     55  * If converterListSize is 0, build for all available converters.
     56  * If excludedCodePoints is NULL, don't exclude any code points.
     57  *
     58  * @param converterList an array of the encoding names to be considered.
     59  *                      Can be NULL if converterListSize==0.
     60  *                      The list and the names will be cloned, and the caller
     61  *                      retains ownership of the original.
     62  * @param converterListSize number of encodings in the above list.
     63  *                          If 0, builds a selector for all available converters.
     64  * @param excludedCodePoints a set of code points to be excluded from consideration.
     65  *                           That is, excluded code points in a string do not change
     66  *                           the selection result. (They might be handled by a callback.)
     67  *                           Use NULL to exclude nothing.
     68  * @param whichSet which converter set to use; this determines whether
     69  *                 to consider only roundtrip mappings or also fallbacks.
     70  * @param status an in/out ICU UErrorCode
     71  * @return the new selector; close it with ucnvsel_close()
     72  *
     73  * @stable ICU 4.2
     74  */
     75 U_STABLE UConverterSelector* U_EXPORT2
     76 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
     77              const USet* excludedCodePoints,
     78              const UConverterUnicodeSet whichSet, UErrorCode* status);
     79 
     80 /**
     81  * Closes a selector.
     82  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
     83  * They can be closed before or after calling ucnvsel_close,
     84  * but should never be used after the selector is closed.
     85  *
     86  * @see ucnvsel_selectForString
     87  * @see ucnvsel_selectForUTF8
     88  *
     89  * @param sel selector to close
     90  *
     91  * @stable ICU 4.2
     92  */
     93 U_STABLE void U_EXPORT2
     94 ucnvsel_close(UConverterSelector *sel);
     95 
     96 #if U_SHOW_CPLUSPLUS_API
     97 
     98 U_NAMESPACE_BEGIN
     99 
    100 /**
    101  * \class LocalUConverterSelectorPointer
    102  * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
    103  * For most methods see the LocalPointerBase base class.
    104  *
    105  * @see LocalPointerBase
    106  * @see LocalPointer
    107  * @stable ICU 4.4
    108  */
    109 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
    110 
    111 U_NAMESPACE_END
    112 
    113 #endif  /* U_SHOW_CPLUSPLUS_API */
    114 
    115 /**
    116  * Open a selector from its serialized form (see ucnvsel_serialize()).
    117  * The buffer must remain valid and unchanged for the lifetime of the selector.
    118  * This is much faster than creating a selector from scratch.
    119  * Using a serialized form from a different machine (endianness/charset) is supported.
    120  *
    121  * @param buffer pointer to the serialized form of a converter selector;
    122  *               must be 32-bit-aligned
    123  * @param length the capacity of this buffer (can be equal to or larger than
    124  *               the actual data length)
    125  * @param status an in/out ICU UErrorCode
    126  * @return the new selector; close it with ucnvsel_close()
    127  *
    128  * @stable ICU 4.2
    129  */
    130 U_STABLE UConverterSelector* U_EXPORT2
    131 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
    132 
    133 /**
    134  * Serialize a selector into a linear buffer.
    135  * The serialized form is portable to different machines.
    136  *
    137  * @param sel selector to serialize
    138  * @param buffer pointer to 32-bit-aligned memory to be filled with the
    139  *               serialized form of this converter selector
    140  * @param bufferCapacity the capacity of this buffer
    141  * @param status an in/out ICU UErrorCode
    142  * @return the buffer capacity required to hold the serialized data (even if the call
    143  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
    144  *
    145  * @stable ICU 4.2
    146  */
    147 U_STABLE int32_t U_EXPORT2
    148 ucnvsel_serialize(const UConverterSelector* sel,
    149                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
    150 
    151 /**
    152  * Select converters that can map all characters in a UTF-16 string,
    153  * ignoring the excluded code points.
    154  *
    155  * @param sel a selector
    156  * @param s UTF-16 string
    157  * @param length length of the string, or -1 if NUL-terminated
    158  * @param status an in/out ICU UErrorCode
    159  * @return an enumeration containing encoding names, with the same names and
    160  *         order as supplied when building the selector. It must not be used
    161  *         after the selector is closed (see ucnvsel_close()).
    162  *
    163  * @stable ICU 4.2
    164  */
    165 U_STABLE UEnumeration * U_EXPORT2
    166 ucnvsel_selectForString(const UConverterSelector* sel,
    167                         const UChar *s, int32_t length, UErrorCode *status);
    168 
    169 /**
    170  * Select converters that can map all characters in a UTF-8 string,
    171  * ignoring the excluded code points.
    172  *
    173  * @param sel a selector
    174  * @param s UTF-8 string
    175  * @param length length of the string, or -1 if NUL-terminated
    176  * @param status an in/out ICU UErrorCode
    177  * @return an enumeration containing encoding names, with the same names and
    178  *         order as supplied when building the selector. It must not be used
    179  *         after the selector is closed (see ucnvsel_close()).
    180  *
    181  * @stable ICU 4.2
    182  */
    183 U_STABLE UEnumeration * U_EXPORT2
    184 ucnvsel_selectForUTF8(const UConverterSelector* sel,
    185                       const char *s, int32_t length, UErrorCode *status);
    186 
    187 #endif  /* !UCONFIG_NO_CONVERSION */
    188 
    189 #endif  /* __ICU_UCNV_SEL_H__ */
    190