Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2008, International Business Machines
      5 *   Corporation, Google and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 */
      9 /*
     10  * Author : eldawy (at) google.com (Mohamed Eldawy)
     11  * ucnvsel.h
     12  *
     13  * Purpose: To generate a list of encodings capable of handling
     14  * a given Unicode text
     15  *
     16  * Started 09-April-2008
     17  */
     18 
     19 #ifndef __ICU_UCNV_SEL_H__
     20 #define __ICU_UCNV_SEL_H__
     21 
     22 #include "unicode/uset.h"
     23 #include "unicode/utypes.h"
     24 #include "unicode/utf16.h"
     25 #include "unicode/uenum.h"
     26 #include "unicode/ucnv.h"
     27 
     28 /**
     29  * \file
     30  *
     31  * A converter selector is built from a set of encoding/charset names.
     32  * Given an input string, it returns the names of those converters
     33  * that can convert the entire string.
     34  *
     35  * A converter selector can be serialized into a buffer and reopened
     36  * from the serialized form.
     37  */
     38 
     39 /**
     40  * @{
     41  * Opaque converter selector; only forward-declared in this header.
     42  */
     43 struct UConverterSelector;
     44 typedef struct UConverterSelector UConverterSelector;
     45 /** @} */
     46 
     47 /**
     48  * Open a selector.
     49  * If converterListSize is 0, build for all available converters.
     50  * If excludedCodePoints is NULL, don't exclude any code points.
     51  *
     52  * @param converterList an array of encoding names to build the selector for.
     53  *                      Can be NULL if converterListSize==0.
     54  *                      The list and the names will be cloned, and the caller
     55  *                      retains ownership of the original.
     56  * @param converterListSize number of encodings in above list.
     57  *                          If 0, builds a selector for all available converters.
     58  * @param excludedCodePoints a set of code points to be excluded from consideration.
     59  *                           That is, excluded code points in a string do not change
     60  *                           the selection result. (They might be handled by a callback.)
     61  *                           Use NULL to exclude nothing.
     62  * @param whichSet what converter set to use? Use this to determine whether
     63  *                 to consider only roundtrip mappings or also fallbacks.
     64  * @param status an in/out ICU UErrorCode
     65  * @return the new selector; release it with ucnvsel_close()
     66  *
     67  * @draft ICU 4.2
     68  */
     69 U_CAPI UConverterSelector* U_EXPORT2
     70 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
     71              const USet* excludedCodePoints,
     72              const UConverterUnicodeSet whichSet, UErrorCode* status);
     73 
     74 /**
     75  * Closes a selector.
     76  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
     77  * They can be closed before or after calling ucnvsel_close,
     78  * but should never be used after the selector is closed.
     79  *
     80  * @see ucnvsel_selectForString
     81  * @see ucnvsel_selectForUTF8
     82  *
     83  * @param sel selector to close
     84  *
     85  * @draft ICU 4.2
     86  */
     87 U_CAPI void U_EXPORT2
     88 ucnvsel_close(UConverterSelector *sel);
     89 
     90 /**
     91  * Open a selector from its serialized form.
     92  * The buffer must remain valid and unchanged for the lifetime of the selector.
     93  * This is much faster than creating a selector from scratch.
     94  * Using a serialized form from a different machine (endianness/charset) is supported.
     95  *
     96  * @param buffer pointer to the serialized form of a converter selector;
     97  *               must be 32-bit-aligned
     98  * @param length the capacity of this buffer (can be equal to or larger than
     99  *               the actual data length)
    100  * @param status an in/out ICU UErrorCode
    101  * @return the new selector; release it with ucnvsel_close()
    102  *
    103  * @draft ICU 4.2
    104  */
    105 U_CAPI UConverterSelector* U_EXPORT2
    106 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
    107 
    108 /**
    109  * Serialize a selector into a linear buffer.
    110  * The serialized form is portable to different machines.
    111  *
    112  * @param sel selector to serialize
    113  * @param buffer pointer to 32-bit-aligned memory to be filled with the
    114  *               serialized form of this converter selector
    115  * @param bufferCapacity the capacity of this buffer
    116  * @param status an in/out ICU UErrorCode
    117  * @return the required buffer capacity to hold the serialized data (even if the call
    118  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
    119  *
    120  * @draft ICU 4.2
    121  */
    122 U_CAPI int32_t U_EXPORT2
    123 ucnvsel_serialize(const UConverterSelector* sel,
    124                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
    125 
    126 /**
    127  * Select converters that can map all characters in a UTF-16 string,
    128  * ignoring the excluded code points.
    129  *
    130  * @param sel a selector
    131  * @param s UTF-16 string
    132  * @param length length of the string, or -1 if NUL-terminated
    133  * @param status an in/out ICU UErrorCode
    134  * @return an enumeration of encoding names; names and order are the same as
    135  *         supplied when building the selector. The enumeration must not be
    136  *         used after the selector is closed.
    137  *
    138  * @draft ICU 4.2
    139  */
    140 U_CAPI UEnumeration * U_EXPORT2
    141 ucnvsel_selectForString(const UConverterSelector* sel,
    142                         const UChar *s, int32_t length, UErrorCode *status);
    143 
    144 /**
    145  * Select converters that can map all characters in a UTF-8 string,
    146  * ignoring the excluded code points.
    147  *
    148  * @param sel a selector
    149  * @param s UTF-8 string
    150  * @param length length of the string, or -1 if NUL-terminated
    151  * @param status an in/out ICU UErrorCode
    152  * @return an enumeration of encoding names; names and order are the same as
    153  *         supplied when building the selector. The enumeration must not be
    154  *         used after the selector is closed.
    155  *
    156  * @draft ICU 4.2
    157  */
    158 U_CAPI UEnumeration * U_EXPORT2
    159 ucnvsel_selectForUTF8(const UConverterSelector* sel,
    160                       const char *s, int32_t length, UErrorCode *status);
    161 
    162 #endif  /* __ICU_UCNV_SEL_H__ */
    163