Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2002-2004, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  uset.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2002mar07
     14 *   created by: Markus W. Scherer
     15 *
     16 *   C version of UnicodeSet.
     17 */
     18 
     19 
     20 /**
     21  * \file
     22  * \brief C API: Unicode Set
     23  *
     24  * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
     25  */
     26 
     27 #ifndef __USET_H__
     28 #define __USET_H__
     29 
     30 #include "unicode/utypes.h"
     31 #include "unicode/uchar.h"
     32 
     33 #ifndef UCNV_H
     34 struct USet;
     35 /**
     36  * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
     37  * uset_open*, and destroy with uset_close.
     38  * @stable ICU 2.4
     39  */
     40 typedef struct USet USet;
     41 #endif
     42 
     43 /**
     44  * Bitmask values to be passed as the options parameter of
     45  * uset_openPatternOptions() or uset_applyPattern().
     46  * @stable ICU 2.4
     47  */
     48 enum {
     49     /**
     50      * Ignore white space within patterns unless quoted or escaped.
     51      * @stable ICU 2.4
     52      */
     53     USET_IGNORE_SPACE = 1,
     54 
     55     /**
     56      * Enable case-insensitive matching.  E.g., "[ab]" with this flag
     57      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
     58      * match all except 'a', 'A', 'b', and 'B'.  This performs a full
     59      * closure over case mappings, e.g. U+017F for s.
     60      * @stable ICU 2.4
     61      */
     62     USET_CASE_INSENSITIVE = 2,
     63 
     64     /**
     65      * Bitmask for UnicodeSet::closeOver() indicating letter case; same value
     66      * as USET_CASE_INSENSITIVE.  May be ORed together with other selectors.
     67      * @internal
     68      */
     69     USET_CASE = 2,
     70 
     71     /**
     72      * Enable case-insensitive matching.  E.g., "[ab]" with this flag
     73      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
     74      * match all except 'a', 'A', 'b', and 'B'.  This adds the lower-,
     75      * title-, and uppercase mappings as well as the case folding
     76      * of each existing element in the set.
     77      * @draft ICU 3.2
     78      */
     79     USET_ADD_CASE_MAPPINGS = 4,
     80 
     81     /**
     82      * Capacity of USerializedSet.staticArray; enough for any single-code point set.
     83      * @internal
     84      */
     85     USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
     86 };
     87 
     88 /**
     89  * A serialized form of a Unicode set.  Limited manipulations are possible
     90  * directly on a serialized set; see the "Serialized set API" section below.
     91  * @stable ICU 2.4
     92  */
     93 typedef struct USerializedSet {
     94     /**
     95      * Pointer to the serialized Unicode set data (16-bit units).
     96      * @stable ICU 2.4
     97      */
     98     const uint16_t *array;
     99     /**
    100      * The length of the part of the array that contains BMP code points.
    101      * @stable ICU 2.4
    102      */
    103     int32_t bmpLength;
    104     /**
    105      * The total length of the array.
    106      * @stable ICU 2.4
    107      */
    108     int32_t length;
    109     /**
    110      * A small built-in buffer for the array, to reduce memory allocations.
    111      * @stable ICU 2.4
    112      */
    113     uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
    114 } USerializedSet;
    115 
    116 /*********************************************************************
    117  * USet API
    118  *********************************************************************/
    119 
    120 /**
    121  * Creates a USet object that contains the range of characters
    122  * start..end, inclusive.
    123  * @param start first character of the range, inclusive
    124  * @param end last character of the range, inclusive
    125  * @return a newly created USet.  The caller must call uset_close() on
    126  * it when done.
    127  * @stable ICU 2.4
    128  */
    129 U_STABLE USet* U_EXPORT2
    130 uset_open(UChar32 start, UChar32 end);
    131 
    132 /**
    133  * Creates a set from the given pattern.  See the UnicodeSet class
    134  * description for the syntax of the pattern language.
    135  * @param pattern a string specifying what characters are in the set
    136  * @param patternLength the length of the pattern, or -1 if null
    137  * terminated
    138  * @param ec the error code
    139  * @stable ICU 2.4
    140  */
    141 U_STABLE USet* U_EXPORT2
    142 uset_openPattern(const UChar* pattern, int32_t patternLength,
    143                  UErrorCode* ec);
    144 
    145 /**
    146  * Creates a set from the given pattern.  See the UnicodeSet class
    147  * description for the syntax of the pattern language.
    148  * @param pattern a string specifying what characters are in the set
    149  * @param patternLength the length of the pattern, or -1 if null
    150  * terminated
    151  * @param options bitmask for options to apply to the pattern.
    152  * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
    153  * @param ec the error code
    154  * @stable ICU 2.4
    155  */
    156 U_STABLE USet* U_EXPORT2
    157 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
    158                  uint32_t options,
    159                  UErrorCode* ec);
    160 
    161 /**
    162  * Disposes of the storage used by a USet object.  This function should
    163  * be called exactly once for objects returned by the uset_open*() functions.
    164  * @param set the object to dispose of
    165  * @stable ICU 2.4
    166  */
    167 U_STABLE void U_EXPORT2
    168 uset_close(USet* set);
    169 
    170 /**
    171  * Causes the USet object to represent the range <code>start - end</code>.
    172  * If <code>start > end</code> then this USet is set to an empty range.
    173  * @param set the object to set to the given range
    174  * @param start first character in the set, inclusive
    175  * @param end last character in the set, inclusive
    176  * @draft ICU 3.2
    177  */
    178 U_DRAFT void U_EXPORT2
    179 uset_set(USet* set,
    180          UChar32 start, UChar32 end);
    181 
    182 /**
    183  * Modifies the set to represent the set specified by the given
    184  * pattern. See the UnicodeSet class description for the syntax of
    185  * the pattern language. See also the User Guide chapter about UnicodeSet.
    186  * <em>Empties the set passed before applying the pattern.</em>
    187  * @param set               The set to which the pattern is to be applied.
    188  * @param pattern           A pointer to UChar string specifying what characters are in the set.
    189  *                          The character at pattern[0] must be a '['.
    190  * @param patternLength     The length of the UChar string. -1 if NUL terminated.
    191  * @param options           A bitmask for options to apply to the pattern.
    192  *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
    193  * @param status            Returns an error if the pattern cannot be parsed.
    194  * @return                  Upon successful parse, the value is
    195  *                          the index of the character after the closing ']'
    196  *                          of the parsed pattern.
    197  *                          If the status code indicates failure, then the return value
    198  *                          is the index of the error in the source.
    199  *
    200  * @draft ICU 2.8
    201  */
    202 U_DRAFT int32_t U_EXPORT2
    203 uset_applyPattern(USet *set,
    204                   const UChar *pattern, int32_t patternLength,
    205                   uint32_t options,
    206                   UErrorCode *status);
    207 
    208 /**
    209  * Modifies the set to contain those code points which have the given value
    210  * for the given binary or enumerated property, as returned by
    211  * u_getIntPropertyValue.  Prior contents of this set are lost.
    212  *
    213  * @param set the object to contain the code points defined by the property
    214  *
    215  * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
    216  * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
    217  * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
    218  *
    219  * @param value a value in the range u_getIntPropertyMinValue(prop)..
    220  * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
    221  * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
    222  * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
    223  * categories such as [:L:] to be represented.
    224  *
    225  * @param ec error code input/output parameter
    226  *
    227  * @draft ICU 3.2
    228  */
    229 U_DRAFT void U_EXPORT2
    230 uset_applyIntPropertyValue(USet* set,
    231                            UProperty prop, int32_t value, UErrorCode* ec);
    232 
    233 /**
    234  * Modifies the set to contain those code points which have the
    235  * given value for the given property.  Prior contents of this
    236  * set are lost.
    237  *
    238  * @param set the object to contain the code points defined by the given
    239  * property and value alias
    240  *
    241  * @param prop a string specifying a property alias, either short or long.
    242  * The name is matched loosely.  See PropertyAliases.txt for names and a
    243  * description of loose matching.  If the value string is empty, then this
    244  * string is interpreted as either a General_Category value alias, a Script
    245  * value alias, a binary property alias, or a special ID.  Special IDs are
    246  * matched loosely and correspond to the following sets:
    247  *
    248  * "ANY" = [\\u0000-\\U0010FFFF],
    249  * "ASCII" = [\\u0000-\\u007F].
    250  *
    251  * @param propLength the length of the prop, or -1 if NUL-terminated
    252  *
    253  * @param value a string specifying a value alias, either short or long.
    254  * The name is matched loosely.  See PropertyValueAliases.txt for names
    255  * and a description of loose matching.  In addition to aliases listed,
    256  * numeric values and canonical combining classes may be expressed
    257  * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
    258  * may also be empty.
    259  *
    260  * @param valueLength the length of the value, or -1 if NUL-terminated
    261  *
    262  * @param ec error code input/output parameter
    263  *
    264  * @draft ICU 3.2
    265  */
    266 U_DRAFT void U_EXPORT2
    267 uset_applyPropertyAlias(USet* set,
    268                         const UChar *prop, int32_t propLength,
    269                         const UChar *value, int32_t valueLength,
    270                         UErrorCode* ec);
    271 
    272 /**
    273  * Return true if the given position, in the given pattern, appears
    274  * to be the start of a UnicodeSet pattern.
    275  *
    276  * @param pattern a string specifying the pattern
    277  * @param patternLength the length of the pattern, or -1 if NUL-terminated
    278  * @param pos the given position
    279  * @draft ICU 3.2
    280  */
    281 U_DRAFT UBool U_EXPORT2
    282 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
    283                       int32_t pos);
    284 
    285 /**
    286  * Returns a string representation of this set.  If the result of
    287  * calling this function is passed to a uset_openPattern(), it
    288  * will produce another set that is equal to this one.
    289  * @param set the set
    290  * @param result the string to receive the rules, may be NULL
    291  * @param resultCapacity the capacity of result, may be 0 if result is NULL
    292  * @param escapeUnprintable if TRUE then convert unprintable
    293  * characters to their hex escape representations, \\uxxxx or
    294  * \\Uxxxxxxxx.  Unprintable characters are those other than
    295  * U+000A, U+0020..U+007E.
    296  * @param ec error code.
    297  * @return length of string, possibly larger than resultCapacity
    298  * @stable ICU 2.4
    299  */
    300 U_STABLE int32_t U_EXPORT2
    301 uset_toPattern(const USet* set,
    302                UChar* result, int32_t resultCapacity,
    303                UBool escapeUnprintable,
    304                UErrorCode* ec);
    305 
    306 /**
    307  * Adds the given character to the given USet.  After this call,
    308  * uset_contains(set, c) will return TRUE.
    309  * @param set the object to which to add the character
    310  * @param c the character to add
    311  * @stable ICU 2.4
    312  */
    313 U_STABLE void U_EXPORT2
    314 uset_add(USet* set, UChar32 c);
    315 
    316 /**
    317  * Adds all of the elements in the specified set to this set if
    318  * they're not already present.  This operation effectively
    319  * modifies this set so that its value is the <i>union</i> of the two
    320  * sets.  The behavior of this operation is unspecified if the specified
    321  * collection is modified while the operation is in progress.
    322  *
    323  * @param set the object to which to add the set
    324  * @param additionalSet the source set whose elements are to be added to this set.
    325  * @stable ICU 2.6
    326  */
    327 U_STABLE void U_EXPORT2
    328 uset_addAll(USet* set, const USet *additionalSet);
    329 
    330 /**
    331  * Adds the given range of characters to the given USet.  After this call,
    332  * uset_containsRange(set, start, end) will return TRUE.
    333  * @param set the object to which to add the range
    334  * @param start the first character of the range to add, inclusive
    335  * @param end the last character of the range to add, inclusive
    336  * @stable ICU 2.2
    337  */
    338 U_STABLE void U_EXPORT2
    339 uset_addRange(USet* set, UChar32 start, UChar32 end);
    340 
    341 /**
    342  * Adds the given string to the given USet.  After this call,
    343  * uset_containsString(set, str, strLen) will return TRUE.
    344  * @param set the object to which to add the string
    345  * @param str the string to add
    346  * @param strLen the length of the string or -1 if null terminated.
    347  * @stable ICU 2.4
    348  */
    349 U_STABLE void U_EXPORT2
    350 uset_addString(USet* set, const UChar* str, int32_t strLen);
    351 
    352 /**
    353  * Removes the given character from the given USet.  After this call,
    354  * uset_contains(set, c) will return FALSE.
    355  * @param set the object from which to remove the character
    356  * @param c the character to remove
    357  * @stable ICU 2.4
    358  */
    359 U_STABLE void U_EXPORT2
    360 uset_remove(USet* set, UChar32 c);
    361 
    362 /**
    363  * Removes the given range of characters from the given USet.  After this call,
    364  * uset_containsRange(set, start, end) will return FALSE.
    365  * @param set the object from which to remove the range
    366  * @param start the first character of the range to remove, inclusive
    367  * @param end the last character of the range to remove, inclusive
    368  * @stable ICU 2.2
    369  */
    370 U_STABLE void U_EXPORT2
    371 uset_removeRange(USet* set, UChar32 start, UChar32 end);
    372 
    373 /**
    374  * Removes the given string from the given USet.  After this call,
    375  * uset_containsString(set, str, strLen) will return FALSE.
    376  * @param set the object from which to remove the string
    377  * @param str the string to remove
    378  * @param strLen the length of the string or -1 if null terminated.
    379  * @stable ICU 2.4
    380  */
    381 U_STABLE void U_EXPORT2
    382 uset_removeString(USet* set, const UChar* str, int32_t strLen);
    383 
    384 /**
    385  * Removes from this set all of its elements that are contained in the
    386  * specified set.  This operation effectively modifies this
    387  * set so that its value is the <i>asymmetric set difference</i> of
    388  * the two sets.
    389  * @param set the object from which the elements are to be removed
    390  * @param removeSet the object that defines which elements will be
    391  * removed from this set
    392  * @draft ICU 3.2
    393  */
    394 U_DRAFT void U_EXPORT2
    395 uset_removeAll(USet* set, const USet* removeSet);
    396 
    397 /**
    398  * Retain only the elements in this set that are contained in the
    399  * specified range.  If <code>start > end</code> then an empty range is
    400  * retained, leaving the set empty.  This is equivalent to
    401  * a boolean logic AND, or a set INTERSECTION.
    402  *
    403  * @param set the object for which to retain only the specified range
    404  * @param start first character, inclusive, of range to be retained
    405  * to this set.
    406  * @param end last character, inclusive, of range to be retained
    407  * to this set.
    408  * @draft ICU 3.2
    409  */
    410 U_DRAFT void U_EXPORT2
    411 uset_retain(USet* set, UChar32 start, UChar32 end);
    412 
    413 /**
    414  * Retains only the elements in this set that are contained in the
    415  * specified set.  In other words, removes from this set all of
    416  * its elements that are not contained in the specified set.  This
    417  * operation effectively modifies this set so that its value is
    418  * the <i>intersection</i> of the two sets.
    419  *
    420  * @param set the object on which to perform the retain
    421  * @param retain set that defines which elements this set will retain
    422  * @draft ICU 3.2
    423  */
    424 U_DRAFT void U_EXPORT2
    425 uset_retainAll(USet* set, const USet* retain);
    426 
    427 /**
    428  * Reallocate this object's internal structures to take up the least
    429  * possible space, without changing this object's value.
    430  *
    431  * @param set the object on which to perform the compact
    432  * @draft ICU 3.2
    433  */
    434 U_DRAFT void U_EXPORT2
    435 uset_compact(USet* set);
    436 
    437 /**
    438  * Inverts this set.  This operation modifies this set so that
    439  * its value is its complement.  This operation does not affect
    440  * the multicharacter strings, if any.
    441  * @param set the set
    442  * @stable ICU 2.4
    443  */
    444 U_STABLE void U_EXPORT2
    445 uset_complement(USet* set);
    446 
    447 /**
    448  * Complements in this set all elements contained in the specified
    449  * set.  Any character in the other set will be removed if it is
    450  * in this set, or will be added if it is not in this set.
    451  *
    452  * @param set the set with which to complement
    453  * @param complement set that defines which elements will be xor'ed
    454  * from this set.
    455  * @draft ICU 3.2
    456  */
    457 U_DRAFT void U_EXPORT2
    458 uset_complementAll(USet* set, const USet* complement);
    459 
    460 /**
    461  * Removes all of the elements from this set.  This set will be
    462  * empty after this call returns.
    463  * @param set the set
    464  * @stable ICU 2.4
    465  */
    466 U_STABLE void U_EXPORT2
    467 uset_clear(USet* set);
    468 
    469 /**
    470  * Returns TRUE if the given USet contains no characters and no
    471  * strings.
    472  * @param set the set
    473  * @return true if set is empty
    474  * @stable ICU 2.4
    475  */
    476 U_STABLE UBool U_EXPORT2
    477 uset_isEmpty(const USet* set);
    478 
    479 /**
    480  * Returns TRUE if the given USet contains the given character.
    481  * @param set the set
    482  * @param c The codepoint to check for within the set
    483  * @return true if set contains c
    484  * @stable ICU 2.4
    485  */
    486 U_STABLE UBool U_EXPORT2
    487 uset_contains(const USet* set, UChar32 c);
    488 
    489 /**
    490  * Returns TRUE if the given USet contains all characters c
    491  * where start <= c && c <= end.
    492  * @param set the set
    493  * @param start the first character of the range to test, inclusive
    494  * @param end the last character of the range to test, inclusive
    495  * @return TRUE if set contains the range
    496  * @stable ICU 2.2
    497  */
    498 U_STABLE UBool U_EXPORT2
    499 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
    500 
    501 /**
    502  * Returns TRUE if the given USet contains the given string.
    503  * @param set the set
    504  * @param str the string
    505  * @param strLen the length of the string or -1 if null terminated.
    506  * @return true if set contains str
    507  * @stable ICU 2.4
    508  */
    509 U_STABLE UBool U_EXPORT2
    510 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
    511 
    512 /**
    513  * Returns the index of the given character within this set, where
    514  * the set is ordered by ascending code point.  If the character
    515  * is not in this set, return -1.  The inverse of this method is
    516  * <code>uset_charAt()</code>.
    517  * @param set the set
    518  * @param c the character to obtain the index for
    519  * @return an index from 0..size()-1, or -1
    520  * @draft ICU 3.2
    521  */
    522 U_DRAFT int32_t U_EXPORT2
    523 uset_indexOf(const USet* set, UChar32 c);
    524 
    525 /**
    526  * Returns the character at the given index within this set, where
    527  * the set is ordered by ascending code point.  If the index is
    528  * out of range, return (UChar32)-1.  The inverse of this method is
    529  * <code>uset_indexOf()</code>.
    530  * @param set the set
    531  * @param index an index from 0..size()-1 to obtain the char for
    532  * @return the character at the given index, or (UChar32)-1.
    533  * @draft ICU 3.2
    534  */
    535 U_DRAFT UChar32 U_EXPORT2
    536 uset_charAt(const USet* set, int32_t index);
    537 
    538 /**
    539  * Returns the number of characters and strings contained in the given
    540  * USet.
    541  * @param set the set
    542  * @return a non-negative integer counting the characters and strings
    543  * contained in set
    544  * @stable ICU 2.4
    545  */
    546 U_STABLE int32_t U_EXPORT2
    547 uset_size(const USet* set);
    548 
    549 /**
    550  * Returns the number of items in this set.  An item is either a range
    551  * of characters or a single multicharacter string.
    552  * @param set the set
    553  * @return a non-negative integer counting the character ranges
    554  * and/or strings contained in set
    555  * @stable ICU 2.4
    556  */
    557 U_STABLE int32_t U_EXPORT2
    558 uset_getItemCount(const USet* set);
    559 
    560 /**
    561  * Returns an item of this set.  An item is either a range of
    562  * characters or a single multicharacter string.
    563  * @param set the set
    564  * @param itemIndex a non-negative integer in the range 0..
    565  * uset_getItemCount(set)-1
    566  * @param start pointer to variable to receive first character
    567  * in range, inclusive
    568  * @param end pointer to variable to receive last character in range,
    569  * inclusive
    570  * @param str buffer to receive the string, may be NULL
    571  * @param strCapacity capacity of str, or 0 if str is NULL
    572  * @param ec error code
    573  * @return the length of the string (>= 2), or 0 if the item is a
    574  * range, in which case it is the range *start..*end, or -1 if
    575  * itemIndex is out of range
    576  * @stable ICU 2.4
    577  */
    578 U_STABLE int32_t U_EXPORT2
    579 uset_getItem(const USet* set, int32_t itemIndex,
    580              UChar32* start, UChar32* end,
    581              UChar* str, int32_t strCapacity,
    582              UErrorCode* ec);
    583 
    584 /**
    585  * Returns true if set1 contains all the characters and strings
    586  * of set2. It answers the question, 'Is set1 a superset of set2?'
    587  * @param set1 set to be checked for containment
    588  * @param set2 set to be checked for containment
    589  * @return true if the test condition is met
    590  * @draft ICU 3.2
    591  */
    592 U_DRAFT UBool U_EXPORT2
    593 uset_containsAll(const USet* set1, const USet* set2);
    594 
    595 /**
    596  * Returns true if set1 contains none of the characters and strings
    597  * of set2. It answers the question, 'Are set1 and set2 disjoint?'
    598  * @param set1 set to be checked for containment
    599  * @param set2 set to be checked for containment
    600  * @return true if the test condition is met
    601  * @draft ICU 3.2
    602  */
    603 U_DRAFT UBool U_EXPORT2
    604 uset_containsNone(const USet* set1, const USet* set2);
    605 
    606 /**
    607  * Returns true if set1 contains some of the characters and strings
    608  * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
    609  * @param set1 set to be checked for containment
    610  * @param set2 set to be checked for containment
    611  * @return true if the test condition is met
    612  * @draft ICU 3.2
    613  */
    614 U_DRAFT UBool U_EXPORT2
    615 uset_containsSome(const USet* set1, const USet* set2);
    616 
    617 /**
    618  * Returns true if set1 contains all of the characters and strings
    619  * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
    620  * @param set1 set to be checked for containment
    621  * @param set2 set to be checked for containment
    622  * @return true if the test condition is met
    623  * @draft ICU 3.2
    624  */
    625 U_DRAFT UBool U_EXPORT2
    626 uset_equals(const USet* set1, const USet* set2);
    627 
    628 /*********************************************************************
    629  * Serialized set API
    630  *********************************************************************/
    631 
    632 /**
    633  * Serializes this set into an array of 16-bit integers.  Serialization
    634  * (currently) only records the characters in the set; multicharacter
    635  * strings are ignored.
    636  *
    637  * The array
    638  * has the following format (each line is one 16-bit integer):
    639  *
    640  *  length     = (n+2*m) | (m!=0?0x8000:0)
    641  *  bmpLength  = n; present if m!=0
    642  *  bmp[0]
    643  *  bmp[1]
    644  *  ...
    645  *  bmp[n-1]
    646  *  supp-high[0]
    647  *  supp-low[0]
    648  *  supp-high[1]
    649  *  supp-low[1]
    650  *  ...
    651  *  supp-high[m-1]
    652  *  supp-low[m-1]
    653  *
    654  * The array starts with a header.  After the header are n bmp
    655  * code points, then m supplementary code points.  Either n or m
    656  * or both may be zero.  n+2*m is always <= 0x7FFF.
    657  *
    658  * If there are no supplementary characters (if m==0) then the
    659  * header is one 16-bit integer, 'length', with value n.
    660  *
    661  * If there are supplementary characters (if m!=0) then the header
    662  * is two 16-bit integers.  The first, 'length', has value
    663  * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
    664  *
    665  * After the header the code points are stored in ascending order.
    666  * Supplementary code points are stored as most significant 16
    667  * bits followed by least significant 16 bits.
    668  *
    669  * @param set the set
    670  * @param dest pointer to buffer of destCapacity 16-bit integers.
    671  * May be NULL only if destCapacity is zero.
    672  * @param destCapacity size of dest, or zero.  Must not be negative.
    673  * @param pErrorCode pointer to the error code.  Will be set to
    674  * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
    675  * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
    676  * @return the total length of the serialized format, including
    677  * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
    678  * than U_BUFFER_OVERFLOW_ERROR.
    679  * @stable ICU 2.4
    680  */
    681 U_STABLE int32_t U_EXPORT2
    682 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
    683 
    684 /**
    685  * Given a serialized array, fill in the given serialized set object.
    686  * @param fillSet pointer to result
    687  * @param src pointer to start of array
    688  * @param srcLength length of array
    689  * @return true if the given array is valid, otherwise false
    690  * @stable ICU 2.4
    691  */
    692 U_STABLE UBool U_EXPORT2
    693 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
    694 
    695 /**
    696  * Set the USerializedSet to contain the given character (and nothing
    697  * else).
    698  * @param fillSet pointer to result
    699  * @param c The codepoint to set
    700  * @stable ICU 2.4
    701  */
    702 U_STABLE void U_EXPORT2
    703 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
    704 
    705 /**
    706  * Returns TRUE if the given USerializedSet contains the given
    707  * character.
    708  * @param set the serialized set
    709  * @param c The codepoint to check for within the set
    710  * @return true if set contains c
    711  * @stable ICU 2.4
    712  */
    713 U_STABLE UBool U_EXPORT2
    714 uset_serializedContains(const USerializedSet* set, UChar32 c);
    715 
    716 /**
    717  * Returns the number of disjoint ranges of characters contained in
    718  * the given serialized set.  Ignores any strings contained in the
    719  * set.
    720  * @param set the serialized set
    721  * @return a non-negative integer counting the character ranges
    722  * contained in set
    723  * @stable ICU 2.4
    724  */
    725 U_STABLE int32_t U_EXPORT2
    726 uset_getSerializedRangeCount(const USerializedSet* set);
    727 
    728 /**
    729  * Returns a range of characters contained in the given serialized
    730  * set.
    731  * @param set the serialized set
    732  * @param rangeIndex a non-negative integer in the range 0..
    733  * uset_getSerializedRangeCount(set)-1
    734  * @param pStart pointer to variable to receive first character
    735  * in range, inclusive
    736  * @param pEnd pointer to variable to receive last character in range,
    737  * inclusive
    738  * @return true if rangeIndex is valid, otherwise false
    739  * @stable ICU 2.4
    740  */
    741 U_STABLE UBool U_EXPORT2
    742 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
    743                         UChar32* pStart, UChar32* pEnd);
    744 
    745 #endif
    746