Home | History | Annotate | Download | only in unicode
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
      6 * All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #ifndef UCOL_H
     11 #define UCOL_H
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_COLLATION
     16 
     17 #include "unicode/unorm.h"
     18 #include "unicode/localpointer.h"
     19 #include "unicode/parseerr.h"
     20 #include "unicode/uloc.h"
     21 #include "unicode/uset.h"
     22 #include "unicode/uscript.h"
     23 
     24 /**
     25  * \file
     26  * \brief C API: Collator
     27  *
     28  * <h2> Collator C API </h2>
     29  *
     30  * The C API for Collator performs locale-sensitive
     31  * string comparison. You use this service to build
     32  * searching and sorting routines for natural language text.
     33  * <p>
     34  * For more information about the collation service see
     35  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
     36  * <p>
     37  * Collation service provides correct sorting orders for most locales supported in ICU.
     38  * If specific data for a locale is not available, the order eventually falls back
     39  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
     40  * <p>
     41  * Sort ordering may be customized by providing your own set of rules. For more on
     42  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
     43  * Collation Customization</a> section of the User Guide.
     44  * <p>
     45  * @see         UCollationResult
     46  * @see         UNormalizationMode
     47  * @see         UCollationStrength
     48  * @see         UCollationElements
     49  */
     50 
     51 /** A collator, declared here as an incomplete (forward-declared) struct type.
     52 *  For usage in C programs, which refer to it through UCollator pointers.
     53 */
     54 struct UCollator;
     55 /** Type name for a collator object instance (typedef of struct UCollator).
     56  * @stable ICU 2.0
     57  */
     58 typedef struct UCollator UCollator;
     59 
     60 
     61 /**
     62  * Possible values for a comparison result, as returned by ucol_strcoll().
     63  * UCOL_LESS (-1) is returned if the source string compares less than the
     64  * target string.
     65  * UCOL_EQUAL (0) is returned if the source string compares equal to the
     66  * target string.
     67  * UCOL_GREATER (1) is returned if the source string compares greater than
     68  * the target string.
     69  *
     70  * @see ucol_strcoll()
     71  * @stable ICU 2.0
     72  */
     73 typedef enum {
     74   /** source string == target string */
     75   UCOL_EQUAL    = 0,
     76   /** source string > target string */
     77   UCOL_GREATER    = 1,
     78   /** source string < target string */
     79   UCOL_LESS    = -1
     80 } UCollationResult ;
     81 
     82 
     83 /** Enum containing attribute values for controlling collation behavior.
     84  * Here are all the allowable values. Not every attribute can take every value. The only
     85  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
     86  * value for that locale.
     87  * @stable ICU 2.0
     88  */
     89 typedef enum {
     90   /** accepted by most attributes */
     91   UCOL_DEFAULT = -1,
     92 
     93   /** Primary collation strength */
     94   UCOL_PRIMARY = 0,
     95   /** Secondary collation strength */
     96   UCOL_SECONDARY = 1,
     97   /** Tertiary collation strength */
     98   UCOL_TERTIARY = 2,
     99   /** Default collation strength */
    100   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
    101   UCOL_CE_STRENGTH_LIMIT,  /* implicit value: UCOL_DEFAULT_STRENGTH + 1, i.e. 3 (same number as UCOL_QUATERNARY) */
    102   /** Quaternary collation strength */
    103   UCOL_QUATERNARY=3,
    104   /** Identical collation strength */
    105   UCOL_IDENTICAL=15,
    106   UCOL_STRENGTH_LIMIT,  /* implicit value: UCOL_IDENTICAL + 1, i.e. 16 (same number as UCOL_OFF) */
    107 
    108   /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
    109       UCOL_CASE_FIRST, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE,
    110       UCOL_NUMERIC_COLLATION & UCOL_DECOMPOSITION_MODE*/
    111   UCOL_OFF = 16,
    112   /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
    113       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE,
    114       UCOL_NUMERIC_COLLATION & UCOL_DECOMPOSITION_MODE*/
    115   UCOL_ON = 17,
    116 
    117   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
    118   UCOL_SHIFTED = 20,
    119   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
    120   UCOL_NON_IGNORABLE = 21,
    121 
    122   /** Valid for UCOL_CASE_FIRST -
    123       lower case sorts before upper case */
    124   UCOL_LOWER_FIRST = 24,
    125   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
    126   UCOL_UPPER_FIRST = 25,
    127 
    128 #ifndef U_HIDE_DEPRECATED_API
    129     /**
    130      * One more than the highest normal UColAttributeValue value (implicitly UCOL_UPPER_FIRST + 1).
    131      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    132      */
    133   UCOL_ATTRIBUTE_VALUE_COUNT
    134 #endif  /* U_HIDE_DEPRECATED_API */
    135 } UColAttributeValue;
    136 
    137 /**
    138  * Enum containing the codes for reordering segments of the collation table that are not script
    139  * codes. These reordering codes are to be used in conjunction with the script codes.
    140  * @see ucol_getReorderCodes
    141  * @see ucol_setReorderCodes
    142  * @see ucol_getEquivalentReorderCodes
    143  * @see UScriptCode
    144  * @stable ICU 4.8
    145  */
    146  typedef enum {
    147    /**
    148     * A special reordering code that is used to specify the default
    149     * reordering codes for a locale.
    150     * @stable ICU 4.8
    151     */
    152     UCOL_REORDER_CODE_DEFAULT       = -1,
    153    /**
    154     * A special reordering code that is used to specify no reordering codes.
    155     * @stable ICU 4.8
    156     */
    157     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
    158    /**
    159     * A special reordering code that is used to specify all other codes used for
    160     * reordering except for the codes listed as UColReorderCode values and those
    161     * listed explicitly in a reordering. Note: has the same numeric value (USCRIPT_UNKNOWN) as UCOL_REORDER_CODE_NONE.
    162     * @stable ICU 4.8
    163     */
    164     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
    165    /**
    166     * Characters with the space property.
    167     * This is equivalent to the rule value "space".
    168     * @stable ICU 4.8
    169     */
    170     UCOL_REORDER_CODE_SPACE         = 0x1000,
    171    /**
    172     * The first entry in the enumeration of reordering groups. This is intended for use in
    173     * range checking and enumeration of the reorder codes.
    174     * @stable ICU 4.8
    175     */
    176     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
    177    /**
    178     * Characters with the punctuation property.
    179     * This is equivalent to the rule value "punct".
    180     * @stable ICU 4.8
    181     */
    182     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
    183    /**
    184     * Characters with the symbol property.
    185     * This is equivalent to the rule value "symbol".
    186     * @stable ICU 4.8
    187     */
    188     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
    189    /**
    190     * Characters with the currency property.
    191     * This is equivalent to the rule value "currency".
    192     * @stable ICU 4.8
    193     */
    194     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
    195    /**
    196     * Characters with the digit property.
    197     * This is equivalent to the rule value "digit".
    198     * @stable ICU 4.8
    199     */
    200     UCOL_REORDER_CODE_DIGIT         = 0x1004,
    201 #ifndef U_HIDE_DEPRECATED_API
    202     /**
    203      * One more than the highest normal UColReorderCode value.
    204      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    205      */
    206     UCOL_REORDER_CODE_LIMIT         = 0x1005
    207 #endif  /* U_HIDE_DEPRECATED_API */
    208 } UColReorderCode;
    209 
    210 /**
    211  * Base letter represents a primary difference.  Set comparison
    212  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
    213  * Use this to set the strength of a Collator object.
    214  * Example of primary difference, "abc" &lt; "abd"
    215  *
    216  * Diacritical differences on the same base letter represent a secondary
    217  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
    218  * differences. Use this to set the strength of a Collator object.
    219  * Example of secondary difference, "&auml;" &gt;&gt; "a".
    220  *
    221  * Uppercase and lowercase versions of the same character represent a
    222  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
    223  * all comparison differences. Use this to set the strength of a Collator
    224  * object.
    225  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
    226  *
    227  * Two characters are considered "identical" when they have the same
    228  * Unicode spellings; this is the UCOL_IDENTICAL level.
    229  * For example, "&auml;" == "&auml;".
    230  *
    231  * UCollationStrength is also used to determine the strength of sort keys
    232  * generated from UCollator objects.
    233  * These values can now be found in the UColAttributeValue enum.
    234  * @stable ICU 2.0
    235  **/
    236 typedef UColAttributeValue UCollationStrength;
    237 
    238 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
    239  * value, as well as the values specific to each one.
    240  * @stable ICU 2.0
    241  */
    242 typedef enum {
    243      /** Attribute for direction of secondary weights - used in Canadian French.
    244       * Acceptable values are UCOL_ON, which results in secondary weights
    245       * being considered backwards and UCOL_OFF which treats secondary
    246       * weights in the order they appear.
    247       * @stable ICU 2.0
    248       */
    249      UCOL_FRENCH_COLLATION,
    250      /** Attribute for handling variable elements.
    251       * Acceptable values are UCOL_NON_IGNORABLE (default)
    252       * which treats all the codepoints with non-ignorable
    253       * primary weights in the same way,
    254       * and UCOL_SHIFTED which causes codepoints with primary
    255       * weights that are equal to or below the variable top value
    256       * to be ignored on primary level and moved to the quaternary
    257       * level.
    258       * @stable ICU 2.0
    259       */
    260      UCOL_ALTERNATE_HANDLING,
    261      /** Controls the ordering of upper and lower case letters.
    262       * Acceptable values are UCOL_OFF (default), which orders
    263       * upper and lower case letters in accordance to their tertiary
    264       * weights, UCOL_UPPER_FIRST which forces upper case letters to
    265       * sort before lower case letters, and UCOL_LOWER_FIRST which does
    266       * the opposite.
    267       * @stable ICU 2.0
    268       */
    269      UCOL_CASE_FIRST,
    270      /** Controls whether an extra case level (positioned before the third
    271       * level) is generated or not. Acceptable values are UCOL_OFF (default),
    272       * when case level is not generated, and UCOL_ON which causes the case
    273       * level to be generated. Contents of the case level are affected by
    274       * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
    275       * accent differences in a string is to set the strength to UCOL_PRIMARY
    276       * and enable case level.
    277       * @stable ICU 2.0
    278       */
    279      UCOL_CASE_LEVEL,
    280      /** Controls whether the normalization check and necessary normalizations
    281       * are performed. When set to UCOL_OFF (default) no normalization check
    282       * is performed. The correctness of the result is guaranteed only if the
    283       * input data is in so-called FCD form (see users manual for more info).
    284       * When set to UCOL_ON, an incremental check is performed to see whether
    285       * the input data is in the FCD form. If the data is not in the FCD form,
    286       * incremental NFD normalization is performed.
    287       * @stable ICU 2.0
    288       */
    289      UCOL_NORMALIZATION_MODE,
    290      /** An alias for UCOL_NORMALIZATION_MODE attribute.
    291       * @stable ICU 2.0
    292       */
    293      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
    294      /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
    295       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
    296       * for most locales (except Japanese) is tertiary.
    297       *
    298       * Quaternary strength
    299       * is useful when combined with shifted setting for alternate handling
    300       * attribute and for JIS X 4061 collation, when it is used to distinguish
    301       * between Katakana and Hiragana.
    302       * Otherwise, quaternary level
    303       * is affected only by the number of non-ignorable code points in
    304       * the string.
    305       *
    306       * Identical strength is rarely useful, as it amounts
    307       * to codepoints of the NFD form of the string.
    308       * @stable ICU 2.0
    309       */
    310      UCOL_STRENGTH,
    311 #ifndef U_HIDE_DEPRECATED_API
    312      /** When turned on, this attribute positions Hiragana before all
    313       * non-ignorables on quaternary level. This is a sneaky way to produce JIS
    314       * sort order.
    315       *
    316       * This attribute was an implementation detail of the CLDR Japanese tailoring.
    317       * Since ICU 50, this attribute is not settable any more via API functions.
    318       * Since CLDR 25/ICU 53, explicit quaternary relations are used
    319       * to achieve the same Japanese sort order.
    320       *
    321       * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
    322       */
    323      UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
    324 #endif  /* U_HIDE_DEPRECATED_API */
    325      /**
    326       * When turned on, this attribute makes
    327       * substrings of digits sort according to their numeric values.
    328       *
    329       * This is a way to get '100' to sort AFTER '2'. Note that the longest
    330       * digit substring that can be treated as a single unit is
    331       * 254 digits (not counting leading zeros). If a digit substring is
    332       * longer than that, the digits beyond the limit will be treated as a
    333       * separate digit substring.
    334       *
    335       * A "digit" in this sense is a code point with General_Category=Nd,
    336       * which does not include circled numbers, roman numerals, etc.
    337       * Only a contiguous digit substring is considered, that is,
    338       * non-negative integers without separators.
    339       * There is no support for plus/minus signs, decimals, exponents, etc.
    340       *
    341       * @stable ICU 2.8
    342       */
    343      UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
    344 
    345     /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
    346      * it is needed for layout of RuleBasedCollator object. */
    347     /**
    348      * One more than the highest normal UColAttribute value (implicitly UCOL_NUMERIC_COLLATION + 1).
    349      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    350      */
    351      UCOL_ATTRIBUTE_COUNT
    352 } UColAttribute;
    353 
    354 /** Options for retrieving the rule string: the tailoring rules only, or the "UCA rules" plus the tailoring rules.
    355  *  @stable ICU 2.0
    356  */
    357 typedef enum {
    358   /**
    359    * Retrieves the tailoring rules only.
    360    * Same as calling the version of getRules() without UColRuleOption.
    361    * @stable ICU 2.0
    362    */
    363   UCOL_TAILORING_ONLY,
    364   /**
    365    * Retrieves the "UCA rules" concatenated with the tailoring rules.
    366    * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
    367    * They are almost never used or useful at runtime and can be removed from the data.
    368    * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    369    * @stable ICU 2.0
    370    */
    371   UCOL_FULL_RULES
    372 } UColRuleOption ;
    373 
    374 /**
    375  * Open a UCollator for comparing strings.
    376  *
    377  * For some languages, multiple collation types are available;
    378  * for example, "de@collation=phonebook".
    379  * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
    380  * in the old locale extension syntax ("el@colCaseFirst=upper")
    381  * or in language tag syntax ("el-u-kf-upper").
    382  * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
    383  *
    384  * The UCollator pointer is used in all the calls to the Collation
    385  * service. When it is no longer needed, the collator must be disposed of by calling
    386  * {@link #ucol_close }.
    387  * @param loc The locale containing the required collation rules.
    388  *            Special values for locales can be passed in -
    389  *            if NULL is passed for the locale, the default locale
    390  *            collation rules will be used. If empty string ("") or
    391  *            "root" are passed, the root collator will be returned.
    392  * @param status A pointer to a UErrorCode to receive any errors
    393  * @return A pointer to a UCollator, or 0 if an error occurred.
    394  * @see ucol_openRules
    395  * @see ucol_safeClone
    396  * @see ucol_close
    397  * @stable ICU 2.0
    398  */
    399 U_STABLE UCollator* U_EXPORT2
    400 ucol_open(const char *loc, UErrorCode *status);
    401 
    402 /**
    403  * Produce a UCollator instance according to the rules supplied.
    404  * The rules are used to change the default ordering, defined in the
    405  * UCA in a process called tailoring. The resulting UCollator pointer
    406  * can be used in the same way as the one obtained by {@link #ucol_open }.
    407  * @param rules A string describing the collation rules. For the syntax
    408  *              of the rules please see users guide.
    409  * @param rulesLength The length of rules, or -1 if null-terminated.
    410  * @param normalizationMode The normalization mode: One of
    411  *             UCOL_OFF     (expect the text to not need normalization),
    412  *             UCOL_ON      (normalize), or
    413  *             UCOL_DEFAULT (set the mode according to the rules)
    414  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
    415  * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can also be set in the rules.
    416  * @param parseError  A pointer to UParseError to receive information about errors
    417  *                    that occurred during parsing. This argument can currently be set
    418  *                    to NULL, but at users own risk. Please provide a real structure.
    419  * @param status A pointer to a UErrorCode to receive any errors
    420  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
    421  *         of error - please use status argument to check for errors.
    422  * @see ucol_open
    423  * @see ucol_safeClone
    424  * @see ucol_close
    425  * @stable ICU 2.0
    426  */
    427 U_STABLE UCollator* U_EXPORT2
    428 ucol_openRules( const UChar        *rules,
    429                 int32_t            rulesLength,
    430                 UColAttributeValue normalizationMode,
    431                 UCollationStrength strength,
    432                 UParseError        *parseError,
    433                 UErrorCode         *status);
    434 
    435 #ifndef U_HIDE_DEPRECATED_API
    436 /**
    437  * Open a collator defined by a short form string.
    438  * The structure and the syntax of the string is defined in the "Naming collators"
    439  * section of the users guide:
    440  * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
    441  * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
    442  * strength will be 3. 3066bis locale overrides individual locale parts.
    443  * The call to this function is equivalent to a call to ucol_open, followed by a
    444  * series of calls to ucol_setAttribute and ucol_setVariableTop.
    445  * @param definition A short string containing a locale and a set of attributes.
    446  *                   Attributes not explicitly mentioned are left at the default
    447  *                   state for a locale.
    448  * @param parseError if not NULL, structure that will get filled with error's pre
    449  *                   and post context in case of error.
    450  * @param forceDefaults if FALSE, the settings that are the same as the collator
    451  *                   default settings will not be applied (for example, setting
    452  *                   French secondary on a French collator would not be executed).
    453  *                   If TRUE, all the settings will be applied regardless of the
    454  *                   collator default value. If the definition
    455  *                   strings are to be cached, should be set to FALSE.
    456  * @param status     Error code. Apart from regular error conditions connected to
    457  *                   instantiating collators (like out of memory or similar), this
    458  *                   API will return an error if an invalid attribute or attribute/value
    459  *                   combination is specified.
    460  * @return           A pointer to a UCollator or 0 if an error occurred (including an
    461  *                   invalid attribute).
    462  * @see ucol_open
    463  * @see ucol_setAttribute
    464  * @see ucol_setVariableTop
    465  * @see ucol_getShortDefinitionString
    466  * @see ucol_normalizeShortDefinitionString
    467  * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
    468  */
    469 U_DEPRECATED UCollator* U_EXPORT2
    470 ucol_openFromShortString( const char *definition,
    471                           UBool forceDefaults,
    472                           UParseError *parseError,
    473                           UErrorCode *status);
    474 #endif  /* U_HIDE_DEPRECATED_API */
    475 
    476 #ifndef U_HIDE_DEPRECATED_API
    477 /**
    478  * Get a set containing the contractions defined by the collator. The set includes
    479  * both the root collator's contractions and the contractions defined by the tailoring. This set
    480  * will contain only strings. If a tailoring explicitly suppresses contractions from
    481  * the root collator (like Russian), removed contractions will not be in the resulting set.
    482  * @param coll collator
    483  * @param conts the set to hold the result. It gets emptied before
    484  *              contractions are added.
    485  * @param status to hold the error code
    486  * @return the size of the contraction set
    487  *
    488  * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
    489  */
    490 U_DEPRECATED int32_t U_EXPORT2
    491 ucol_getContractions( const UCollator *coll,
    492                   USet *conts,
    493                   UErrorCode *status);
    494 #endif  /* U_HIDE_DEPRECATED_API */
    495 
    496 /**
    497  * Get sets containing the contractions and/or the expansions defined by the collator. The expansions set
    498  * includes both the root collator's expansions and the expansions defined by the tailoring.
    499  * @param coll collator
    500  * @param contractions if not NULL, the set to hold the contractions
    501  * @param expansions if not NULL, the set to hold the expansions
    502  * @param addPrefixes add the prefix contextual elements to contractions
    503  * @param status to hold the error code
    504  *
    505  * @stable ICU 3.4
    506  */
    507 U_STABLE void U_EXPORT2
    508 ucol_getContractionsAndExpansions( const UCollator *coll,
    509                   USet *contractions, USet *expansions,
    510                   UBool addPrefixes, UErrorCode *status);
    511 
    512 /**
    513  * Close a UCollator.
    514  * Once closed, a UCollator should not be used. Every open collator should
    515  * be closed. Otherwise, a memory leak will result.
    516  * (C++ code can use LocalUCollatorPointer, which closes a UCollator via ucol_close().)
    517  * @param coll The UCollator to close.
    518  * @see ucol_open
    519  * @see ucol_openRules
    520  * @see ucol_safeClone
    521  * @stable ICU 2.0
    522  */
    523 U_STABLE void U_EXPORT2
    524 ucol_close(UCollator *coll);
    524 
    525 #if U_SHOW_CPLUSPLUS_API
    526 
    527 U_NAMESPACE_BEGIN
    528 
    529 /**
    530  * \class LocalUCollatorPointer
    531  * "Smart pointer" class, closes a UCollator via ucol_close().
    532  * Only declared when U_SHOW_CPLUSPLUS_API is enabled. For most methods see the LocalPointerBase base class.
    533  *
    534  * @see LocalPointerBase
    535  * @see LocalPointer
    536  * @stable ICU 4.4
    537  */
    538 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
    539 
    540 U_NAMESPACE_END
    541 
    542 #endif  /* U_SHOW_CPLUSPLUS_API */
    543 
    544 /**
    545  * Compare two strings.
    546  * The strings will be compared using the options already specified.
    547  * @param coll The UCollator containing the comparison rules.
    548  * @param source The source string.
    549  * @param sourceLength The length of source, or -1 if null-terminated.
    550  * @param target The target string.
    551  * @param targetLength The length of target, or -1 if null-terminated.
    552  * @return The result of comparing the strings; UCOL_EQUAL if source compares equal to target,
    553  * UCOL_GREATER if source compares greater than target, UCOL_LESS if source compares less than target
    554  * @see ucol_greater
    555  * @see ucol_greaterOrEqual
    556  * @see ucol_equal
    557  * @stable ICU 2.0
    558  */
    559 U_STABLE UCollationResult U_EXPORT2
    560 ucol_strcoll(    const    UCollator    *coll,
    561         const    UChar        *source,
    562         int32_t            sourceLength,
    563         const    UChar        *target,
    564         int32_t            targetLength);
    565 
    566 /**
    567 * Compare two strings in UTF-8.
    568 * The strings will be compared using the options already specified.
    569 * Note: When an input string contains a malformed UTF-8 byte sequence,
    570 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
    571 * @param coll The UCollator containing the comparison rules.
    572 * @param source The source UTF-8 string.
    573 * @param sourceLength The length of source, or -1 if null-terminated.
    574 * @param target The target UTF-8 string.
    575 * @param targetLength The length of target, or -1 if null-terminated.
    576 * @param status A pointer to a UErrorCode to receive any errors
    577 * @return The result of comparing the strings; one of UCOL_EQUAL,
    578 * UCOL_GREATER, UCOL_LESS
    579 * @see ucol_greater
    580 * @see ucol_greaterOrEqual
    581 * @see ucol_equal
    582 * @stable ICU 50
    583 */
    584 U_STABLE UCollationResult U_EXPORT2
    585 ucol_strcollUTF8(
    586         const UCollator *coll,
    587         const char      *source,
    588         int32_t         sourceLength,
    589         const char      *target,
    590         int32_t         targetLength,
    591         UErrorCode      *status);
    592 
    593 /**
    594  * Determine if one string (source) is greater than another (target).
    595  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER for the same arguments
    596  * @param coll The UCollator containing the comparison rules.
    597  * @param source The source string.
    598  * @param sourceLength The length of source, or -1 if null-terminated.
    599  * @param target The target string.
    600  * @param targetLength The length of target, or -1 if null-terminated.
    601  * @return TRUE if source is greater than target, FALSE otherwise.
    602  * @see ucol_strcoll
    603  * @see ucol_greaterOrEqual
    604  * @see ucol_equal
    605  * @stable ICU 2.0
    606  */
    607 U_STABLE UBool U_EXPORT2
    608 ucol_greater(const UCollator *coll,
    609              const UChar     *source, int32_t sourceLength,
    610              const UChar     *target, int32_t targetLength);
    611 
    612 /**
    613  * Determine if one string (source) is greater than or equal to another (target).
    614  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS for the same arguments
    615  * @param coll The UCollator containing the comparison rules.
    616  * @param source The source string.
    617  * @param sourceLength The length of source, or -1 if null-terminated.
    618  * @param target The target string.
    619  * @param targetLength The length of target, or -1 if null-terminated.
    620  * @return TRUE if source is greater than or equal to target, FALSE otherwise.
    621  * @see ucol_strcoll
    622  * @see ucol_greater
    623  * @see ucol_equal
    624  * @stable ICU 2.0
    625  */
    626 U_STABLE UBool U_EXPORT2
    627 ucol_greaterOrEqual(const UCollator *coll,
    628                     const UChar     *source, int32_t sourceLength,
    629                     const UChar     *target, int32_t targetLength);
    630 
    631 /**
    632  * Compare two strings (source and target) for equality.
    633  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL for the same arguments
    634  * @param coll The UCollator containing the comparison rules.
    635  * @param source The source string.
    636  * @param sourceLength The length of source, or -1 if null-terminated.
    637  * @param target The target string.
    638  * @param targetLength The length of target, or -1 if null-terminated.
    639  * @return TRUE if source is equal to target, FALSE otherwise
    640  * @see ucol_strcoll
    641  * @see ucol_greater
    642  * @see ucol_greaterOrEqual
    643  * @stable ICU 2.0
    644  */
    645 U_STABLE UBool U_EXPORT2
    646 ucol_equal(const UCollator *coll,
    647            const UChar     *source, int32_t sourceLength,
    648            const UChar     *target, int32_t targetLength);
    649 
    650 /**
    651  * Compare two strings (for example UTF-8 encoded ones) that are supplied through UCharIterator objects.
    652  * The strings will be compared using the options already specified.
    653  * @param coll The UCollator containing the comparison rules.
    654  * @param sIter The source string iterator.
    655  * @param tIter The target string iterator.
    656  * @param status A pointer to a UErrorCode to receive any errors
    657  * @return The result of comparing the strings; one of UCOL_EQUAL,
    658  * UCOL_GREATER, UCOL_LESS
    659  * @see ucol_strcoll
    660  * @stable ICU 2.6
    661  */
    662 U_STABLE UCollationResult U_EXPORT2
    663 ucol_strcollIter(  const    UCollator    *coll,
    664                   UCharIterator *sIter,
    665                   UCharIterator *tIter,
    666                   UErrorCode *status);
    667 
    668 /**
    669  * Get the collation strength used in a UCollator.
    670  * The strength influences how strings are compared.
    671  * @param coll The UCollator to query.
    672  * @return The collation strength (UCollationStrength is a typedef of UColAttributeValue);
    673  * one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
    674  * @see ucol_setStrength
    675  * @stable ICU 2.0
    676  */
    677 U_STABLE UCollationStrength U_EXPORT2
    678 ucol_getStrength(const UCollator *coll);
    679 
    680 /**
    681  * Set the collation strength used in a UCollator.
    682  * The strength influences how strings are compared.
    683  * @param coll The UCollator to set.
    684  * @param strength The desired collation strength; one of UCOL_PRIMARY,
    685  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
    686  * @see ucol_getStrength
    687  * @stable ICU 2.0
    688  */
    689 U_STABLE void U_EXPORT2
    690 ucol_setStrength(UCollator *coll,
    691                  UCollationStrength strength);
    692 
    693 /**
    694  * Retrieves the reordering codes for this collator.
    695  * These reordering codes are a combination of UScript codes and UColReorderCode entries.
    696  * @param coll The UCollator to query.
    697  * @param dest The array to fill with the script ordering.
    698  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    699  * will only return the length of the result without writing any codes (pre-flighting).
    700  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
    701  * failure before the function call.
    702  * @return The number of reordering codes written to the dest array.
    703  * @see ucol_setReorderCodes
    704  * @see ucol_getEquivalentReorderCodes
    705  * @see UScriptCode
    706  * @see UColReorderCode
    707  * @stable ICU 4.8
    708  */
    709 U_STABLE int32_t U_EXPORT2
    710 ucol_getReorderCodes(const UCollator* coll,
    711                     int32_t* dest,
    712                     int32_t destCapacity,
    713                     UErrorCode *pErrorCode);
    714 /**
    715  * Sets the reordering codes for this collator.
    716  * Collation reordering allows scripts and some other groups of characters
    717  * to be moved relative to each other. This reordering is done on top of
    718  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
    719  * at the start and/or the end of the collation order. These groups are specified using
    720  * UScript codes and UColReorderCode entries.
    721  *
    722  * <p>By default, reordering codes specified for the start of the order are placed in the
    723  * order given after several special non-script blocks. These special groups of characters
    724  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
    725  * UColReorderCode entries. Script groups can be intermingled with
    726  * these special non-script groups if those special groups are explicitly specified in the reordering.
    727  *
    728  * <p>The special code OTHERS stands for any script that is not explicitly
    729  * mentioned in the list of reordering codes given. Anything that is after OTHERS
    730  * will go at the very end of the reordering in the order given.
    731  *
    732  * <p>The special reorder code DEFAULT will reset the reordering for this collator
    733  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
    734  * was specified when this collator was created from resource data or from rules. The
    735  * DEFAULT code <b>must</b> be the sole code supplied when it is used.
    736  * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
    737  *
    738  * <p>The special reorder code NONE will remove any reordering for this collator.
    739  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
    740  * NONE code <b>must</b> be the sole code supplied when it is used.
    741  *
    742  * @param coll The UCollator to set.
    743  * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    744  * length is also set to 0. An empty array will clear any reordering codes on the collator.
    745  * @param reorderCodesLength The length of reorderCodes.
    746  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
    747  * failure before the function call.
    748  * @see ucol_getReorderCodes
    749  * @see ucol_getEquivalentReorderCodes
    750  * @see UScriptCode
    751  * @see UColReorderCode
    752  * @stable ICU 4.8
    753  */
    754 U_STABLE void U_EXPORT2
    755 ucol_setReorderCodes(UCollator* coll,
    756                     const int32_t* reorderCodes,
    757                     int32_t reorderCodesLength,
    758                     UErrorCode *pErrorCode);
    759 
    760 /**
    761  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    762  * codes will be grouped and must reorder together.
    763  * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
    764  * for example Hiragana and Katakana.
    765  *
    766  * @param reorderCode The reorder code to determine equivalence for.
    767  * @param dest The array to fill with the script ordering.
    768  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    769  * will only return the length of the result without writing any codes (pre-flighting).
    770  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
    771  * a failure before the function call.
    772  * @return The number of reordering codes written to the dest array.
    773  * @see ucol_setReorderCodes
    774  * @see ucol_getReorderCodes
    775  * @see UScriptCode
    776  * @see UColReorderCode
    777  * @stable ICU 4.8
    778  */
    779 U_STABLE int32_t U_EXPORT2
    780 ucol_getEquivalentReorderCodes(int32_t reorderCode,
    781                     int32_t* dest,
    782                     int32_t destCapacity,
    783                     UErrorCode *pErrorCode);
    784 
    785 /**
    786  * Get the display name for a UCollator.
    787  * The display name is suitable for presentation to a user.
    788  * @param objLoc The locale of the collator in question.
    789  * @param dispLoc The locale for display.
    790  * @param result A pointer to a buffer to receive the attribute.
    791  * @param resultLength The maximum size of result.
    792  * @param status A pointer to a UErrorCode to receive any errors
    793  * @return The total buffer size needed; if greater than resultLength,
    794  * the output was truncated.
    795  * @stable ICU 2.0
    796  */
    797 U_STABLE int32_t U_EXPORT2
    798 ucol_getDisplayName(    const    char        *objLoc,
    799             const    char        *dispLoc,
    800             UChar             *result,
    801             int32_t         resultLength,
    802             UErrorCode        *status);
    803 
    804 /**
    805  * Get a locale for which collation rules are available.
    806  * A UCollator in a locale returned by this function will perform the correct
    807  * collation for the locale.
    808  * @param localeIndex The index of the desired locale.
    809  * @return A locale for which collation rules are available, or 0 if none.
    810  * @see ucol_countAvailable
    811  * @stable ICU 2.0
    812  */
    813 U_STABLE const char* U_EXPORT2
    814 ucol_getAvailable(int32_t localeIndex);
    815 
    816 /**
    817  * Determine how many locales have collation rules available.
    818  * This function is most useful for determining the loop ending condition for
    819  * calls to {@link #ucol_getAvailable }.
    820  * @return The number of locales for which collation rules are available.
    821  * @see ucol_getAvailable
    822  * @stable ICU 2.0
    823  */
    824 U_STABLE int32_t U_EXPORT2
    825 ucol_countAvailable(void);
    826 
    827 #if !UCONFIG_NO_SERVICE
    828 /**
    829  * Create a string enumerator of all locales for which a valid
    830  * collator may be opened.
    831  * @param status input-output error code
    832  * @return a string enumeration over locale strings. The caller is
    833  * responsible for closing the result.
    834  * @stable ICU 3.0
    835  */
    836 U_STABLE UEnumeration* U_EXPORT2
    837 ucol_openAvailableLocales(UErrorCode *status);
    838 #endif
    839 
    840 /**
    841  * Create a string enumerator of all possible keywords that are relevant to
    842  * collation. At this point, the only recognized keyword for this
    843  * service is "collation".
    844  * @param status input-output error code
    845  * @return a string enumeration over keyword strings. The caller is
    846  * responsible for closing the result.
    847  * @stable ICU 3.0
    848  */
    849 U_STABLE UEnumeration* U_EXPORT2
    850 ucol_getKeywords(UErrorCode *status);
    851 
    852 /**
    853  * Given a keyword, create a string enumeration of all values
    854  * for that keyword that are currently in use.
    855  * @param keyword a particular keyword as enumerated by
    856  * ucol_getKeywords. If any other keyword is passed in, *status is set
    857  * to U_ILLEGAL_ARGUMENT_ERROR.
    858  * @param status input-output error code
    859  * @return a string enumeration over collation keyword values, or NULL
    860  * upon error. The caller is responsible for closing the result.
    861  * @stable ICU 3.0
    862  */
    863 U_STABLE UEnumeration* U_EXPORT2
    864 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
    865 
    866 /**
    867  * Given a key and a locale, returns an array of string values in a preferred
    868  * order that would make a difference. These are all and only those values where
    869  * the open (creation) of the service with the locale formed from the input locale
    870  * plus input keyword and that value has different behavior than creation with the
    871  * input locale alone.
    872  * @param key           one of the keys supported by this service.  For now, only
    873  *                      "collation" is supported.
    874  * @param locale        the locale
    875  * @param commonlyUsed  if set to true it will return only commonly used values
    876  *                      with the given locale in preferred order.  Otherwise,
    877  *                      it will return all the available values for the locale.
    878  * @param status error status
    879  * @return a string enumeration over keyword values for the given key and the locale.
    880  * @stable ICU 4.2
    881  */
    882 U_STABLE UEnumeration* U_EXPORT2
    883 ucol_getKeywordValuesForLocale(const char* key,
    884                                const char* locale,
    885                                UBool commonlyUsed,
    886                                UErrorCode* status);
    887 
    888 /**
    889  * Return the functionally equivalent locale for the specified
    890  * input locale, with respect to given keyword, for the
    891  * collation service. If two different input locale + keyword
    892  * combinations produce the same result locale, then collators
    893  * instantiated for these two different input locales will behave
    894  * equivalently. The converse is not always true; two collators
    895  * may in fact be equivalent, but return different results, due to
    896  * internal details. The return result has no other meaning than
    897  * that stated above, and implies nothing as to the relationship
    898  * between the two locales. This is intended for use by
    899  * applications that wish to cache collators, or otherwise reuse
    900  * collators when possible. The functional equivalent may change
    901  * over time. For more information, please see the <a
    902  * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
    903  * Locales and Services</a> section of the ICU User Guide.
    904  * @param result fillin for the functionally equivalent result locale
    905  * @param resultCapacity capacity of the fillin buffer
    906  * @param keyword a particular keyword as enumerated by
    907  * ucol_getKeywords.
    908  * @param locale the specified input locale
    909  * @param isAvailable if non-NULL, pointer to a fillin parameter that
    910  * on return indicates whether the specified input locale was 'available'
    911  * to the collation service. A locale is defined as 'available' if it
    912  * physically exists within the collation locale data.
    913  * @param status pointer to input-output error code
    914  * @return the actual buffer size needed for the locale. If greater
    915  * than resultCapacity, the returned full name will be truncated and
    916  * an error code will be returned.
    917  * @stable ICU 3.0
    918  */
    919 U_STABLE int32_t U_EXPORT2
    920 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
    921                              const char* keyword, const char* locale,
    922                              UBool* isAvailable, UErrorCode* status);
    923 
    924 /**
    925  * Get the collation tailoring rules from a UCollator.
    926  * The rules will follow the rule syntax.
    927  * @param coll The UCollator to query.
    928  * @param length Pointer to an int32_t that receives the length of the returned rules.
    929  * @return The collation tailoring rules.
    930  * @stable ICU 2.0
    931  */
    932 U_STABLE const UChar* U_EXPORT2
    933 ucol_getRules(    const    UCollator    *coll,
    934         int32_t            *length);
    935 
    936 #ifndef U_HIDE_DEPRECATED_API
    937 /** Get the short definition string for a collator. This API harvests the collator's
    938  *  locale and the attribute set and produces a string that can be used for opening
    939  *  a collator with the same attributes using the ucol_openFromShortString API.
    940  *  This string will be normalized.
    941  *  The structure and the syntax of the string is defined in the "Naming collators"
    942  *  section of the users guide:
    943  *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
    944  *  This API supports preflighting.
    945  *  @param coll a collator
    946  *  @param locale a locale that will appear as the collator's locale in the resulting
    947  *                short string definition. If NULL, the locale will be harvested
    948  *                from the collator.
    949  *  @param buffer space to hold the resulting string
    950  *  @param capacity capacity of the buffer
    951  *  @param status for returning errors. All the preflighting errors are reported as well.
    952  *  @return length of the resulting string
    953  *  @see ucol_openFromShortString
    954  *  @see ucol_normalizeShortDefinitionString
    955  *  @deprecated ICU 54
    956  */
    957 U_DEPRECATED int32_t U_EXPORT2
    958 ucol_getShortDefinitionString(const UCollator *coll,
    959                               const char *locale,
    960                               char *buffer,
    961                               int32_t capacity,
    962                               UErrorCode *status);
    963 
    964 /** Verifies and normalizes short definition string.
    965  *  A normalized short definition string has all the options sorted by argument name,
    966  *  so that equivalent definition strings are the same.
    967  *  This API supports preflighting.
    968  *  @param source definition string
    969  *  @param destination space to hold the resulting string
    970  *  @param capacity capacity of the buffer
    971  *  @param parseError if not NULL, structure that will get filled with error's pre
    972  *                   and post context in case of error.
    973  *  @param status     Error code. This API will return an error if an invalid attribute
    974  *                    or attribute/value combination is specified. All the preflighting
    975  *                    errors are also reported.
    976  *  @return length of the resulting normalized string.
    977  *
    978  *  @see ucol_openFromShortString
    979  *  @see ucol_getShortDefinitionString
    980  *
    981  *  @deprecated ICU 54
    982  */
    983 
    984 U_DEPRECATED int32_t U_EXPORT2
    985 ucol_normalizeShortDefinitionString(const char *source,
    986                                     char *destination,
    987                                     int32_t capacity,
    988                                     UParseError *parseError,
    989                                     UErrorCode *status);
    990 #endif  /* U_HIDE_DEPRECATED_API */
    991 
    992 
    993 /**
    994  * Get a sort key for a string from a UCollator.
    995  * Sort keys may be compared using <TT>strcmp</TT>.
    996  *
    997  * Note that sort keys are often less efficient than simply doing comparison.
    998  * For more details, see the ICU User Guide.
    999  *
   1000  * Like ICU functions that write to an output buffer, the buffer contents
   1001  * are undefined if the buffer capacity (resultLength parameter) is too small.
   1002  * Unlike ICU functions that write a string to an output buffer,
   1003  * the terminating zero byte is counted in the sort key length.
   1004  * @param coll The UCollator containing the collation rules.
   1005  * @param source The string to transform.
   1006  * @param sourceLength The length of source, or -1 if null-terminated.
   1007  * @param result A pointer to a buffer to receive the attribute.
   1008  * @param resultLength The maximum size of result.
   1009  * @return The size needed to fully store the sort key.
   1010  *      If there was an internal error generating the sort key,
   1011  *      a zero value is returned.
   1012  * @see ucol_keyHashCode
   1013  * @stable ICU 2.0
   1014  */
   1015 U_STABLE int32_t U_EXPORT2
   1016 ucol_getSortKey(const    UCollator    *coll,
   1017         const    UChar        *source,
   1018         int32_t        sourceLength,
   1019         uint8_t        *result,
   1020         int32_t        resultLength);
   1021 
   1022 
   1023 /** Gets the next count bytes of a sort key. Caller needs
   1024  *  to preserve state array between calls and to provide
   1025  *  the same type of UCharIterator set with the same string.
   1026  *  The destination buffer provided must be big enough to store
   1027  *  the number of requested bytes.
   1028  *
   1029  *  The generated sort key may or may not be compatible with
   1030  *  sort keys generated using ucol_getSortKey().
   1031  *  @param coll The UCollator containing the collation rules.
   1032  *  @param iter UCharIterator containing the string we need
   1033  *              the sort key to be calculated for.
   1034  *  @param state Opaque state of sortkey iteration.
   1035  *  @param dest Buffer to hold the resulting sortkey part
   1036  *  @param count number of sort key bytes required.
   1037  *  @param status error code indicator.
   1038  *  @return the actual number of bytes of a sortkey. It can be
   1039  *          smaller than count if we have reached the end of
   1040  *          the sort key.
   1041  *  @stable ICU 2.6
   1042  */
   1043 U_STABLE int32_t U_EXPORT2
   1044 ucol_nextSortKeyPart(const UCollator *coll,
   1045                      UCharIterator *iter,
   1046                      uint32_t state[2],
   1047                      uint8_t *dest, int32_t count,
   1048                      UErrorCode *status);
   1049 
   1050 /** Bound mode selector that is taken by the ucol_getBound API.
   1051  * See the ucol_getBound documentation below for an explanation.
   1052  * Do not change the values assigned to the
   1053  * members of this enum: the underlying code
   1054  * depends on them having these exact numbers.
   1055  * @stable ICU 2.0
   1056  */
   1057 typedef enum {
   1058   /** Lower bound. */
   1059   UCOL_BOUND_LOWER = 0,
   1060   /** Upper bound that will match strings of exact size. */
   1061   UCOL_BOUND_UPPER = 1,
   1062   /** Upper bound that will match all strings that have the same initial substring as the given string. */
   1063   UCOL_BOUND_UPPER_LONG = 2,
   1064 #ifndef U_HIDE_DEPRECATED_API
   1065     /**
   1066      * One more than the highest normal UColBoundMode value.
   1067      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   1068      */
   1069     UCOL_BOUND_VALUE_COUNT
   1070 #endif  /* U_HIDE_DEPRECATED_API */
   1071 } UColBoundMode;
   1072 
   1073 /**
   1074  * Produce a bound for a given sortkey and a number of levels.
   1075  * Return value is always the number of bytes needed, regardless of
   1076  * whether the result buffer was big enough or even valid.<br>
   1077  * Resulting bounds can be used to produce a range of strings that are
   1078  * between upper and lower bounds. For example, if bounds are produced
   1079  * for a sortkey of string "smith", strings between upper and lower
   1080  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
   1081  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
   1082  * is produced, strings matched would be as above. However, if bound
   1083  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
   1084  * also match "Smithsonian" and similar.<br>
   1085  * For more on usage, see example in cintltst/capitst.c in procedure
   1086  * TestBounds.
   1087  * Sort keys may be compared using <TT>strcmp</TT>.
   1088  * @param source The source sortkey.
   1089  * @param sourceLength The length of source, or -1 if null-terminated.
   1090  *                     (If an unmodified sortkey is passed, it is always null
   1091  *                      terminated).
   1092  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
   1093  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
   1094  *                  produces upper bound that matches strings of the same length
   1095  *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
   1096  *                  same starting substring as the source string.
   1097  * @param noOfLevels  Number of levels required in the resulting bound (for most
   1098  *                    uses, the recommended value is 1). See users guide for
   1099  *                    explanation on number of levels a sortkey can have.
   1100  * @param result A pointer to a buffer to receive the resulting sortkey.
   1101  * @param resultLength The maximum size of result.
   1102  * @param status Used for returning error code if something went wrong. If the
   1103  *               number of levels requested is higher than the number of levels
   1104  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
   1105  *               issued.
   1106  * @return The size needed to fully store the bound.
   1107  * @see ucol_keyHashCode
   1108  * @stable ICU 2.1
   1109  */
   1110 U_STABLE int32_t U_EXPORT2
   1111 ucol_getBound(const uint8_t       *source,
   1112         int32_t             sourceLength,
   1113         UColBoundMode       boundType,
   1114         uint32_t            noOfLevels,
   1115         uint8_t             *result,
   1116         int32_t             resultLength,
   1117         UErrorCode          *status);
   1118 
   1119 /**
   1120  * Gets the version information for a Collator. Version is currently
   1121  * an opaque 32-bit number which depends, among other things, on major
   1122  * versions of the collator tailoring and UCA.
   1123  * @param coll The UCollator to query.
   1124  * @param info the version # information, the result will be filled in
   1125  * @stable ICU 2.0
   1126  */
   1127 U_STABLE void U_EXPORT2
   1128 ucol_getVersion(const UCollator* coll, UVersionInfo info);
   1129 
   1130 /**
   1131  * Gets the UCA version information for a Collator. Version is the
   1132  * UCA version number (3.1.1, 4.0).
   1133  * @param coll The UCollator to query.
   1134  * @param info the version # information, the result will be filled in
   1135  * @stable ICU 2.8
   1136  */
   1137 U_STABLE void U_EXPORT2
   1138 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
   1139 
   1140 /**
   1141  * Merges two sort keys. The levels are merged with their corresponding counterparts
   1142  * (primaries with primaries, secondaries with secondaries etc.). Between the values
   1143  * from the same level a separator is inserted.
   1144  *
   1145  * This is useful, for example, for combining sort keys from first and last names
   1146  * to sort such pairs.
   1147  * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
   1148  *
   1149  * The recommended way to achieve "merged" sorting is by
   1150  * concatenating strings with U+FFFE between them.
   1151  * The concatenation has the same sort order as the merged sort keys,
   1152  * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
   1153  * Using strings with U+FFFE may yield shorter sort keys.
   1154  *
   1155  * For details about Sort Key Features see
   1156  * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
   1157  *
   1158  * It is possible to merge multiple sort keys by consecutively merging
   1159  * another one with the intermediate result.
   1160  *
   1161  * The length of the merge result is the sum of the lengths of the input sort keys.
   1162  *
   1163  * Example (uncompressed):
   1164  * <pre>191B1D 01 050505 01 910505 00
   1165  * 1F2123 01 050505 01 910505 00</pre>
   1166  * will be merged as
   1167  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
   1168  *
   1169  * If the destination buffer is not big enough, then its contents are undefined.
   1170  * If any of the source lengths is zero or any of the source pointers is NULL/undefined,
   1171  * the result is of size zero.
   1172  *
   1173  * @param src1 the first sort key
   1174  * @param src1Length the length of the first sort key, including the zero byte at the end;
   1175  *        can be -1 if the function is to find the length
   1176  * @param src2 the second sort key
   1177  * @param src2Length the length of the second sort key, including the zero byte at the end;
   1178  *        can be -1 if the function is to find the length
   1179  * @param dest the buffer where the merged sort key is written,
   1180  *        can be NULL if destCapacity==0
   1181  * @param destCapacity the number of bytes in the dest buffer
   1182  * @return the length of the merged sort key, src1Length+src2Length;
   1183  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
   1184  *         in which cases the contents of dest are undefined
   1185  * @stable ICU 2.0
   1186  */
   1187 U_STABLE int32_t U_EXPORT2
   1188 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
   1189                    const uint8_t *src2, int32_t src2Length,
   1190                    uint8_t *dest, int32_t destCapacity);
   1191 
   1192 /**
   1193  * Universal attribute setter
   1194  * @param coll collator whose attributes are to be changed
   1195  * @param attr attribute type
   1196  * @param value attribute value
   1197  * @param status to indicate whether the operation went on smoothly or there were errors
   1198  * @see UColAttribute
   1199  * @see UColAttributeValue
   1200  * @see ucol_getAttribute
   1201  * @stable ICU 2.0
   1202  */
   1203 U_STABLE void U_EXPORT2
   1204 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
   1205 
   1206 /**
   1207  * Universal attribute getter
   1208  * @param coll collator whose attributes are to be queried
   1209  * @param attr attribute type
   1210  * @return attribute value
   1211  * @param status to indicate whether the operation went on smoothly or there were errors
   1212  * @see UColAttribute
   1213  * @see UColAttributeValue
   1214  * @see ucol_setAttribute
   1215  * @stable ICU 2.0
   1216  */
   1217 U_STABLE UColAttributeValue  U_EXPORT2
   1218 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
   1219 
   1220 /**
   1221  * Sets the variable top to the top of the specified reordering group.
   1222  * The variable top determines the highest-sorting character
   1223  * which is affected by UCOL_ALTERNATE_HANDLING.
   1224  * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
   1225  * @param coll the collator
   1226  * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
   1227  *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
   1228  *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
   1229  * @param pErrorCode Standard ICU error code. Its input value must
   1230  *                   pass the U_SUCCESS() test, or else the function returns
   1231  *                   immediately. Check for U_FAILURE() on output or use with
   1232  *                   function chaining. (See User Guide for details.)
   1233  * @see ucol_getMaxVariable
   1234  * @stable ICU 53
   1235  */
   1236 U_STABLE void U_EXPORT2
   1237 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
   1238 
   1239 /**
   1240  * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
   1241  * @param coll the collator
   1242  * @return the maximum variable reordering group.
   1243  * @see ucol_setMaxVariable
   1244  * @stable ICU 53
   1245  */
   1246 U_STABLE UColReorderCode U_EXPORT2
   1247 ucol_getMaxVariable(const UCollator *coll);
   1248 
   1249 #ifndef U_HIDE_DEPRECATED_API
   1250 /**
   1251  * Sets the variable top to the primary weight of the specified string.
   1252  *
   1253  * Beginning with ICU 53, the variable top is pinned to
   1254  * the top of one of the supported reordering groups,
   1255  * and it must not be beyond the last of those groups.
   1256  * See ucol_setMaxVariable().
   1257  * @param coll the collator
   1258  * @param varTop one or more (if contraction) UChars to which the variable top should be set
   1259  * @param len length of variable top string. If -1 it is considered to be zero terminated.
   1260  * @param status error code. If error code is set, the return value is undefined.
   1261  *               Errors set by this function are:<br>
   1262  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1263  *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
   1264  *    the last reordering group supported by ucol_setMaxVariable()
   1265  * @return variable top primary weight
   1266  * @see ucol_getVariableTop
   1267  * @see ucol_restoreVariableTop
   1268  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
   1269  */
   1270 U_DEPRECATED uint32_t U_EXPORT2
   1271 ucol_setVariableTop(UCollator *coll,
   1272                     const UChar *varTop, int32_t len,
   1273                     UErrorCode *status);
   1274 #endif  /* U_HIDE_DEPRECATED_API */
   1275 
   1276 /**
   1277  * Gets the variable top value of a Collator.
   1278  * @param coll collator whose variable top needs to be retrieved
   1279  * @param status error code (not changed by function). If error code is set,
   1280  *               the return value is undefined.
   1281  * @return the variable top primary weight
   1282  * @see ucol_getMaxVariable
   1283  * @see ucol_setVariableTop
   1284  * @see ucol_restoreVariableTop
   1285  * @stable ICU 2.0
   1286  */
   1287 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
   1288 
   1289 #ifndef U_HIDE_DEPRECATED_API
   1290 /**
   1291  * Sets the variable top to the specified primary weight.
   1292  *
   1293  * Beginning with ICU 53, the variable top is pinned to the top of one of
   1294  * the supported reordering groups, and it must not be beyond the last of
   1295  * those groups. See ucol_setMaxVariable().
   1296  * @param coll the collator whose variable top is to be set
   1297  * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop
   1298  * @param status error code
   1299  * @see ucol_getVariableTop
   1300  * @see ucol_setVariableTop
   1301  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
   1302  */
   1303 U_DEPRECATED void U_EXPORT2
   1304 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
   1305 #endif  /* U_HIDE_DEPRECATED_API */
   1306 
   1307 /**
   1308  * Thread-safe cloning operation. The result is a clone of the given collator.
   1309  * @param coll collator to be cloned
   1310  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
   1311  *  User-allocated space for the new clone.
   1312  *  If NULL, new memory will be allocated.
   1313  *  If the buffer is not large enough, new memory will be allocated.
   1314  *  Clients can use U_COL_SAFECLONE_BUFFERSIZE.
   1315  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
   1316  *  Pointer to the size of the allocated space.
   1317  *  If *pBufferSize == 0, a sufficient size for use in cloning will
   1318  *  be returned ('pre-flighting').
   1319  *  If *pBufferSize is not enough for a stack-based safe clone,
   1320  *  new memory will be allocated.
   1321  * @param status indicates whether the operation succeeded or there were errors.
   1322  *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
   1323  *  allocations were necessary.
   1324  * @return pointer to the new clone
   1325  * @see ucol_open
   1326  * @see ucol_openRules
   1327  * @see ucol_close
   1328  * @stable ICU 2.0
   1329  */
   1330 U_STABLE UCollator* U_EXPORT2
   1331 ucol_safeClone(const UCollator *coll,
   1332                void            *stackBuffer,
   1333                int32_t         *pBufferSize,
   1334                UErrorCode      *status);
   1335 
   1336 #ifndef U_HIDE_DEPRECATED_API
   1337 
   1338 /** Default memory size for the new clone created by ucol_safeClone().
   1339  * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
   1340  */
   1341 #define U_COL_SAFECLONE_BUFFERSIZE 1
   1342 
   1343 #endif /* U_HIDE_DEPRECATED_API */
   1344 
   1345 /**
   1346  * Copies the current rules into buffer. Delta defines whether full rules are returned or just
   1347  * the tailoring. Returns the number of UChars needed to store the rules. If buffer is NULL or
   1348  * bufferLen is not enough to store the rules, stores only as much as fits in the available space.
   1349  *
   1350  * ucol_getRules() should normally be used instead.
   1351  * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
   1352  * @param coll collator to get the rules from
   1353  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
   1354  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
   1355  * @param bufferLen length of buffer to store rules in. If less than needed, you'll get only the part that fits.
   1356  * @return the number of UChars needed to store the rules
   1357  * @stable ICU 2.0
   1358  * @see UCOL_FULL_RULES
   1359  */
   1360 U_STABLE int32_t U_EXPORT2
   1361 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
   1362 
   1363 #ifndef U_HIDE_DEPRECATED_API
   1364 /**
   1365  * Gets the locale name of the collator. If the collator
   1366  * was instantiated from rules, this function returns
   1367  * NULL.
   1368  * @param coll The UCollator for which the locale is needed
   1369  * @param type Which locale to return: requested, valid or actual.
   1370  *             For a description see the definition of
   1371  *             ULocDataLocaleType in uloc.h
   1372  * @param status error code of the operation
   1373  * @return real locale name from which the collation data comes.
   1374  *         If the collator was instantiated from rules, returns
   1375  *         NULL.
   1376  * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
   1377  */
   1378 U_DEPRECATED const char * U_EXPORT2
   1379 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
   1380 #endif  /* U_HIDE_DEPRECATED_API */
   1381 
   1382 /**
   1383  * Gets the locale name of the collator. If the collator
   1384  * was instantiated from rules, this function returns
   1385  * NULL.
   1386  * @param coll The UCollator for which the locale is needed
   1387  * @param type Which locale to return: requested, valid or actual.
   1388  *             For a description see the definition of
   1389  *             ULocDataLocaleType in uloc.h
   1390  * @param status error code of the operation
   1391  * @return real locale name from which the collation data comes.
   1392  *         If the collator was instantiated from rules, returns
   1393  *         NULL.
   1394  * @stable ICU 2.8
   1395  */
   1396 U_STABLE const char * U_EXPORT2
   1397 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
   1398 
   1399 /**
   1400  * Gets a Unicode set that contains all the characters and sequences tailored in
   1401  * this collator. The result must be disposed of by using uset_close.
   1402  * @param coll        The UCollator for which we want to get tailored chars
   1403  * @param status      error code of the operation
   1404  * @return a pointer to a newly created USet. Must be disposed of by using uset_close
   1405  * @see ucol_openRules
   1406  * @see uset_close
   1407  * @stable ICU 2.4
   1408  */
   1409 U_STABLE USet * U_EXPORT2
   1410 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
   1411 
   1412 #ifndef U_HIDE_INTERNAL_API
   1413 /** Calculates the set of unsafe code points, given a collator.
   1414  *  A character is unsafe if you could append any character and cause the ordering to alter significantly.
   1415  *  Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
   1416  *  Thus if you have a character like a_umlaut, and you add a lower_dot to it,
   1417  *  then it normalizes to a_lower_dot + umlaut, and sorts differently.
   1418  *  @param coll Collator
   1419  *  @param unsafe a fill-in set to receive the unsafe code points
   1420  *  @param status for catching errors
   1421  *  @return number of elements in the set
   1422  *  @internal ICU 3.0
   1423  */
   1424 U_INTERNAL int32_t U_EXPORT2
   1425 ucol_getUnsafeSet( const UCollator *coll,
   1426                   USet *unsafe,
   1427                   UErrorCode *status);
   1428 
   1429 /** Touches all resources needed for instantiating a collator from a short string definition,
   1430  *  thus filling up the cache.
   1431  * @param definition A short string containing a locale and a set of attributes.
   1432  *                   Attributes not explicitly mentioned are left at the default
   1433  *                   state for a locale.
   1434  * @param forceDefaults if FALSE, the settings that are the same as the collator
   1435  *                   default settings will not be applied (for example, setting
   1436  *                   French secondary on a French collator would not be executed).
   1437  *                   If TRUE, all the settings will be applied regardless of the
   1438  *                   collator default value. Should be set to FALSE if the
   1439  *                   definition strings are to be cached.
   1440  * @param parseError if not NULL, structure that will get filled with error's pre
   1441  *                   and post context in case of error.
   1442  * @param status     Error code. Apart from regular error conditions connected to
   1443  *                   instantiating collators (like out of memory or similar), this
   1444  *                   API will return an error if an invalid attribute or attribute/value
   1445  *                   combination is specified.
   1446  * @see ucol_openFromShortString
   1447  * @internal ICU 3.2.1
   1448  */
   1449 U_INTERNAL void U_EXPORT2
   1450 ucol_prepareShortStringOpen( const char *definition,
   1451                           UBool forceDefaults,
   1452                           UParseError *parseError,
   1453                           UErrorCode *status);
   1454 #endif  /* U_HIDE_INTERNAL_API */
   1455 
   1456 /** Creates a binary image of a collator. This binary image can be stored and
   1457  *  later used to instantiate a collator using ucol_openBinary.
   1458  *  This API supports preflighting.
   1459  *  @param coll Collator whose binary image is created
   1460  *  @param buffer a fill-in buffer to receive the binary image
   1461  *  @param capacity capacity of the destination buffer
   1462  *  @param status for catching errors
   1463  *  @return size of the image
   1464  *  @see ucol_openBinary
   1465  *  @stable ICU 3.2
   1466  */
   1467 U_STABLE int32_t U_EXPORT2
   1468 ucol_cloneBinary(const UCollator *coll,
   1469                  uint8_t *buffer, int32_t capacity,
   1470                  UErrorCode *status);
   1471 
   1472 /** Opens a collator from a collator binary image created using
   1473  *  ucol_cloneBinary. The binary image used to instantiate the
   1474  *  collator remains owned by the user and should stay around for
   1475  *  the lifetime of the collator. The API also takes a base collator
   1476  *  which must be the root collator.
   1477  *  @param bin binary image owned by the user and required through the
   1478  *             lifetime of the collator
   1479  *  @param length size of the image. If negative, the API will try to
   1480  *                figure out the length of the image
   1481  *  @param base Base collator, for lookup of untailored characters.
   1482  *              Must be the root collator, must not be NULL.
   1483  *              The base is required to be present through the lifetime of the collator.
   1484  *  @param status for catching errors
   1485  *  @return newly created collator
   1486  *  @see ucol_cloneBinary
   1487  *  @stable ICU 3.2
   1488  */
   1489 U_STABLE UCollator* U_EXPORT2
   1490 ucol_openBinary(const uint8_t *bin, int32_t length,
   1491                 const UCollator *base,
   1492                 UErrorCode *status);
   1493 
   1494 
   1495 #endif /* #if !UCONFIG_NO_COLLATION */
   1496 
   1497 #endif
   1498