Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 *   Copyright (C) 1996-2010, International Business Machines                 *
      4 *   Corporation and others.  All Rights Reserved.                            *
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: Collation Service.
     11  */
     12 
     13 /**
     14 * File coll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 * 02/5/97      aliu        Modified createDefault to load collation data from
     22 *                          binary files when possible.  Added related methods
     23 *                          createCollationFromFile, chopLocale, createPathName.
     24 * 02/11/97     aliu        Added members addToCache, findInCache, and fgCache.
     25 * 02/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
     26 *                          Moved cache out of Collation class.
     27 * 02/13/97     aliu        Moved several methods out of this class and into
     28 *                          RuleBasedCollator, with modifications.  Modified
     29 *                          createDefault() to call new RuleBasedCollator(Locale&)
     30 *                          constructor.  General clean up and documentation.
     31 * 02/20/97     helena      Added clone, operator==, operator!=, operator=, copy
     32 *                          constructor and getDynamicClassID.
     33 * 03/25/97     helena      Updated with platform independent data types.
     34 * 05/06/97     helena      Added memory allocation error detection.
     35 * 06/20/97     helena      Java class name change.
     36 * 09/03/97     helena      Added createCollationKeyValues().
     37 * 02/10/98     damiba      Added compare() with length as parameter.
     38 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     39 *                          Normalizer::EMode.
     40 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     41 *                          UnicodeString construction and special case for NO_OP.
     42 * 11/23/99     srl         More performance enhancements. Inlining of
     43 *                          critical accessors.
     44 * 05/15/00     helena      Added version information API.
     45 * 01/29/01     synwee      Modified into a C++ wrapper which calls C apis
     46 *                          (ucoll.h).
     47 */
     48 
     49 #ifndef COLL_H
     50 #define COLL_H
     51 
     52 #include "unicode/utypes.h"
     53 
     54 #if !UCONFIG_NO_COLLATION
     55 
     56 #include "unicode/uobject.h"
     57 #include "unicode/ucol.h"
     58 #include "unicode/normlzr.h"
     59 #include "unicode/locid.h"
     60 #include "unicode/uniset.h"
     61 #include "unicode/umisc.h"
     62 #include "unicode/uiter.h"
     63 #include "unicode/stringpiece.h"
     64 
     65 U_NAMESPACE_BEGIN
     66 
     67 class StringEnumeration;
     68 
     69 #if !UCONFIG_NO_SERVICE
     70 /**
     71  * @stable ICU 2.6
     72  */
     73 class CollatorFactory;
     74 #endif
     75 
     76 /**
     77 * @stable ICU 2.0
     78 */
     79 class CollationKey;
     80 
     81 /**
     82 * The <code>Collator</code> class performs locale-sensitive string
     83 * comparison.<br>
     84 * You use this class to build searching and sorting routines for natural
     85 * language text.<br>
     86 * <em>Important: </em>The ICU collation service has been reimplemented
     87 * in order to achieve better performance and UCA compliance.
     88 * For details, see the
     89 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     90 * collation design document</a>.
     91 * <p>
     92 * <code>Collator</code> is an abstract base class. Subclasses implement
     93 * specific collation strategies. One subclass,
     94 * <code>RuleBasedCollator</code>, is currently provided and is applicable
     95 * to a wide set of languages. Other subclasses may be created to handle more
     96 * specialized needs.
     97 * <p>
     98 * Like other locale-sensitive classes, you can use the static factory method,
     99 * <code>createInstance</code>, to obtain the appropriate
    100 * <code>Collator</code> object for a given locale. You will only need to
    101 * look at the subclasses of <code>Collator</code> if you need to
    102 * understand the details of a particular collation strategy or if you need to
    103 * modify that strategy.
    104 * <p>
    105 * The following example shows how to compare two strings using the
    106 * <code>Collator</code> for the default locale.
    107 * \htmlonly<blockquote>\endhtmlonly
    108 * <pre>
    109 * \code
    110 * // Compare two strings in the default locale
    111 * UErrorCode success = U_ZERO_ERROR;
    112 * Collator* myCollator = Collator::createInstance(success);
    113 * if (myCollator->compare("abc", "ABC") < 0)
    114 *   cout << "abc is less than ABC" << endl;
    115 * else
    116 *   cout << "abc is greater than or equal to ABC" << endl;
    117 * \endcode
    118 * </pre>
    119 * \htmlonly</blockquote>\endhtmlonly
    120 * <p>
    121 * You can set a <code>Collator</code>'s <em>strength</em> property to
    122 * determine the level of difference considered significant in comparisons.
    123 * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
    124 * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
    125 * The exact assignment of strengths to language features is locale dependent.
    126 * For example, in Czech, "e" and "f" are considered primary differences,
    127 * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
    128 * differences and "e" and "e" are identical. The following shows how both case
    129 * and accents could be ignored for US English.
    130 * \htmlonly<blockquote>\endhtmlonly
    131 * <pre>
    132 * \code
    133 * //Get the Collator for US English and set its strength to PRIMARY
    134 * UErrorCode success = U_ZERO_ERROR;
    135 * Collator* usCollator = Collator::createInstance(Locale::US, success);
    136 * usCollator->setStrength(Collator::PRIMARY);
    137 * if (usCollator->compare("abc", "ABC") == 0)
    138 *     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
    139 * \endcode
    140 * </pre>
    141 * \htmlonly</blockquote>\endhtmlonly
    142 * <p>
    143 * For comparing strings exactly once, the <code>compare</code> method
    144 * provides the best performance. When sorting a list of strings however, it
    145 * is generally necessary to compare each string multiple times. In this case,
    146 * sort keys provide better performance. The <code>getSortKey</code> methods
    147 * convert a string to a series of bytes that can be compared bitwise against
    148 * other sort keys using <code>strcmp()</code>. Sort keys are written as
    149 * zero-terminated byte strings. They consist of several substrings, one for
    150 * each collation strength level, that are delimited by 0x01 bytes.
    151 * If the string code points are appended for UCOL_IDENTICAL, then they are
    152 * processed for correct code point order comparison and may contain 0x01
    153 * bytes but not zero bytes.
    154 * </p>
    155 * <p>
    156 * An older set of APIs returns a <code>CollationKey</code> object that wraps
    157 * the sort key bytes instead of returning the bytes themselves.
    158 * Its use is deprecated, but it is still available for compatibility with
    159 * Java.
    160 * </p>
    161 * <p>
    162 * <strong>Note:</strong> <code>Collator</code>s with different Locale
    163 * and CollationStrength settings will return different sort
    164 * orders for the same set of strings. Locales have specific collation rules,
    165 * and the way in which secondary and tertiary differences are taken into
    166 * account, for example, will result in a different sorting order for the
    167 * same strings.
    168 * </p>
    169 * @see         RuleBasedCollator
    170 * @see         CollationKey
    171 * @see         CollationElementIterator
    172 * @see         Locale
    173 * @see         Normalizer
    174 * @version     2.0 11/15/01
    175 */
    176 
    177 class U_I18N_API Collator : public UObject {
    178 public:
    179 
    180     // Collator public enums -----------------------------------------------
    181 
    182     /**
    183      * A difference in base letters represents a primary difference. Set comparison level to
    184      * PRIMARY to ignore secondary and tertiary differences.<br>
    185      * Use this to set the strength of a Collator object.<br>
    186      * Example of primary difference, "abc" &lt; "abd"
    187      *
    188      * Diacritical differences on the same base letter represent a secondary
    189      * difference. Set comparison level to SECONDARY to ignore tertiary
    190      * differences. Use this to set the strength of a Collator object.<br>
    191      * Example of secondary difference, "&auml;" &gt;&gt; "a".
    192      *
    193      * Uppercase and lowercase versions of the same character represent a
    194      * tertiary difference.  Set comparison level to TERTIARY to include all
    195      * comparison differences. Use this to set the strength of a Collator
    196      * object.<br>
    197      * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
    198      *
    199      * Two characters are considered "identical" when they have the same Unicode
    200      * spellings.<br>
    201      * For example, "&auml;" == "&auml;".
    202      *
    203      * UCollationStrength is also used to determine the strength of sort keys
    204      * generated from Collator objects.
    205      * @stable ICU 2.0
    206      */
    207     enum ECollationStrength
    208     {
    209         PRIMARY    = 0,  /**< Only primary (base letter) differences are significant; secondary and tertiary differences are ignored. */
    210         SECONDARY  = 1,  /**< Diacritical differences are significant as well; tertiary differences are ignored. */
    211         TERTIARY   = 2,  /**< Uppercase/lowercase differences are significant as well. */
    212         QUATERNARY = 3,  /**< NOTE(review): not described in this comment; confirm semantics against the C API in ucol.h. */
    213         IDENTICAL  = 15  /**< Characters must have the same Unicode spelling. NOTE(review): the value 15 presumably mirrors the C API constant; confirm in ucol.h. */
    214     };
    215 
    216     /**
    217      * LESS is returned if source string is compared to be less than target
    218      * string in the compare() method.
    219      * EQUAL is returned if source string is compared to be equal to target
    220      * string in the compare() method.
    221      * GREATER is returned if source string is compared to be greater than
    222      * target string in the compare() method.
    223      * @see Collator#compare
    224      * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
    225      */
    226     enum EComparisonResult
    227     {
    228         LESS = -1,   /**< The source string compares less than the target string. */
    229         EQUAL = 0,   /**< The source string compares equal to the target string. */
    230         GREATER = 1  /**< The source string compares greater than the target string. */
    231     };
    232 
    233     // Collator public destructor -----------------------------------------
    234 
    235     /**
    236      * Destructor
    237      * @stable ICU 2.0
    238      */
    239     virtual ~Collator();
    240 
    241     // Collator public methods --------------------------------------------
    242 
    243     /**
    244      * Returns true if "other" is the same as "this"
    245      * @param other Collator object to be compared
    246      * @return true if other is the same as this.
    247      * @stable ICU 2.0
    248      */
    249     virtual UBool operator==(const Collator& other) const;
    250 
    251     /**
    252      * Returns true if "other" is not the same as "this".
    253      * @param other Collator object to be compared
    254      * @return true if other is not the same as this.
    255      * @stable ICU 2.0
    256      */
    257     virtual UBool operator!=(const Collator& other) const;
    258 
    259     /**
    260      * Makes a shallow copy of the current object.
    261      * @return a copy of this object
    262      * @stable ICU 2.0
    263      */
    264     virtual Collator* clone(void) const = 0;
    265 
    266     /**
    267      * Creates the Collator object for the current default locale.
    268      * The default locale is determined by Locale::getDefault.
    269      * The UErrorCode& err parameter is used to return status information to the user.
    270      * To check whether the construction succeeded or not, you should check the
    271      * value of U_SUCCESS(err).  If you wish more detailed information, you can
    272      * check for informational error results which still indicate success.
    273      * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
    274      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
    275      * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
    276      * used; neither the requested locale nor any of its fall back locales
    277      * could be found.
    278      * The caller owns the returned object and is responsible for deleting it.
    279      *
    280      * @param err    the error code status.
    281      * @return       the collation object of the default locale (for example, en_US).
    282      * @see Locale#getDefault
    283      * @stable ICU 2.0
    284      */
    285     static Collator* U_EXPORT2 createInstance(UErrorCode&  err);
    286 
    287     /**
    288      * Gets the table-based collation object for the desired locale. The
    289      * resource of the desired locale will be loaded by ResourceLoader.
    290      * Locale::ENGLISH is the base collation table and all other languages are
    291      * built on top of it with additional language-specific modifications.
    292      * The UErrorCode& err parameter is used to return status information to the user.
    293      * To check whether the construction succeeded or not, you should check
    294      * the value of U_SUCCESS(err).  If you wish more detailed information, you
    295      * can check for informational error results which still indicate success.
    296      * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
    297      * example, 'de_CH' was requested, but nothing was found there, so 'de' was
    298      * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
    299      * used; neither the requested locale nor any of its fall back locales
    300      * could be found.
    301      * The caller owns the returned object and is responsible for deleting it.
    302      * @param loc    The locale ID for which to open a collator.
    303      * @param err    the error code status.
    304      * @return       the created table-based collation object based on the desired
    305      *               locale.
    306      * @see Locale
    307      * @see ResourceLoader
    308      * @stable ICU 2.2
    309      */
    310     static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
    311 
    312 #ifdef U_USE_COLLATION_OBSOLETE_2_6
    313     /**
    314      * Create a Collator with a specific version.
    315      * This is the same as createInstance(loc, err) except that getVersion() of
    316      * the returned object is guaranteed to be the same as the version
    317      * parameter.
    318      * This is designed to be used to open the same collator for a given
    319      * locale even when ICU is updated.
    320      * The same locale and version guarantees the same sort keys and
    321      * comparison results.
    322      * <p>
    323      * Note: this API will be removed in a future release.  Use
    324      * <tt>createInstance(const Locale&, UErrorCode&)</tt> instead.</p>
    325      *
    326      * @param loc The locale ID for which to open a collator.
    327      * @param version The requested collator version.
    328      * @param err A reference to a UErrorCode,
    329      *            must not indicate a failure before calling this function.
    330      * @return A pointer to a Collator, or 0 if an error occurred
    331      *         or a collator with the requested version is not available.
    332      *
    333      * @see getVersion
    334      * @obsolete ICU 2.6
    335      */
    336     static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
    337 #endif
    338 
    339     /**
    340      * The comparison function compares the character data stored in two
    341      * different strings. Returns information about whether a string is less
    342      * than, greater than or equal to another string.
    343      * @param source the source string to be compared with.
    344      * @param target the string that is to be compared with the source string.
    345      * @return Returns a byte value. GREATER if source is greater
    346      * than target; EQUAL if source is equal to target; LESS if source is less
    347      * than target
    348      * @deprecated ICU 2.6 use the overload with UErrorCode &
    349      */
    350     virtual EComparisonResult compare(const UnicodeString& source,
    351                                       const UnicodeString& target) const;
    352 
    353     /**
    354      * The comparison function compares the character data stored in two
    355      * different strings. Returns information about whether a string is less
    356      * than, greater than or equal to another string.
    357      * @param source the source string to be compared with.
    358      * @param target the string that is to be compared with the source string.
    359      * @param status possible error code
    360      * @return Returns an enum value. UCOL_GREATER if source is greater
    361      * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    362      * than target
    363      * @stable ICU 2.6
    364      */
    365     virtual UCollationResult compare(const UnicodeString& source,
    366                                       const UnicodeString& target,
    367                                       UErrorCode &status) const = 0;
    368 
    369     /**
    370      * Does the same thing as compare but limits the comparison to a specified
    371      * length
    372      * @param source the source string to be compared with.
    373      * @param target the string that is to be compared with the source string.
    374      * @param length the length the comparison is limited to
    375      * @return Returns a byte value. GREATER if source (up to the specified
    376      *         length) is greater than target; EQUAL if source (up to specified
    377      *         length) is equal to target; LESS if source (up to the specified
    378      *         length) is less  than target.
    379      * @deprecated ICU 2.6 use the overload with UErrorCode &
    380      */
    381     virtual EComparisonResult compare(const UnicodeString& source,
    382                                       const UnicodeString& target,
    383                                       int32_t length) const;
    384 
    385     /**
    386      * Does the same thing as compare but limits the comparison to a specified
    387      * length
    388      * @param source the source string to be compared with.
    389      * @param target the string that is to be compared with the source string.
    390      * @param length the length the comparison is limited to
    391      * @param status possible error code
    392      * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    393      *         length) is greater than target; UCOL_EQUAL if source (up to specified
    394      *         length) is equal to target; UCOL_LESS if source (up to the specified
    395      *         length) is less  than target.
    396      * @stable ICU 2.6
    397      */
    398     virtual UCollationResult compare(const UnicodeString& source,
    399                                       const UnicodeString& target,
    400                                       int32_t length,
    401                                       UErrorCode &status) const = 0;
    402 
    403     /**
    404      * The comparison function compares the character data stored in two
    405      * different string arrays. Returns information about whether a string array
    406      * is less than, greater than or equal to another string array.
    407      * @param source the source string array to be compared with.
    408      * @param sourceLength the length of the source string array.  If this value
    409      *        is equal to -1, the string array is null-terminated.
    410      * @param target the string that is to be compared with the source string.
    411      * @param targetLength the length of the target string array.  If this value
    412      *        is equal to -1, the string array is null-terminated.
    413      * @return Returns a byte value. GREATER if source is greater than target;
    414      *         EQUAL if source is equal to target; LESS if source is less than
    415      *         target
    416      * @deprecated ICU 2.6 use the overload with UErrorCode &
    417      */
    418     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
    419                                       const UChar* target, int32_t targetLength)
    420                                       const;
    421 
    422     /**
    423      * The comparison function compares the character data stored in two
    424      * different string arrays. Returns information about whether a string array
    425      * is less than, greater than or equal to another string array.
    426      * @param source the source string array to be compared with.
    427      * @param sourceLength the length of the source string array.  If this value
    428      *        is equal to -1, the string array is null-terminated.
    429      * @param target the string that is to be compared with the source string.
    430      * @param targetLength the length of the target string array.  If this value
    431      *        is equal to -1, the string array is null-terminated.
    432      * @param status possible error code
    433      * @return Returns an enum value. UCOL_GREATER if source is greater
    434      * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    435      * than target
    436      * @stable ICU 2.6
    437      */
    438     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    439                                       const UChar* target, int32_t targetLength,
    440                                       UErrorCode &status) const = 0;
    441 
    442     /**
    443      * Compares two strings using the Collator.
    444      * Returns whether the first one compares less than/equal to/greater than
    445      * the second one.
    446      * This version takes UCharIterator input.
    447      * @param sIter the first ("source") string iterator
    448      * @param tIter the second ("target") string iterator
    449      * @param status ICU status
    450      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    451      * @stable ICU 4.2
    452      */
    453     virtual UCollationResult compare(UCharIterator &sIter,
    454                                      UCharIterator &tIter,
    455                                      UErrorCode &status) const;
    456 
    457     /**
    458      * Compares two UTF-8 strings using the Collator.
    459      * Returns whether the first one compares less than/equal to/greater than
    460      * the second one.
    461      * This version takes UTF-8 input.
    462      * Note that a StringPiece can be implicitly constructed
    463      * from a std::string or a NUL-terminated const char * string.
    464      * @param source the first UTF-8 string
    465      * @param target the second UTF-8 string
    466      * @param status ICU status
    467      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    468      * @stable ICU 4.2
    469      */
    470     virtual UCollationResult compareUTF8(const StringPiece &source,
    471                                          const StringPiece &target,
    472                                          UErrorCode &status) const;
    473 
    474     /**
    475      * Transforms the string into a series of characters that can be compared
    476      * with CollationKey::compareTo. It is not possible to restore the original
    477      * string from the chars in the sort key.  The generated sort key handles
    478      * only a limited number of ignorable characters.
    479      * <p>Use CollationKey::equals or CollationKey::compare to compare the
    480      * generated sort keys.
    481      * If the source string is null, a null collation key will be returned.
    482      * @param source the source string to be transformed into a sort key.
    483      * @param key the collation key to be filled in
    484      * @param status the error code status.
    485      * @return the collation key of the string based on the collation rules.
    486      * @see CollationKey#compare
    487      * @deprecated ICU 2.8 Use getSortKey(...) instead
    488      */
    489     virtual CollationKey& getCollationKey(const UnicodeString&  source,
    490                                           CollationKey& key,
    491                                           UErrorCode& status) const = 0;
    492 
    493     /**
    494      * Transforms the string into a series of characters that can be compared
    495      * with CollationKey::compareTo. It is not possible to restore the original
    496      * string from the chars in the sort key.  The generated sort key handles
    497      * only a limited number of ignorable characters.
    498      * <p>Use CollationKey::equals or CollationKey::compare to compare the
    499      * generated sort keys.
    500      * <p>If the source string is null, a null collation key will be returned.
    501      * @param source the source string to be transformed into a sort key.
    502      * @param sourceLength length of the source string
    503      * @param key the collation key to be filled in
    504      * @param status the error code status.
    505      * @return the collation key of the string based on the collation rules.
    506      * @see CollationKey#compare
    507      * @deprecated ICU 2.8 Use getSortKey(...) instead
    508      */
    509     virtual CollationKey& getCollationKey(const UChar*source,
    510                                           int32_t sourceLength,
    511                                           CollationKey& key,
    512                                           UErrorCode& status) const = 0;
    513     /**
    514      * Generates the hash code for the collation object
    515      * @stable ICU 2.0
    516      */
    517     virtual int32_t hashCode(void) const = 0;
    518 
    519     /**
    520      * Gets the locale of the Collator
    521      *
    522      * @param type can be either requested, valid or actual locale. For more
    523      *             information see the definition of ULocDataLocaleType in
    524      *             uloc.h
    525      * @param status the error code status.
    526      * @return locale where the collation data lives. If the collator
    527      *         was instantiated from rules, locale is empty.
    528      * @deprecated ICU 2.8 This API is under consideration for revision
    529      * in ICU 3.0.
    530      */
    531     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
    532 
    533     /**
    534      * Convenience method for comparing two strings based on the collation rules.
    535      * @param source the source string to be compared with.
    536      * @param target the target string to be compared with.
    537      * @return true if the first string is greater than the second one,
    538      *         according to the collation rules. false, otherwise.
    539      * @see Collator#compare
    540      * @stable ICU 2.0
    541      */
    542     UBool greater(const UnicodeString& source, const UnicodeString& target)
    543                   const;
    544 
    545     /**
    546      * Convenience method for comparing two strings based on the collation rules.
    547      * @param source the source string to be compared with.
    548      * @param target the target string to be compared with.
    549      * @return true if the first string is greater than or equal to the second
    550      *         one, according to the collation rules. false, otherwise.
    551      * @see Collator#compare
    552      * @stable ICU 2.0
    553      */
    554     UBool greaterOrEqual(const UnicodeString& source,
    555                          const UnicodeString& target) const;
    556 
    557     /**
    558      * Convenience method for comparing two strings based on the collation rules.
    559      * @param source the source string to be compared with.
    560      * @param target the target string to be compared with.
    561      * @return true if the strings are equal according to the collation rules.
    562      *         false, otherwise.
    563      * @see Collator#compare
    564      * @stable ICU 2.0
    565      */
    566     UBool equals(const UnicodeString& source, const UnicodeString& target) const;
    567 
    568     /**
    569      * Determines the minimum strength that will be used in comparison or
    570      * transformation.
    571      * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
    572      * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
    573      * are ignored.
    574      * @return the current comparison level.
    575      * @see Collator#setStrength
    576      * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
    577      */
    578     virtual ECollationStrength getStrength(void) const = 0;
    579 
    580     /**
    581      * Sets the minimum strength to be used in comparison or transformation.
    582      * <p>Example of use:
    583      * <pre>
    584      *  \code
    585      *  UErrorCode status = U_ZERO_ERROR;
    586      *  Collator*myCollation = Collator::createInstance(Locale::US, status);
    587      *  if (U_FAILURE(status)) return;
    588      *  myCollation->setStrength(Collator::PRIMARY);
    589      *  // result will be "abc" == "ABC"
    590      *  // tertiary differences will be ignored
    591      *  Collator::EComparisonResult result = myCollation->compare("abc", "ABC");
    592      * \endcode
    593      * </pre>
    594      * @see Collator#getStrength
    595      * @param newStrength the new comparison level.
    596      * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
    597      */
    598     virtual void setStrength(ECollationStrength newStrength) = 0;
    599 
    600     /**
    601      * Get the current reordering of scripts (if one has been set).
    602      * @param dest The array to fill with the script ordering.
    603      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
    604      * @param status Must be a valid reference to an error code value, which must not indicate a failure before the function call.
    605      * @return The length of the array of the script ordering.
    606      * @see ucol_getReorderCodes
    607      * @internal
    608      */
    609     virtual int32_t getReorderCodes(int32_t *dest,
    610                                     int32_t destCapacity,
    611                                     UErrorCode& status) const;
    612 
    613     /**
    614      * Set the ordering of scripts for this collator.
    615      * @param reorderCodes An array of reorder codes in the new order.
    616      * @param reorderCodesLength The length of reorderCodes.
             * @param status The error code value for this operation.
    617      * @see ucol_setReorderCodes
    618      * @internal
    619      */
    620     virtual void setReorderCodes(const int32_t* reorderCodes,
    621                                 int32_t reorderCodesLength,
    622                                 UErrorCode& status) ;
    623 
    624     /**
    625      * Get name of the object for the desired Locale, in the desired language.
    626      * @param objectLocale must be from getAvailableLocales
    627      * @param displayLocale specifies the desired locale for output
    628      * @param name the fill-in parameter of the return value
    629      * @return display-able name of the object for the object locale in the
    630      *         desired language
    631      * @stable ICU 2.0
    632      */
    633     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
    634                                          const Locale& displayLocale,
    635                                          UnicodeString& name);
    636 
    637     /**
    638     * Get name of the object for the desired Locale, in the language of the
    639     * default locale.
    640     * @param objectLocale must be from getAvailableLocales
    641     * @param name the fill-in parameter of the return value
    642     * @return name of the object for the desired locale in the default language
    643     * @stable ICU 2.0
    644     */
    645     static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
    646                                          UnicodeString& name);
    647 
    648     /**
    649      * Get the set of Locales for which Collations are installed.
    650      *
    651      * <p>Note this does not include locales supported by registered collators.
    652      * If collators might have been registered, use the overload of getAvailableLocales
    653      * that returns a StringEnumeration.</p>
    654      *
    655      * @param count the output parameter of number of elements in the locale list
    656      * @return the list of available locales for which collations are installed
    657      * @stable ICU 2.0
    658      */
    659     static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
    660 
    661     /**
    662      * Return a StringEnumeration over the locales available at the time of the call,
    663      * including registered locales.  If a severe error occurs (such as out of memory
    664      * condition) this will return null. If there is no locale data, an empty enumeration
    665      * will be returned.
    666      * @return a StringEnumeration over the locales available at the time of the call
    667      * @stable ICU 2.6
    668      */
    669     static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
    670 
    671     /**
    672      * Create a string enumerator of all possible keywords that are relevant to
    673      * collation. At this point, the only recognized keyword for this
    674      * service is "collation".
    675      * @param status input-output error code
    676      * @return a string enumeration over the collation keywords. The caller is
    677      * responsible for closing the result.
    678      * @stable ICU 3.0
    679      */
    680     static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
    681 
    682     /**
    683      * Given a keyword, create a string enumeration of all values
    684      * for that keyword that are currently in use.
    685      * @param keyword a particular keyword as enumerated by
    686      * ucol_getKeywords. If any other keyword is passed in, status is set
    687      * to U_ILLEGAL_ARGUMENT_ERROR.
    688      * @param status input-output error code
    689      * @return a string enumeration over collation keyword values, or NULL
    690      * upon error. The caller is responsible for deleting the result.
    691      * @stable ICU 3.0
    692      */
    693     static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
    694 
    695     /**
    696      * Given a key and a locale, returns an array of string values in a preferred
    697      * order that would make a difference. These are all and only those values where
    698      * the open (creation) of the service with the locale formed from the input locale
    699      * plus input keyword and that value has different behavior than creation with the
    700      * input locale alone.
    701      * @param keyword        one of the keys supported by this service.  For now, only
    702      *                      "collation" is supported.
    703      * @param locale        the locale
    704      * @param commonlyUsed  if set to true it will return only commonly used values
    705      *                      with the given locale in preferred order.  Otherwise,
    706      *                      it will return all the available values for the locale.
    707      * @param status ICU status
    708      * @return a string enumeration over keyword values for the given key and the locale.
    709      * @stable ICU 4.2
    710      */
    711     static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
    712                                                                     UBool commonlyUsed, UErrorCode& status);
    713 
    714     /**
    715      * Return the functionally equivalent locale for the given
    716      * requested locale, with respect to given keyword, for the
    717      * collation service.  If two locales return the same result, then
    718      * collators instantiated for these locales will behave
    719      * equivalently.  The converse is not always true; two collators
    720      * may in fact be equivalent, but return different results, due to
    721      * internal details.  The return result has no other meaning than
    722      * that stated above, and implies nothing as to the relationship
    723      * between the two locales.  This is intended for use by
    724      * applications that wish to cache collators, or otherwise reuse
    725      * collators when possible.  The functional equivalent may change
    726      * over time.  For more information, please see the <a
    727      * href="http://icu-project.org/userguide/locale.html#services">
    728      * Locales and Services</a> section of the ICU User Guide.
    729      * @param keyword a particular keyword as enumerated by
    730      * ucol_getKeywords.
    731      * @param locale the requested locale
    732      * @param isAvailable reference to a fill-in parameter that
    733      * indicates whether the requested locale was 'available' to the
    734      * collation service. A locale is defined as 'available' if it
    735      * physically exists within the collation locale data.
    736      * @param status reference to input-output error code
    737      * @return the functionally equivalent collation locale, or the root
    738      * locale upon error.
    739      * @stable ICU 3.0
    740      */
    741     static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
    742                                           UBool& isAvailable, UErrorCode& status);
    743 
    744 #if !UCONFIG_NO_SERVICE
    745     /**
    746      * Register a new Collator.  The collator will be adopted.
    747      * @param toAdopt the Collator instance to be adopted
    748      * @param locale the locale with which the collator will be associated
    749      * @param status the in/out status code, no special meanings are assigned
    750      * @return a registry key that can be used to unregister this collator
    751      * @stable ICU 2.6
    752      */
    753     static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
    754 
    755     /**
    756      * Register a new CollatorFactory.  The factory will be adopted.
    757      * @param toAdopt the CollatorFactory instance to be adopted
    758      * @param status the in/out status code, no special meanings are assigned
    759      * @return a registry key that can be used to unregister this factory
    760      * @stable ICU 2.6
    761      */
    762     static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
    763 
    764     /**
    765      * Unregister a previously-registered Collator or CollatorFactory
    766      * using the key returned from the register call.  Key becomes
    767      * invalid after a successful call and should not be used again.
    768      * The object corresponding to the key will be deleted.
    769      * @param key the registry key returned by a previous call to registerInstance or registerFactory
    770      * @param status the in/out status code, no special meanings are assigned
    771      * @return TRUE if the collator or factory for the key was successfully unregistered
    772      * @stable ICU 2.6
    773      */
    774     static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
    775 #endif /* UCONFIG_NO_SERVICE */
    776 
    777     /**
    778      * Gets the version information for a Collator.
    779      * @param info the version # information, the result will be filled in
    780      * @stable ICU 2.0
    781      */
    782     virtual void getVersion(UVersionInfo info) const = 0;
    783 
    784     /**
    785      * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
    786      * This method is to implement a simple version of RTTI, since not all C++
    787      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    788      * methods call this method.
    789      * @return The class ID for this object. All objects of a given class have
    790      *         the same class ID.  Objects of other classes have different class
    791      *         IDs.
    792      * @stable ICU 2.0
    793      */
    794     virtual UClassID getDynamicClassID(void) const = 0;
    795 
    796     /**
    797      * Universal attribute setter
    798      * @param attr attribute type
    799      * @param value attribute value
    800      * @param status to indicate whether the operation went on smoothly or
    801      *        there were errors
    802      * @stable ICU 2.2
    803      */
    804     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    805                               UErrorCode &status) = 0;
    806 
    807     /**
    808      * Universal attribute getter
    809      * @param attr attribute type
    810      * @param status to indicate whether the operation went on smoothly or
    811      *        there were errors
    812      * @return attribute value
    813      * @stable ICU 2.2
    814      */
    815     virtual UColAttributeValue getAttribute(UColAttribute attr,
    816                                             UErrorCode &status) = 0;
    817 
    818     /**
    819      * Sets the variable top to a collation element value of a string supplied.
    820      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    821      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    822      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    823      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    824      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    825      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    826      * @stable ICU 2.0
    827      */
    828     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
    829 
    830     /**
    831      * Sets the variable top to a collation element value of a string supplied.
    832      * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
    833      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    834      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    835      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    836      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    837      * @stable ICU 2.0
    838      */
    839     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0;
    840 
    841     /**
    842      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
    843      * Lower 16 bits are ignored.
    844      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    845      * @param status error code (not changed by function)
    846      * @stable ICU 2.0
    847      */
    848     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0;
    849 
    850     /**
    851      * Gets the variable top value of a Collator.
    852      * Lower 16 bits are undefined and should be ignored.
    853      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    854      * @stable ICU 2.0
    855      */
    856     virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
    857 
    858     /**
    859      * Get an UnicodeSet that contains all the characters and sequences
    860      * tailored in this collator.
    861      * @param status      error code of the operation
    862      * @return a pointer to a UnicodeSet object containing all the
    863      *         code points and sequences that may sort differently than
    864      *         in the UCA. The object must be disposed of by using delete
    865      * @stable ICU 2.4
    866      */
    867     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    868 
    869 
    870     /**
    871      * Thread safe cloning operation
    872      * @return pointer to the new clone, user should remove it.
    873      * @stable ICU 2.2
    874      */
    875     virtual Collator* safeClone(void) = 0;
    876 
    877     /**
    878      * Get the sort key as an array of bytes from a UnicodeString.
    879      * Sort key byte arrays are zero-terminated and can be compared using
    880      * strcmp().
    881      * @param source string to be processed.
    882      * @param result buffer to store result in. If NULL, number of bytes needed
    883      *        will be returned.
    884      * @param resultLength length of the result buffer. If it is not enough, the
    885      *        buffer will be filled to capacity.
    886      * @return Number of bytes needed for storing the sort key
    887      * @stable ICU 2.2
    888      */
    889     virtual int32_t getSortKey(const UnicodeString& source,
    890                               uint8_t* result,
    891                               int32_t resultLength) const = 0;
    892 
    893     /**
    894      * Get the sort key as an array of bytes from a UChar buffer.
    895      * Sort key byte arrays are zero-terminated and can be compared using
    896      * strcmp().
    897      * @param source string to be processed.
    898      * @param sourceLength length of string to be processed.
    899      *        If -1, the string is 0 terminated and length will be decided by the
    900      *        function.
    901      * @param result buffer to store result in. If NULL, number of bytes needed
    902      *        will be returned.
    903      * @param resultLength length of the result buffer. If it is not enough, the
    904      *        buffer will be filled to capacity.
    905      * @return Number of bytes needed for storing the sort key
    906      * @stable ICU 2.2
    907      */
    908     virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
    909                                uint8_t*result, int32_t resultLength) const = 0;
    910 
    911     /**
    912      * Produce a bound for a given sortkey and a number of levels.
    913      * Return value is always the number of bytes needed, regardless of
    914      * whether the result buffer was big enough or even valid.<br>
    915      * Resulting bounds can be used to produce a range of strings that are
    916      * between upper and lower bounds. For example, if bounds are produced
    917      * for a sortkey of string "smith", strings between upper and lower
    918      * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
    919      * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
    920      * is produced, strings matched would be as above. However, if bound
    921      * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
    922      * also match "Smithsonian" and similar.<br>
    923      * For more on usage, see example in cintltst/capitst.c in procedure
    924      * TestBounds.
    925      * Sort keys may be compared using <TT>strcmp</TT>.
    926      * @param source The source sortkey.
    927      * @param sourceLength The length of source, or -1 if null-terminated.
    928      *                     (If an unmodified sortkey is passed, it is always null
    929      *                      terminated).
    930      * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
    931      *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
    932      *                  produces upper bound that matches strings of the same length
    933      *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
    934      *                  same starting substring as the source string.
    935      * @param noOfLevels  Number of levels required in the resulting bound (for most
    936      *                    uses, the recommended value is 1). See users guide for
    937      *                    explanation on number of levels a sortkey can have.
    938      * @param result A pointer to a buffer to receive the resulting sortkey.
    939      * @param resultLength The maximum size of result.
    940      * @param status Used for returning error code if something went wrong. If the
    941      *               number of levels requested is higher than the number of levels
    942      *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
    943      *               issued.
    944      * @return The size needed to fully store the bound.
    945      * @see ucol_keyHashCode
    946      * @stable ICU 2.1
    947      */
    948     static int32_t U_EXPORT2 getBound(const uint8_t       *source,
    949             int32_t             sourceLength,
    950             UColBoundMode       boundType,
    951             uint32_t            noOfLevels,
    952             uint8_t             *result,
    953             int32_t             resultLength,
    954             UErrorCode          &status);
    955 
    956 
    957 protected:
    958 
    959     // Collator protected constructors -------------------------------------
    960 
    961     /**
    962     * Default constructor.
    963     * Constructor is different from the old default Collator constructor.
    964     * The task for determining the default collation strength and normalization
    965     * mode is left to the child class.
    966     * @stable ICU 2.0
    967     */
    968     Collator();
    969 
    970     /**
    971     * Constructor.
    972     * Empty constructor, does not handle the arguments.
    973     * This constructor is done for backward compatibility with 1.7 and 1.8.
    974     * The task for handling the argument collation strength and normalization
    975     * mode is left to the child class.
    976     * @param collationStrength collation strength
    977     * @param decompositionMode normalization mode
    978     * @deprecated ICU 2.4. Subclasses should use the default constructor
    979     * instead and handle the strength and normalization mode themselves.
    980     */
    981     Collator(UCollationStrength collationStrength,
    982              UNormalizationMode decompositionMode);
    983 
    984     /**
    985     * Copy constructor.
    986     * @param other Collator object to be copied from
    987     * @stable ICU 2.0
    988     */
    989     Collator(const Collator& other);
    990 
    991     // Collator protected methods -----------------------------------------
    992 
    993 
    994    /**
    995     * Used internally by registration to define the requested and valid locales.
    996     * @param requestedLocale the requested locale
    997     * @param validLocale the valid locale
    998     * @param actualLocale the actual locale
    999     * @internal
   1000     */
   1001     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
   1002 
   1003 public:
   1004 #if !UCONFIG_NO_SERVICE
   1005     /**
   1006      * used only by ucol_open, not for public use
   1007      * @internal
   1008      */
   1009     static UCollator* createUCollator(const char* loc, UErrorCode* status);
   1010 #endif
   1011 private:
   1012     /**
   1013      * Assignment operator. Private for now.
   1014      * @internal
   1015      */
   1016     Collator& operator=(const Collator& other);
   1017 
   1018     friend class CFactory;
   1019     friend class SimpleCFactory;
   1020     friend class ICUCollatorFactory;
   1021     friend class ICUCollatorService;
   1022     static Collator* makeInstance(const Locale& desiredLocale,
   1023                                   UErrorCode& status);
   1024 
   1025     // Collator private data members ---------------------------------------
   1026 
   1027     /*
   1028     synwee : removed as attributes to be handled by child class
   1029     UCollationStrength  strength;
   1030     Normalizer::EMode  decmp;
   1031     */
   1032     /* This is useless information */
   1033 /*  static const UVersionInfo fVersion;*/
   1034 };
   1035 
   1036 #if !UCONFIG_NO_SERVICE
   1037 /**
   1038  * A factory, used with registerFactory, that creates multiple collators and provides
   1039  * display names for them.  A factory supports some number of locales-- these are the
   1040  * locales for which it can create collators.  The factory can be visible, in which
   1041  * case the supported locales will be enumerated by getAvailableLocales, or invisible,
   1042  * in which they are not.  Invisible locales are still supported, they are just not
   1043  * listed by getAvailableLocales.
   1044  * <p>
   1045  * If standard locale display names are sufficient, Collator instances can
   1046  * be registered using registerInstance instead.</p>
   1047  * <p>
   1048  * Note: if the collators are to be used from C APIs, they must be instances
   1049  * of RuleBasedCollator.</p>
   1050  *
   1051  * @stable ICU 2.6
   1052  */
   1053 class U_I18N_API CollatorFactory : public UObject {
   1054 public:
   1055 
   1056     /**
   1057      * Destructor
   1058      * @stable ICU 3.0
   1059      */
   1060     virtual ~CollatorFactory();
   1061 
   1062     /**
   1063      * Return true if this factory is visible.  Default is true.
   1064      * If not visible, the locales supported by this factory will not
   1065      * be listed by getAvailableLocales.
   1066      * @return true if the factory is visible.
   1067      * @stable ICU 2.6
   1068      */
   1069     virtual UBool visible(void) const;
   1070 
   1071     /**
   1072      * Return a collator for the provided locale.  If the locale
   1073      * is not supported, return NULL.
   1074      * @param loc the locale identifying the collator to be created.
   1075      * @return a new collator if the locale is supported, otherwise NULL.
   1076      * @stable ICU 2.6
   1077      */
   1078     virtual Collator* createCollator(const Locale& loc) = 0;
   1079 
   1080     /**
   1081      * Return the name of the collator for the objectLocale, localized for the displayLocale.
   1082      * If objectLocale is not supported, or the factory is not visible, set the result string
   1083      * to bogus.
   1084      * @param objectLocale the locale identifying the collator
   1085      * @param displayLocale the locale for which the display name of the collator should be localized
   1086      * @param result an output parameter for the display name, set to bogus if not supported.
   1087      * @return the display name
   1088      * @stable ICU 2.6
   1089      */
   1090     virtual  UnicodeString& getDisplayName(const Locale& objectLocale,
   1091                                            const Locale& displayLocale,
   1092                                            UnicodeString& result);
   1093 
   1094     /**
   1095      * Return an array of all the locale names directly supported by this factory.
   1096      * The number of names is returned in count.  This array is owned by the factory.
   1097      * Its contents must never change.
   1098      * @param count output parameter for the number of locales supported by the factory
   1099      * @param status the in/out error code
   1100      * @return a pointer to an array of count UnicodeStrings.
   1101      * @stable ICU 2.6
   1102      */
   1103     virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
   1104 };
   1105 #endif /* UCONFIG_NO_SERVICE */
   1106 
   1107 // Collator inline methods -----------------------------------------------
   1108 
   1109 U_NAMESPACE_END
   1110 
   1111 #endif /* #if !UCONFIG_NO_COLLATION */
   1112 
   1113 #endif
   1114