Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2009, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 */
     58 
     59 #ifndef TBLCOLL_H
     60 #define TBLCOLL_H
     61 
     62 #include "unicode/utypes.h"
     63 
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/ucol.h"
     69 #include "unicode/sortkey.h"
     70 #include "unicode/normlzr.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 /**
     75 * @stable ICU 2.0
     76 */
     77 class StringSearch;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class CollationElementIterator;
     82 
     83 /**
     84  * The RuleBasedCollator class provides the simple implementation of
     85  * Collator, using data-driven tables. The user can create a customized
     86  * table-based collation.
     87  * <P>
     88  * <em>Important: </em>The ICU collation service has been reimplemented
     89  * in order to achieve better performance and UCA compliance.
     90  * For details, see the
     91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     92  * collation design document</a>.
     93  * <p>
     94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
    105  * Collation customization</a> section of the users guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  * @version    2.0 11/15/2001
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator
    112 {
    113 public:
    114 
    115   // constructor -------------------------------------------------------------
    116 
    117     /**
    118      * RuleBasedCollator constructor. This takes the table rules and builds a
    119      * collation table out of them. Please see RuleBasedCollator class
    120      * description for more details on the collation rule syntax.
    121      * @param rules the collation rules to build the collation table from.
    122      * @param status reporting a success or an error.
    123      * @see Locale
    124      * @stable ICU 2.0
    125      */
    126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    127 
    128     /**
    129      * RuleBasedCollator constructor. This takes the table rules and builds a
    130      * collation table out of them. Please see RuleBasedCollator class
    131      * description for more details on the collation rule syntax.
    132      * @param rules the collation rules to build the collation table from.
    133      * @param collationStrength default strength for comparison
    134      * @param status reporting a success or an error.
    135      * @see Locale
    136      * @stable ICU 2.0
    137      */
    138     RuleBasedCollator(const UnicodeString& rules,
    139                        ECollationStrength collationStrength,
    140                        UErrorCode& status);
    141 
    142     /**
    143      * RuleBasedCollator constructor. This takes the table rules and builds a
    144      * collation table out of them. Please see RuleBasedCollator class
    145      * description for more details on the collation rule syntax.
    146      * @param rules the collation rules to build the collation table from.
    147      * @param decompositionMode the normalisation mode
    148      * @param status reporting a success or an error.
    149      * @see Locale
    150      * @stable ICU 2.0
    151      */
    152     RuleBasedCollator(const UnicodeString& rules,
    153                     UColAttributeValue decompositionMode,
    154                     UErrorCode& status);
    155 
    156     /**
    157      * RuleBasedCollator constructor. This takes the table rules and builds a
    158      * collation table out of them. Please see RuleBasedCollator class
    159      * description for more details on the collation rule syntax.
    160      * @param rules the collation rules to build the collation table from.
    161      * @param collationStrength default strength for comparison
    162      * @param decompositionMode the normalisation mode
    163      * @param status reporting a success or an error.
    164      * @see Locale
    165      * @stable ICU 2.0
    166      */
    167     RuleBasedCollator(const UnicodeString& rules,
    168                     ECollationStrength collationStrength,
    169                     UColAttributeValue decompositionMode,
    170                     UErrorCode& status);
    171 
    172     /**
    173      * Copy constructor.
    174      * @param other the RuleBasedCollator object to be copied
    175      * @see Locale
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. Binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which usually should be UCA.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually UCA. Base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201     // destructor --------------------------------------------------------------
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     // public methods ----------------------------------------------------------
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other the RuleBasedCollator object to assign from.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if the argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Returns true if argument is not the same as this object.
    228      * @param other Collator object to be compared
    229      * @return returns true if argument is not the same as this object.
    230      * @stable ICU 2.0
    231      */
    232     virtual UBool operator!=(const Collator& other) const;
    233 
    234     /**
    235      * Makes a deep copy of the object.
    236      * The caller owns the returned object.
    237      * @return the cloned object.
    238      * @stable ICU 2.0
    239      */
    240     virtual Collator* clone(void) const;
    241 
    242     /**
    243      * Creates a collation element iterator for the source string. The caller of
    244      * this method is responsible for the memory management of the return
    245      * pointer.
    246      * @param source the string over which the CollationElementIterator will
    247      *        iterate.
    248      * @return the collation element iterator of the source string using this as
    249      *         the based Collator.
    250      * @stable ICU 2.2
    251      */
    252     virtual CollationElementIterator* createCollationElementIterator(
    253                                            const UnicodeString& source) const;
    254 
    255     /**
    256      * Creates a collation element iterator for the source. The caller of this
    257      * method is responsible for the memory management of the returned pointer.
    258      * @param source the CharacterIterator which produces the characters over
    259      *        which the CollationElementIterator will iterate.
    260      * @return the collation element iterator of the source using this as the
    261      *         based Collator.
    262      * @stable ICU 2.2
    263      */
    264     virtual CollationElementIterator* createCollationElementIterator(
    265                                          const CharacterIterator& source) const;
    266 
    267     /**
    268      * Compares a range of character data stored in two different strings based
    269      * on the collation rules. Returns information about whether a string is
    270      * less than, greater than or equal to another string in a language.
    271      * This can be overridden in a subclass.
    272      * @param source the source string.
    273      * @param target the target string to be compared with the source string.
    274      * @return the comparison result. GREATER if the source string is greater
    275      *         than the target string, LESS if the source is less than the
    276      *         target. Otherwise, returns EQUAL.
    277      * @deprecated ICU 2.6 Use overload with UErrorCode&
    278      */
    279     virtual EComparisonResult compare(const UnicodeString& source,
    280                                       const UnicodeString& target) const;
    281 
    282 
    283     /**
    284     * The comparison function compares the character data stored in two
    285     * different strings. Returns information about whether a string is less
    286     * than, greater than or equal to another string.
    287     * @param source the source string to be compared with.
    288     * @param target the string that is to be compared with the source string.
    289     * @param status possible error code
    290     * @return Returns an enum value. UCOL_GREATER if source is greater
    291     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    292     * than target
    293     * @stable ICU 2.6
    294     **/
    295     virtual UCollationResult compare(const UnicodeString& source,
    296                                       const UnicodeString& target,
    297                                       UErrorCode &status) const;
    298 
    299     /**
    300      * Compares a range of character data stored in two different strings based
    301      * on the collation rules up to the specified length. Returns information
    302      * about whether a string is less than, greater than or equal to another
    303      * string in a language. This can be overridden in a subclass.
    304      * @param source the source string.
    305      * @param target the target string to be compared with the source string.
    306      * @param length compares up to the specified length
    307      * @return the comparison result. GREATER if the source string is greater
    308      *         than the target string, LESS if the source is less than the
    309      *         target. Otherwise, returns EQUAL.
    310      * @deprecated ICU 2.6 Use overload with UErrorCode&
    311      */
    312     virtual EComparisonResult compare(const UnicodeString& source,
    313                                       const UnicodeString&  target,
    314                                       int32_t length) const;
    315 
    316     /**
    317     * Does the same thing as compare but limits the comparison to a specified
    318     * length
    319     * @param source the source string to be compared with.
    320     * @param target the string that is to be compared with the source string.
    321     * @param length the length the comparison is limited to
    322     * @param status possible error code
    323     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    324     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    325     *         length) is equal to target; UCOL_LESS if source (up to the specified
    326     *         length) is less  than target.
    327     * @stable ICU 2.6
    328     */
    329     virtual UCollationResult compare(const UnicodeString& source,
    330                                       const UnicodeString& target,
    331                                       int32_t length,
    332                                       UErrorCode &status) const;
    333 
    334     /**
    335      * The comparison function compares the character data stored in two
    336      * different string arrays. Returns information about whether a string array
    337      * is less than, greater than or equal to another string array.
    338      * <p>Example of use:
    339      * <pre>
    340      * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
    341      * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
    342      * .       UErrorCode status = U_ZERO_ERROR;
    343      * .       Collator *myCollation =
    344      * .                         Collator::createInstance(Locale::US, status);
    345      * .       if (U_FAILURE(status)) return;
    346      * .       myCollation->setStrength(Collator::PRIMARY);
    347      * .       // result would be Collator::EQUAL ("abc" == "ABC")
    348      * .       // (no primary difference between "abc" and "ABC")
    349      * .       Collator::EComparisonResult result =
    350      * .                             myCollation->compare(abc, 3, ABC, 3);
    351      * .       myCollation->setStrength(Collator::TERTIARY);
    352      * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
    353      * .       // (with tertiary difference between "abc" and "ABC")
    354      * .       result =  myCollation->compare(abc, 3, ABC, 3);
    355      * </pre>
    356      * @param source the source string array to be compared with.
    357      * @param sourceLength the length of the source string array. If this value
    358      *        is equal to -1, the string array is null-terminated.
    359      * @param target the string that is to be compared with the source string.
    360      * @param targetLength the length of the target string array. If this value
    361      *        is equal to -1, the string array is null-terminated.
    362      * @return Returns an EComparisonResult value. GREATER if source is greater than target;
    363      *         EQUAL if source is equal to target; LESS if source is less than
    364      *         target
    365      * @deprecated ICU 2.6 Use overload with UErrorCode&
    366      */
    367     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
    368                                       const UChar* target, int32_t targetLength)
    369                                       const;
    370 
    371     /**
    372     * The comparison function compares the character data stored in two
    373     * different string arrays. Returns information about whether a string array
    374     * is less than, greater than or equal to another string array.
    375     * @param source the source string array to be compared with.
    376     * @param sourceLength the length of the source string array.  If this value
    377     *        is equal to -1, the string array is null-terminated.
    378     * @param target the string that is to be compared with the source string.
    379     * @param targetLength the length of the target string array.  If this value
    380     *        is equal to -1, the string array is null-terminated.
    381     * @param status possible error code
    382     * @return Returns an enum value. UCOL_GREATER if source is greater
    383     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    384     * than target
    385     * @stable ICU 2.6
    386     */
    387     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    388                                       const UChar* target, int32_t targetLength,
    389                                       UErrorCode &status) const;
    390 
    391     /**
    392      * Compares two strings using the Collator.
    393      * Returns whether the first one compares less than/equal to/greater than
    394      * the second one.
    395      * This version takes UCharIterator input.
    396      * @param sIter the first ("source") string iterator
    397      * @param tIter the second ("target") string iterator
    398      * @param status ICU status
    399      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    400      * @draft ICU 4.2
    401      */
    402     virtual UCollationResult compare(UCharIterator &sIter,
    403                                      UCharIterator &tIter,
    404                                      UErrorCode &status) const;
    405 
    406     /**
    407     * Transforms a specified region of the string into a series of characters
    408     * that can be compared with CollationKey.compare. Use a CollationKey when
    409     * you need to do repeated comparisons on the same string. For a single
    410     * comparison the compare method will be faster.
    411     * @param source the source string.
    412     * @param key the transformed key of the source string.
    413     * @param status the error code status.
    414     * @return the transformed key.
    415     * @see CollationKey
    416     * @deprecated ICU 2.8 Use getSortKey(...) instead
    417     */
    418     virtual CollationKey& getCollationKey(const UnicodeString& source,
    419                                           CollationKey& key,
    420                                           UErrorCode& status) const;
    421 
    422     /**
    423     * Transforms a specified region of the string into a series of characters
    424     * that can be compared with CollationKey.compare. Use a CollationKey when
    425     * you need to do repeated comparisons on the same string. For a single
    426     * comparison the compare method will be faster.
    427     * @param source the source string.
    428     * @param sourceLength the length of the source string.
    429     * @param key the transformed key of the source string.
    430     * @param status the error code status.
    431     * @return the transformed key.
    432     * @see CollationKey
    433     * @deprecated ICU 2.8 Use getSortKey(...) instead
    434     */
    435     virtual CollationKey& getCollationKey(const UChar *source,
    436                                           int32_t sourceLength,
    437                                           CollationKey& key,
    438                                           UErrorCode& status) const;
    439 
    440     /**
    441      * Generates the hash code for the rule-based collation object.
    442      * @return the hash code.
    443      * @stable ICU 2.0
    444      */
    445     virtual int32_t hashCode(void) const;
    446 
    447     /**
    448     * Gets the locale of the Collator
    449     * @param type can be either requested, valid or actual locale. For more
    450     *             information see the definition of ULocDataLocaleType in
    451     *             uloc.h
    452     * @param status the error code status.
    453     * @return locale where the collation data lives. If the collator
    454     *         was instantiated from rules, locale is empty.
    455     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    456     */
    457     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    458 
    459     /**
    460      * Gets the table-based rules for the collation object.
    461      * @return returns the collation rules that the table collation object was
    462      *         created from.
    463      * @stable ICU 2.0
    464      */
    465     const UnicodeString& getRules(void) const;
    466 
    467     /**
    468      * Gets the version information for a Collator.
    469      * @param info the version # information, the result will be filled in
    470      * @stable ICU 2.0
    471      */
    472     virtual void getVersion(UVersionInfo info) const;
    473 
    474     /**
    475      * Return the maximum length of any expansion sequences that end with the
    476      * specified comparison order.
    477      * @param order a collation order returned by previous or next.
    478      * @return maximum size of the expansion sequences ending with the collation
    479      *         element or 1 if collation element does not occur at the end of
    480      *         any expansion sequence
    481      * @see CollationElementIterator#getMaxExpansion
    482      * @stable ICU 2.0
    483      */
    484     int32_t getMaxExpansion(int32_t order) const;
    485 
    486     /**
    487      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    488      * method is to implement a simple version of RTTI, since not all C++
    489      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    490      * methods call this method.
    491      * @return The class ID for this object. All objects of a given class have
    492      *         the same class ID. Objects of other classes have different class
    493      *         IDs.
    494      * @stable ICU 2.0
    495      */
    496     virtual UClassID getDynamicClassID(void) const;
    497 
    498     /**
    499      * Returns the class ID for this class. This is useful only for comparing to
    500      * a return value from getDynamicClassID(). For example:
    501      * <pre>
    502      * Base* polymorphic_pointer = createPolymorphicObject();
    503      * if (polymorphic_pointer->getDynamicClassID() ==
    504      *                                          Derived::getStaticClassID()) ...
    505      * </pre>
    506      * @return The class ID for all objects of this class.
    507      * @stable ICU 2.0
    508      */
    509     static UClassID U_EXPORT2 getStaticClassID(void);
    510 
    511     /**
    512      * Returns the binary format of the class's rules. The format is that of
    513      * .col files.
    514      * @param length Returns the length of the data, in bytes
    515      * @param status the error code status.
    516      * @return memory, owned by the caller, of size 'length' bytes.
    517      * @stable ICU 2.2
    518      */
    519     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
    520 
    521 
    522     /** Creates a binary image of a collator. This binary image can be stored and
    523     *  later used to instantiate a collator using ucol_openBinary.
    524     *  This API supports preflighting.
    525     *  @param buffer a fill-in buffer to receive the binary image
    526     *  @param capacity capacity of the destination buffer
    527     *  @param status for catching errors
    528     *  @return size of the image
    529     *  @see ucol_openBinary
    530     *  @stable ICU 3.4
    531     */
    532     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
    533 
    534     /**
    535      * Returns current rules. Delta defines whether full rules are returned or
    536      * just the tailoring.
    537      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    538      * @param buffer UnicodeString to store the result rules
    539      * @stable ICU 2.2
    540      */
    541     void getRules(UColRuleOption delta, UnicodeString &buffer);
    542 
    543     /**
    544      * Universal attribute setter
    545      * @param attr attribute type
    546      * @param value attribute value
    547      * @param status to indicate whether the operation went on smoothly or there were errors
    548      * @stable ICU 2.2
    549      */
    550     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    551                               UErrorCode &status);
    552 
    553     /**
    554      * Universal attribute getter.
    555      * @param attr attribute type
    556      * @param status to indicate whether the operation went on smoothly or there were errors
    557      * @return attribute value
    558      * @stable ICU 2.2
    559      */
    560     virtual UColAttributeValue getAttribute(UColAttribute attr,
    561                                             UErrorCode &status);
    562 
    563     /**
    564      * Sets the variable top to a collation element value of a string supplied.
    565      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    566      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    567      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    568      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    569      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    570      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    571      * @stable ICU 2.0
    572      */
    573     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    574 
    575     /**
    576      * Sets the variable top to a collation element value of a string supplied.
    577      * @param varTop a UnicodeString of one or more UChars (more than one if a contraction) to which the variable top should be set
    578      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    579      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    580      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    581      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    582      * @stable ICU 2.0
    583      */
    584     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
    585 
    586     /**
    587      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
    588      * Lower 16 bits are ignored.
    589      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    590      * @param status error code (not changed by function)
    591      * @stable ICU 2.0
    592      */
    593     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
    594 
    595     /**
    596      * Gets the variable top value of a Collator.
    597      * Lower 16 bits are undefined and should be ignored.
    598      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    599      * @stable ICU 2.0
    600      */
    601     virtual uint32_t getVariableTop(UErrorCode &status) const;
    602 
    603     /**
    604      * Get a UnicodeSet that contains all the characters and sequences tailored in
    605      * this collator.
    606      * @param status      error code of the operation
    607      * @return a pointer to a UnicodeSet object containing all the
    608      *         code points and sequences that may sort differently than
    609      *         in the UCA. The object must be disposed of by using delete
    610      * @stable ICU 2.4
    611      */
    612     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    613 
    614     /**
    615      * Thread safe cloning operation.
    616      * @return pointer to the new clone; the caller is responsible for deleting it.
    617      * @stable ICU 2.2
    618      */
    619     virtual Collator* safeClone(void);
    620 
    621     /**
    622      * Get the sort key as an array of bytes from a UnicodeString.
    623      * @param source string to be processed.
    624      * @param result buffer to store result in. If NULL, number of bytes needed
    625      *        will be returned.
    626      * @param resultLength length of the result buffer. If it is not large enough,
    627      *        the buffer will be filled to capacity.
    628      * @return Number of bytes needed for storing the sort key
    629      * @stable ICU 2.0
    630      */
    631     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    632                                int32_t resultLength) const;
    633 
    634     /**
    635      * Get the sort key as an array of bytes from a UChar buffer.
    636      * @param source string to be processed.
    637      * @param sourceLength length of string to be processed. If -1, the string
    638      *        is 0 terminated and length will be decided by the function.
    639      * @param result buffer to store result in. If NULL, number of bytes needed
    640      *        will be returned.
    641      * @param resultLength length of the result buffer. If it is not large enough,
    642      *        the buffer will be filled to capacity.
    643      * @return Number of bytes needed for storing the sort key
    644      * @stable ICU 2.2
    645      */
    646     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    647                                uint8_t *result, int32_t resultLength) const;
    648 
    649     /**
    650     * Determines the minimum strength that will be used in comparison or
    651     * transformation.
    652     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
    653     * <p>E.g. with strength == PRIMARY, the secondary and tertiary differences
    654     * are ignored.
    655     * @return the current comparison level.
    656     * @see RuleBasedCollator#setStrength
    657     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
    658     */
    659     virtual ECollationStrength getStrength(void) const;
    660 
    661     /**
    662     * Sets the minimum strength to be used in comparison or transformation.
    663     * @see RuleBasedCollator#getStrength
    664     * @param newStrength the new comparison level.
    665     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
    666     */
    667     virtual void setStrength(ECollationStrength newStrength);
    668 
    669 private:
    670 
    671     // private static constants -----------------------------------------------
    672 
    673     enum {
    674         /* needs lookup in .commit() */
    675         CHARINDEX = 0x70000000,
    676         /* Expand index follows */
    677         EXPANDCHARINDEX = 0x7E000000,
    678         /* Contract index follows */
    679         CONTRACTCHARINDEX = 0x7F000000,
    680         /* unmapped character values */
    681         UNMAPPED = 0xFFFFFFFF,
    682         /* primary strength increment (one unit at bit 16) */
    683         PRIMARYORDERINCREMENT = 0x00010000,
    684         /* secondary strength increment (one unit at bit 8) */
    685         SECONDARYORDERINCREMENT = 0x00000100,
    686         /* tertiary strength increment (one unit at bit 0) */
    687         TERTIARYORDERINCREMENT = 0x00000001,
    688         /* mask off anything but primary order (upper 16 bits) */
    689         PRIMARYORDERMASK = 0xffff0000,
    690         /* mask off anything but secondary order (bits 15-8) */
    691         SECONDARYORDERMASK = 0x0000ff00,
    692         /* mask off anything but tertiary order (bits 7-0) */
    693         TERTIARYORDERMASK = 0x000000ff,
    694         /* mask off ignorable char order (lower 16 bits) */
    695         IGNORABLEMASK = 0x0000ffff,
    696         /* use only the primary difference (same bits as PRIMARYORDERMASK) */
    697         PRIMARYDIFFERENCEONLY = 0xffff0000,
    698         /* use only the primary and secondary difference */
    699         SECONDARYDIFFERENCEONLY = 0xffffff00,
    700         /* primary order shift */
    701         PRIMARYORDERSHIFT = 16,
    702         /* secondary order shift */
    703         SECONDARYORDERSHIFT = 8,
    704         /* starting value for collation elements */
    705         COLELEMENTSTART = 0x02020202,
    706         /* testing mask for primary low element */
    707         PRIMARYLOWZEROMASK = 0x00FF0000,
    708         /* resetting value for secondaries and tertiaries */
    709         RESETSECONDARYTERTIARY = 0x00000202,
    710         /* resetting value for tertiaries */
    711         RESETTERTIARY = 0x00000002,
    712         /* NOTE(review): undocumented in the original; presumably the value marking a primary ignorable -- confirm */
    713         PRIMIGNORABLE = 0x0202
    714     };
    715 
    716     // private data members ---------------------------------------------------
    717 
    718     UBool dataIsOwned;          // when TRUE, setUCollator(UCollator*) closes the current ucollator with ucol_close before replacing it
    719 
    720     UBool isWriteThroughAlias;  // set to TRUE by setUCollator(UCollator*); checkOwned copies the ucollator only when neither flag is set
    721 
    722     /**
    723     * c struct for collation. All initialisation for it has to be done through
    724     * setUCollator().
    725     */
    726     UCollator *ucollator;
    727 
    728     /**
    729     * Rule UnicodeString
    730     */
    731     UnicodeString urulestring;
    732 
    733     // friend classes --------------------------------------------------------
    734 
    735     /**
    736     * Used to iterate over collation elements in a character source.
    737     */
    738     friend class CollationElementIterator;
    739 
    740     /**
    741     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
    742     *                                                       UErrorCode&)
    743     */
    744     friend class Collator;
    745 
    746     /**
    747     * Searching over collation elements in a character source
    748     */
    749     friend class StringSearch;
    750 
    751     // private constructors --------------------------------------------------
    752 
    753     /**
    754      * Default constructor
    755      */
    756     RuleBasedCollator();
    757 
    758     /**
    759      * RuleBasedCollator constructor. This constructor takes a locale. The
    760      * only caller of this constructor should be Collator::createInstance(). If
    761      * createInstance() happens to know that the requested locale's collation is
    762      * implemented as a RuleBasedCollator, it can then call this constructor.
    763      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
    764      * COLLATION TABLE. It does this by falling back to defaults.
    765      * @param desiredLocale locale used
    766      * @param status error code status
    767      */
    768     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
    769 
    770     /**
    771      * common constructor implementation
    772      *
    773      * @param rules the collation rules to build the collation table from.
    774      * @param collationStrength default strength for comparison
    775      * @param decompositionMode the normalisation mode
    776      * @param status reporting a success or an error.
    777      */
    778     void
    779     construct(const UnicodeString& rules,
    780               UColAttributeValue collationStrength,
    781               UColAttributeValue decompositionMode,
    782               UErrorCode& status);
    783 
    784     // private methods -------------------------------------------------------
    785 
    786     /**
    787     * Creates the c struct for ucollator; forwards to the const char* overload using locale.getName()
    788     * @param locale desired locale
    789     * @param status error status
    790     */
    791     void setUCollator(const Locale& locale, UErrorCode& status);
    792 
    793     /**
    794     * Creates the c struct for ucollator
    795     * @param locale desired locale name
    796     * @param status error status
    797     */
    798     void setUCollator(const char* locale, UErrorCode& status);
    799 
    800     /**
    801     * Installs an externally supplied c struct as the ucollator. This is used internally by
    802     * StringSearch. Cleaning up the new ucollator is not the responsibility of this
    803     * RuleBasedCollator: dataIsOwned is set to FALSE and isWriteThroughAlias to TRUE.
    804     * A previously held ucollator is closed first if it was owned.
    805     * @param collator new ucollator data
    806     */
    807     void setUCollator(UCollator *collator);
    808 
    809 public:
    810     /**
    811     * Get UCollator data struct. Used only by StringSearch & intltest.
    812     * @return UCollator data struct
    813     * @internal
    814     */
    815     const UCollator * getUCollator();
    816 
    817 protected:
    818    /**
    819     * Used internally by registration to define the requested, valid and actual locales.
    820     * @param requestedLocale the requested locale
    821     * @param validLocale the valid locale
    822     * @param actualLocale the actual locale
    823     * @internal
    824     */
    825     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    826 
    827 private:
    828 
    829     // if not owned and not a write through alias, copy the ucollator
    830     void checkOwned(void);
    831 
    832     // utility to init rule string used by checkOwned and construct
    833     void setRuleStringFromCollator();
    834 
    835     /**
    836     * Converts C's UCollationResult to EComparisonResult
    837     * @param result member of the enum UCollationResult
    838     * @return EComparisonResult equivalent of UCollationResult (anything other than UCOL_LESS or UCOL_EQUAL maps to GREATER)
    839     * @deprecated ICU 2.6. We will not need it.
    840     */
    841     Collator::EComparisonResult getEComparisonResult(
    842                                             const UCollationResult &result) const;
    843 
    844     /**
    845     * Converts C's UCollationStrength to ECollationStrength
    846     * @param strength member of the enum UCollationStrength
    847     * @return ECollationStrength equivalent of UCollationStrength (values other than PRIMARY..QUATERNARY map to IDENTICAL)
    848     */
    849     Collator::ECollationStrength getECollationStrength(
    850                                         const UCollationStrength &strength) const;
    851 
    852     /**
    853     * Converts C++'s ECollationStrength to UCollationStrength
    854     * @param strength member of the enum ECollationStrength
    855     * @return UCollationStrength equivalent of ECollationStrength (values other than PRIMARY..QUATERNARY map to UCOL_IDENTICAL)
    856     */
    857     UCollationStrength getUCollationStrength(
    858       const Collator::ECollationStrength &strength) const;
    859 };
    860 
    861 // inline method implementation ---------------------------------------------
    862 
    863 // Locale overload: delegates to the name-based setUCollator using locale.getName().
    864 inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
    865 {
    866     this->setUCollator(locale.getName(), status);
    867 }
    868 
    869 
    870 // Aliases `collator` without owning it (closing any collator owned so far), then calls setRuleStringFromCollator().
    871 inline void RuleBasedCollator::setUCollator(UCollator *collator)
    872 {
    873     if (dataIsOwned && ucollator) {
    874         ucol_close(ucollator);
    875     }
    876     isWriteThroughAlias = TRUE;
    877     dataIsOwned = FALSE;
    878     ucollator = collator;
    879     setRuleStringFromCollator();
    880 }
    881 
    882 // Plain accessor: exposes the stored ucollator member as a pointer-to-const.
    883 inline const UCollator * RuleBasedCollator::getUCollator() {
    884     return this->ucollator;
    885 }
    886 
    887 // Maps the C-API UCollationResult onto Collator::EComparisonResult;
    888 // any value other than UCOL_LESS or UCOL_EQUAL is reported as GREATER.
    889 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
    890                                            const UCollationResult &result) const
    891 {
    892     if (result == UCOL_LESS) {
    893         return Collator::LESS;
    894     }
    895     if (result == UCOL_EQUAL) {
    896         return Collator::EQUAL;
    897     }
    898     return Collator::GREATER;
    899 }
    900 
    901 // Translates a C-API UCollationStrength into Collator::ECollationStrength.
    902 // Values with no explicit mapping below yield Collator::IDENTICAL.
    903 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
    904                                        const UCollationStrength &strength) const
    905 {
    906     Collator::ECollationStrength mapped = Collator::IDENTICAL;
    907     if (strength == UCOL_PRIMARY) {
    908         mapped = Collator::PRIMARY;
    909     } else if (strength == UCOL_SECONDARY) {
    910         mapped = Collator::SECONDARY;
    911     } else if (strength == UCOL_TERTIARY) {
    912         mapped = Collator::TERTIARY;
    913     } else if (strength == UCOL_QUATERNARY) {
    914         mapped = Collator::QUATERNARY;
    915     }
    916     return mapped;
    917 }
    918 
    919 // Translates Collator::ECollationStrength into the C-API UCollationStrength.
    920 inline UCollationStrength RuleBasedCollator::getUCollationStrength(
    921                              const Collator::ECollationStrength &strength) const
    922 {
    923     if (strength == Collator::PRIMARY) {
    924         return UCOL_PRIMARY;
    925     }
    926     if (strength == Collator::SECONDARY) {
    927         return UCOL_SECONDARY;
    928     }
    929     if (strength == Collator::TERTIARY) {
    930         return UCOL_TERTIARY;
    931     }
    932     // QUATERNARY maps directly; every remaining value falls back to UCOL_IDENTICAL.
    933     return (strength == Collator::QUATERNARY) ? UCOL_QUATERNARY
    934                                               : UCOL_IDENTICAL;
    935 }
    936 
    937 U_NAMESPACE_END
    938 
    939 #endif /* #if !UCONFIG_NO_COLLATION */
    940 
    941 #endif
    942