Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2011, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 */
     58 
     59 #ifndef TBLCOLL_H
     60 #define TBLCOLL_H
     61 
     62 #include "unicode/utypes.h"
     63 
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/ucol.h"
     69 #include "unicode/sortkey.h"
     70 #include "unicode/normlzr.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 /**
     75 * @stable ICU 2.0
     76 */
     77 class StringSearch;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class CollationElementIterator;
     82 
     83 /**
     84  * The RuleBasedCollator class provides the simple implementation of
     85  * Collator, using data-driven tables. The user can create a customized
     86  * table-based collation.
     87  * <P>
     88  * <em>Important: </em>The ICU collation service has been reimplemented
     89  * in order to achieve better performance and UCA compliance.
     90  * For details, see the
     91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     92  * collation design document</a>.
     93  * <p>
     94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
    105  * Collation customization</a> section of the users guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  * @version    2.0 11/15/2001
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator
    112 {
    113 public:
    114 
    115   // constructor -------------------------------------------------------------
    116 
    117     /**
    118      * RuleBasedCollator constructor. This takes the table rules and builds a
    119      * collation table out of them. Please see RuleBasedCollator class
    120      * description for more details on the collation rule syntax.
    121      * @param rules the collation rules to build the collation table from.
    122      * @param status reporting a success or an error.
    123      * @see Locale
    124      * @stable ICU 2.0
    125      */
    126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    127 
    128     /**
    129      * RuleBasedCollator constructor. This takes the table rules and builds a
    130      * collation table out of them. Please see RuleBasedCollator class
    131      * description for more details on the collation rule syntax.
    132      * @param rules the collation rules to build the collation table from.
    133      * @param collationStrength default strength for comparison
    134      * @param status reporting a success or an error.
    135      * @see Locale
    136      * @stable ICU 2.0
    137      */
    138     RuleBasedCollator(const UnicodeString& rules,
    139                        ECollationStrength collationStrength,
    140                        UErrorCode& status);
    141 
    142     /**
    143      * RuleBasedCollator constructor. This takes the table rules and builds a
    144      * collation table out of them. Please see RuleBasedCollator class
    145      * description for more details on the collation rule syntax.
    146      * @param rules the collation rules to build the collation table from.
    147      * @param decompositionMode the normalisation mode
    148      * @param status reporting a success or an error.
    149      * @see Locale
    150      * @stable ICU 2.0
    151      */
    152     RuleBasedCollator(const UnicodeString& rules,
    153                     UColAttributeValue decompositionMode,
    154                     UErrorCode& status);
    155 
    156     /**
    157      * RuleBasedCollator constructor. This takes the table rules and builds a
    158      * collation table out of them. Please see RuleBasedCollator class
    159      * description for more details on the collation rule syntax.
    160      * @param rules the collation rules to build the collation table from.
    161      * @param collationStrength default strength for comparison
    162      * @param decompositionMode the normalisation mode
    163      * @param status reporting a success or an error.
    164      * @see Locale
    165      * @stable ICU 2.0
    166      */
    167     RuleBasedCollator(const UnicodeString& rules,
    168                     ECollationStrength collationStrength,
    169                     UColAttributeValue decompositionMode,
    170                     UErrorCode& status);
    171 
    172     /**
    173      * Copy constructor.
    174      * @param other the RuleBasedCollator object to be copied
    175      * @see Locale
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. The binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which should usually be UCA.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually UCA. Base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201     // destructor --------------------------------------------------------------
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     // public methods ----------------------------------------------------------
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other the RuleBasedCollator object to assign from.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Returns true if argument is not the same as this object.
    228      * @param other Collator object to be compared
    229      * @return returns true if argument is not the same as this object.
    230      * @stable ICU 2.0
    231      */
    232     virtual UBool operator!=(const Collator& other) const;
    233 
    234     /**
    235      * Makes a deep copy of the object.
    236      * The caller owns the returned object.
    237      * @return the cloned object.
    238      * @stable ICU 2.0
    239      */
    240     virtual Collator* clone(void) const;
    241 
    242     /**
    243      * Creates a collation element iterator for the source string. The caller of
    244      * this method is responsible for the memory management of the returned
    245      * pointer.
    246      * @param source the string over which the CollationElementIterator will
    247      *        iterate.
    248      * @return the collation element iterator of the source string using this as
    249      *         the base Collator.
    250      * @stable ICU 2.2
    251      */
    252     virtual CollationElementIterator* createCollationElementIterator(
    253                                            const UnicodeString& source) const;
    254 
    255     /**
    256      * Creates a collation element iterator for the source. The caller of this
    257      * method is responsible for the memory management of the returned pointer.
    258      * @param source the CharacterIterator which produces the characters over
    259      *        which the CollationElementIterator will iterate.
    260      * @return the collation element iterator of the source using this as the
    261      *         base Collator.
    262      * @stable ICU 2.2
    263      */
    264     virtual CollationElementIterator* createCollationElementIterator(
    265                                          const CharacterIterator& source) const;
    266 
    267     /**
    268      * Compares a range of character data stored in two different strings based
    269      * on the collation rules. Returns information about whether a string is
    270      * less than, greater than or equal to another string in a language.
    271      * This can be overridden in a subclass.
    272      * @param source the source string.
    273      * @param target the target string to be compared with the source string.
    274      * @return the comparison result. GREATER if the source string is greater
    275      *         than the target string, LESS if the source is less than the
    276      *         target. Otherwise, returns EQUAL.
    277      * @deprecated ICU 2.6 Use overload with UErrorCode&
    278      */
    279     virtual EComparisonResult compare(const UnicodeString& source,
    280                                       const UnicodeString& target) const;
    281 
    282 
    283     /**
    284     * The comparison function compares the character data stored in two
    285     * different strings. Returns information about whether a string is less
    286     * than, greater than or equal to another string.
    287     * @param source the source string to be compared with.
    288     * @param target the string that is to be compared with the source string.
    289     * @param status possible error code
    290     * @return Returns an enum value. UCOL_GREATER if source is greater
    291     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    292     * than target
    293     * @stable ICU 2.6
    294     **/
    295     virtual UCollationResult compare(const UnicodeString& source,
    296                                       const UnicodeString& target,
    297                                       UErrorCode &status) const;
    298 
    299     /**
    300      * Compares a range of character data stored in two different strings based
    301      * on the collation rules up to the specified length. Returns information
    302      * about whether a string is less than, greater than or equal to another
    303      * string in a language. This can be overridden in a subclass.
    304      * @param source the source string.
    305      * @param target the target string to be compared with the source string.
    306      * @param length compares up to the specified length
    307      * @return the comparison result. GREATER if the source string is greater
    308      *         than the target string, LESS if the source is less than the
    309      *         target. Otherwise, returns EQUAL.
    310      * @deprecated ICU 2.6 Use overload with UErrorCode&
    311      */
    312     virtual EComparisonResult compare(const UnicodeString& source,
    313                                       const UnicodeString&  target,
    314                                       int32_t length) const;
    315 
    316     /**
    317     * Does the same thing as compare but limits the comparison to a specified
    318     * length
    319     * @param source the source string to be compared with.
    320     * @param target the string that is to be compared with the source string.
    321     * @param length the length the comparison is limited to
    322     * @param status possible error code
    323     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    324     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    325     *         length) is equal to target; UCOL_LESS if source (up to the specified
    326     *         length) is less than target.
    327     * @stable ICU 2.6
    328     */
    329     virtual UCollationResult compare(const UnicodeString& source,
    330                                       const UnicodeString& target,
    331                                       int32_t length,
    332                                       UErrorCode &status) const;
    333 
    334     /**
    335      * The comparison function compares the character data stored in two
    336      * different string arrays. Returns information about whether a string array
    337      * is less than, greater than or equal to another string array.
    338      * <p>Example of use:
    339      * <pre>
    340      * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
    341      * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
    342      * .       UErrorCode status = U_ZERO_ERROR;
    343      * .       Collator *myCollation =
    344      * .                         Collator::createInstance(Locale::US, status);
    345      * .       if (U_FAILURE(status)) return;
    346      * .       myCollation->setStrength(Collator::PRIMARY);
    347      * .       // result would be Collator::EQUAL ("abc" == "ABC")
    348      * .       // (no primary difference between "abc" and "ABC")
    349      * .       Collator::EComparisonResult result =
    350      * .                             myCollation->compare(abc, 3, ABC, 3);
    351      * .       myCollation->setStrength(Collator::TERTIARY);
    352      * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
    353      * .       // (with tertiary difference between "abc" and "ABC")
    354      * .       result =  myCollation->compare(abc, 3, ABC, 3);
    355      * </pre>
    356      * @param source the source string array to be compared with.
    357      * @param sourceLength the length of the source string array. If this value
    358      *        is equal to -1, the string array is null-terminated.
    359      * @param target the string that is to be compared with the source string.
    360      * @param targetLength the length of the target string array. If this value
    361      *        is equal to -1, the string array is null-terminated.
    362      * @return Returns a byte value. GREATER if source is greater than target;
    363      *         EQUAL if source is equal to target; LESS if source is less than
    364      *         target
    365      * @deprecated ICU 2.6 Use overload with UErrorCode&
    366      */
    367     virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
    368                                       const UChar* target, int32_t targetLength)
    369                                       const;
    370 
    371     /**
    372     * The comparison function compares the character data stored in two
    373     * different string arrays. Returns information about whether a string array
    374     * is less than, greater than or equal to another string array.
    375     * @param source the source string array to be compared with.
    376     * @param sourceLength the length of the source string array.  If this value
    377     *        is equal to -1, the string array is null-terminated.
    378     * @param target the string that is to be compared with the source string.
    379     * @param targetLength the length of the target string array.  If this value
    380     *        is equal to -1, the string array is null-terminated.
    381     * @param status possible error code
    382     * @return Returns an enum value. UCOL_GREATER if source is greater
    383     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    384     * than target
    385     * @stable ICU 2.6
    386     */
    387     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    388                                       const UChar* target, int32_t targetLength,
    389                                       UErrorCode &status) const;
    390 
    391     /**
    392      * Compares two strings using the Collator.
    393      * Returns whether the first one compares less than/equal to/greater than
    394      * the second one.
    395      * This version takes UCharIterator input.
    396      * @param sIter the first ("source") string iterator
    397      * @param tIter the second ("target") string iterator
    398      * @param status ICU status
    399      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    400      * @stable ICU 4.2
    401      */
    402     virtual UCollationResult compare(UCharIterator &sIter,
    403                                      UCharIterator &tIter,
    404                                      UErrorCode &status) const;
    405 
    406     /**
    407     * Transforms a specified region of the string into a series of characters
    408     * that can be compared with CollationKey.compare. Use a CollationKey when
    409     * you need to do repeated comparisons on the same string. For a single
    410     * comparison the compare method will be faster.
    411     * @param source the source string.
    412     * @param key the transformed key of the source string.
    413     * @param status the error code status.
    414     * @return the transformed key.
    415     * @see CollationKey
    416     * @deprecated ICU 2.8 Use getSortKey(...) instead
    417     */
    418     virtual CollationKey& getCollationKey(const UnicodeString& source,
    419                                           CollationKey& key,
    420                                           UErrorCode& status) const;
    421 
    422     /**
    423     * Transforms a specified region of the string into a series of characters
    424     * that can be compared with CollationKey.compare. Use a CollationKey when
    425     * you need to do repeated comparisons on the same string. For a single
    426     * comparison the compare method will be faster.
    427     * @param source the source string.
    428     * @param sourceLength the length of the source string.
    429     * @param key the transformed key of the source string.
    430     * @param status the error code status.
    431     * @return the transformed key.
    432     * @see CollationKey
    433     * @deprecated ICU 2.8 Use getSortKey(...) instead
    434     */
    435     virtual CollationKey& getCollationKey(const UChar *source,
    436                                           int32_t sourceLength,
    437                                           CollationKey& key,
    438                                           UErrorCode& status) const;
    439 
    440     /**
    441      * Generates the hash code for the rule-based collation object.
    442      * @return the hash code.
    443      * @stable ICU 2.0
    444      */
    445     virtual int32_t hashCode(void) const;
    446 
    447     /**
    448     * Gets the locale of the Collator
    449     * @param type can be either requested, valid or actual locale. For more
    450     *             information see the definition of ULocDataLocaleType in
    451     *             uloc.h
    452     * @param status the error code status.
    453     * @return locale where the collation data lives. If the collator
    454     *         was instantiated from rules, locale is empty.
    455     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    456     */
    457     virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    458 
    459     /**
    460      * Gets the table-based rules for the collation object.
    461      * @return returns the collation rules that the table collation object was
    462      *         created from.
    463      * @stable ICU 2.0
    464      */
    465     const UnicodeString& getRules(void) const;
    466 
    467     /**
    468      * Gets the version information for a Collator.
    469      * @param info the version # information, the result will be filled in
    470      * @stable ICU 2.0
    471      */
    472     virtual void getVersion(UVersionInfo info) const;
    473 
    474     /**
    475      * Return the maximum length of any expansion sequences that end with the
    476      * specified comparison order.
    477      * @param order a collation order returned by previous or next.
    478      * @return maximum size of the expansion sequences ending with the collation
    479      *         element or 1 if collation element does not occur at the end of
    480      *         any expansion sequence
    481      * @see CollationElementIterator#getMaxExpansion
    482      * @stable ICU 2.0
    483      */
    484     int32_t getMaxExpansion(int32_t order) const;
    485 
    486     /**
    487      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    488      * method is to implement a simple version of RTTI, since not all C++
    489      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    490      * methods call this method.
    491      * @return The class ID for this object. All objects of a given class have
    492      *         the same class ID. Objects of other classes have different class
    493      *         IDs.
    494      * @stable ICU 2.0
    495      */
    496     virtual UClassID getDynamicClassID(void) const;
    497 
    498     /**
    499      * Returns the class ID for this class. This is useful only for comparing to
    500      * a return value from getDynamicClassID(). For example:
    501      * <pre>
    502      * Base* polymorphic_pointer = createPolymorphicObject();
    503      * if (polymorphic_pointer->getDynamicClassID() ==
    504      *                                          Derived::getStaticClassID()) ...
    505      * </pre>
    506      * @return The class ID for all objects of this class.
    507      * @stable ICU 2.0
    508      */
    509     static UClassID U_EXPORT2 getStaticClassID(void);
    510 
    511     /**
    512      * Returns the binary format of the class's rules. The format is that of
    513      * .col files.
    514      * @param length Returns the length of the data, in bytes
    515      * @param status the error code status.
    516      * @return memory, owned by the caller, of size 'length' bytes.
    517      * @stable ICU 2.2
    518      */
    519     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
    520 
    521 
    522     /** Creates a binary image of a collator. This binary image can be stored and
    523     *  later used to instantiate a collator using ucol_openBinary.
    524     *  This API supports preflighting.
    525     *  @param buffer a fill-in buffer to receive the binary image
    526     *  @param capacity capacity of the destination buffer
    527     *  @param status for catching errors
    528     *  @return size of the image
    529     *  @see ucol_openBinary
    530     *  @stable ICU 3.4
    531     */
    532     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
    533 
    534     /**
    535      * Returns current rules. Delta defines whether full rules are returned or
    536      * just the tailoring.
    537      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    538      * @param buffer UnicodeString to store the result rules
    539      * @stable ICU 2.2
    540      */
    541     void getRules(UColRuleOption delta, UnicodeString &buffer);
    542 
    543     /**
    544      * Universal attribute setter
    545      * @param attr attribute type
    546      * @param value attribute value
    547      * @param status to indicate whether the operation went on smoothly or there were errors
    548      * @stable ICU 2.2
    549      */
    550     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    551                               UErrorCode &status);
    552 
    553     /**
    554      * Universal attribute getter.
    555      * @param attr attribute type
    556      * @param status to indicate whether the operation went on smoothly or there were errors
    557      * @return attribute value
    558      * @stable ICU 2.2
    559      */
    560     virtual UColAttributeValue getAttribute(UColAttribute attr,
    561                                             UErrorCode &status);
    562 
    563     /**
    564      * Sets the variable top to a collation element value of a string supplied.
    565      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    566      * @param len length of variable top string. If -1 it is considered to be zero-terminated.
    567      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    568      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    569      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    570      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    571      * @stable ICU 2.0
    572      */
    573     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    574 
    575     /**
    576      * Sets the variable top to a collation element value of a string supplied.
    577      * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
    578      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    579      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    580      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    581      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    582      * @stable ICU 2.0
    583      */
    584     virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
    585 
    586     /**
    587      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
    588      * Lower 16 bits are ignored.
    589      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    590      * @param status error code (not changed by function)
    591      * @stable ICU 2.0
    592      */
    593     virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
    594 
    595     /**
    596      * Gets the variable top value of a Collator.
    597      * Lower 16 bits are undefined and should be ignored.
    598      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    599      * @stable ICU 2.0
    600      */
    601     virtual uint32_t getVariableTop(UErrorCode &status) const;
    602 
    603     /**
    604      * Gets a UnicodeSet that contains all the characters and sequences tailored in
    605      * this collator.
    606      * @param status      error code of the operation
    607      * @return a pointer to a UnicodeSet object containing all the
    608      *         code points and sequences that may sort differently than
    609      *         in the UCA. The object must be disposed of by using delete
    610      * @stable ICU 2.4
    611      */
    612     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    613 
    614     /**
    615      * Thread safe cloning operation.
    616      * @return pointer to the new clone; the caller is responsible for disposing of it.
    617      * @stable ICU 2.2
    618      */
    619     virtual Collator* safeClone(void);
    620 
    621     /**
    622      * Gets the sort key as an array of bytes from a UnicodeString.
    623      * @param source string to be processed.
    624      * @param result buffer to store result in. If NULL, number of bytes needed
    625      *        will be returned.
    626      * @param resultLength length of the result buffer. If it is not large enough,
    627      *        the buffer will be filled to capacity.
    628      * @return Number of bytes needed for storing the sort key
    629      * @stable ICU 2.0
    630      */
    631     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    632                                int32_t resultLength) const;
    633 
    634     /**
    635      * Gets the sort key as an array of bytes from a UChar buffer.
    636      * @param source string to be processed.
    637      * @param sourceLength length of string to be processed. If -1, the string
    638      *        is 0 terminated and length will be decided by the function.
    639      * @param result buffer to store result in. If NULL, number of bytes needed
    640      *        will be returned.
    641      * @param resultLength length of the result buffer. If it is not large enough,
    642      *        the buffer will be filled to capacity.
    643      * @return Number of bytes needed for storing the sort key
    644      * @stable ICU 2.2
    645      */
    646     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    647                                uint8_t *result, int32_t resultLength) const;
    648 
    649     /**
    650     * Determines the minimum strength that will be used in comparison or
    651     * transformation.
    652     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored.
    653     * <p>E.g. with strength == PRIMARY, the secondary and tertiary differences
    654     * are ignored.
    655     * @return the current comparison level.
    656     * @see RuleBasedCollator#setStrength
    657     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
    658     */
    659     virtual ECollationStrength getStrength(void) const;
    660 
    661     /**
    662     * Sets the minimum strength to be used in comparison or transformation.
    663     * @see RuleBasedCollator#getStrength
    664     * @param newStrength the new comparison level.
    665     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
    666     */
    667     virtual void setStrength(ECollationStrength newStrength);
    668 
    669     /**
    670      * Retrieves the reordering codes for this collator.
    671      * @param dest The array to fill with the script ordering.
    672      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    673      *  will only return the length of the result without writing any of the result string (pre-flighting).
    674      * @param status A reference to an error code value, which must not indicate
    675      * a failure before the function call.
    676      * @return The length of the script ordering array.
    677      * @see ucol_setReorderCodes
    678      * @see Collator#getEquivalentReorderCodes
    679      * @see Collator#setReorderCodes
    680      * @draft ICU 4.8
    681      */
    682      virtual int32_t U_EXPORT2 getReorderCodes(int32_t *dest,
    683                                     int32_t destCapacity,
    684                                     UErrorCode& status) const;
    685 
    686     /**
    687      * Sets the ordering of scripts for this collator.
    688      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    689      * length is also set to 0. An empty array will clear any reordering codes on the collator.
    690      * @param reorderCodesLength The length of reorderCodes.
    691      * @param status error code
    692      * @see Collator#getReorderCodes
    693      * @see Collator#getEquivalentReorderCodes
    694      * @draft ICU 4.8
    695      */
    696      virtual void U_EXPORT2 setReorderCodes(const int32_t* reorderCodes,
    697                                 int32_t reorderCodesLength,
    698                                 UErrorCode& status) ;
    699 
    700     /**
    701      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    702      * codes will be grouped and must reorder together.
    703      * @param reorderCode The reorder code to determine equivalence for.
    704      * @param dest The array to fill with the script equivalence reordering codes.
    705      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
    706      * function will only return the length of the result without writing any of the result
    707      * string (pre-flighting).
    708      * @param status A reference to an error code value, which must not indicate
    709      * a failure before the function call.
    710      * @return The length of the reordering code equivalence array.
    711      * @see ucol_setReorderCodes
    712      * @see Collator#getReorderCodes
    713      * @see Collator#setReorderCodes
    714      * @draft ICU 4.8
    715      */
    716     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
    717                                 int32_t* dest,
    718                                 int32_t destCapacity,
    719                                 UErrorCode& status);
    720 
    721 
    722 private:
    723 
    724     // private static constants -----------------------------------------------
    725 
    726     enum {
    727         /* needs look-up in .commit() */
    728         CHARINDEX = 0x70000000,
    729         /* expansion index follows */
    730         EXPANDCHARINDEX = 0x7E000000,
    731         /* contraction index follows */
    732         CONTRACTCHARINDEX = 0x7F000000,
    733         /* value for unmapped characters */
    734         UNMAPPED = 0xFFFFFFFF,
    735         /* primary strength increment (one unit in the upper 16 bits) */
    736         PRIMARYORDERINCREMENT = 0x00010000,
    737         /* secondary strength increment (one unit in bits 8-15) */
    738         SECONDARYORDERINCREMENT = 0x00000100,
    739         /* tertiary strength increment (one unit in the lowest 8 bits) */
    740         TERTIARYORDERINCREMENT = 0x00000001,
    741         /* mask off anything but primary order (upper 16 bits) */
    742         PRIMARYORDERMASK = 0xffff0000,
    743         /* mask off anything but secondary order (bits 8-15) */
    744         SECONDARYORDERMASK = 0x0000ff00,
    745         /* mask off anything but tertiary order (lowest 8 bits) */
    746         TERTIARYORDERMASK = 0x000000ff,
    747         /* mask off ignorable char order (keeps the lower 16 bits) */
    748         IGNORABLEMASK = 0x0000ffff,
    749         /* use only the primary difference (same bits as PRIMARYORDERMASK) */
    750         PRIMARYDIFFERENCEONLY = 0xffff0000,
    751         /* use only the primary and secondary difference (upper 24 bits) */
    752         SECONDARYDIFFERENCEONLY = 0xffffff00,
    753         /* primary order shift (bit offset of the primary order) */
    754         PRIMARYORDERSHIFT = 16,
    755         /* secondary order shift (bit offset of the secondary order) */
    756         SECONDARYORDERSHIFT = 8,
    757         /* starting value for collation elements */
    758         COLELEMENTSTART = 0x02020202,
    759         /* testing mask for primary low element (bits 16-23) */
    760         PRIMARYLOWZEROMASK = 0x00FF0000,
    761         /* resetting value for secondaries and tertiaries */
    762         RESETSECONDARYTERTIARY = 0x00000202,
    763         /* resetting value for tertiaries */
    764         RESETTERTIARY = 0x00000002,
    765         /* NOTE(review): presumably the weights of a primary-ignorable element; same value as RESETSECONDARYTERTIARY - confirm */
    766         PRIMIGNORABLE = 0x0202
    767     };
    768 
    769     // private data members ---------------------------------------------------
    770 
    771     UBool dataIsOwned;  /**< when TRUE, setUCollator(UCollator*) ucol_close()s the old ucollator before replacing it */
    772     /** Set to TRUE when ucollator aliases a UCollator handed in through setUCollator(UCollator*) */
    773     UBool isWriteThroughAlias;
    774 
    775     /**
    776     * C struct (UCollator) for collation. All initialisation for it has to be done through
    777     * setUCollator().
    778     */
    779     UCollator *ucollator;
    780 
    781     /**
    782     * Collation rules held as a UnicodeString
    783     */
    784     UnicodeString urulestring;
    785 
    786     // friend classes --------------------------------------------------------
    787 
    788     /**
    789     * Used to iterate over collation elements in a character source.
    790     */
    791     friend class CollationElementIterator;
    792 
    793     /**
    794     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
    795     *                                                       UErrorCode&)
    796     */
    797     friend class Collator;
    798 
    799     /**
    800     * Searching over collation elements in a character source
    801     */
    802     friend class StringSearch;
    803 
    804     // private constructors --------------------------------------------------
    805 
    806     /**
    807      * Default constructor
    808      */
    809     RuleBasedCollator();
    810 
    811     /**
    812      * RuleBasedCollator constructor. This constructor takes a locale. The
    813      * only caller of this constructor should be Collator::createInstance(). If
    814      * createInstance() happens to know that the requested locale's collation is
    815      * implemented as a RuleBasedCollator, it can then call this constructor.
    816      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
    817      * COLLATION TABLE. It does this by falling back to defaults.
    818      * @param desiredLocale locale used
    819      * @param status error code status
    820      */
    821     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
    822 
    823     /**
    824      * common constructor implementation
    825      *
    826      * @param rules the collation rules to build the collation table from.
    827      * @param collationStrength default strength for comparison
    828      * @param decompositionMode the normalisation mode
    829      * @param status reporting a success or an error.
    830      */
    831     void
    832     construct(const UnicodeString& rules,
    833               UColAttributeValue collationStrength,
    834               UColAttributeValue decompositionMode,
    835               UErrorCode& status);
    836 
    837     // private methods -------------------------------------------------------
    838 
    839     /**
    840     * Creates the c struct for ucollator
    841     * @param locale desired locale
    842     * @param status error status
    843     */
    844     void setUCollator(const Locale& locale, UErrorCode& status);
    845 
    846     /**
    847     * Creates the c struct for ucollator
    848     * @param locale desired locale name
    849     * @param status error status
    850     */
    851     void setUCollator(const char* locale, UErrorCode& status);
    852 
    853     /**
    854     * Installs a caller-supplied c struct as the ucollator. This is used internally by
    855     * StringSearch. Hence the responsibility of cleaning up the ucollator is not taken on by
    856     * this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
    857     * @param collator new ucollator data
    858     */
    859     void setUCollator(UCollator *collator);
    860 
    861 public:
    862     /**
    863     * Get UCollator data struct. Used only by StringSearch & intltest.
    864     * @return UCollator data struct
    865     * @internal
    866     */
    867     const UCollator * getUCollator();
    868 
    869 protected:
    870    /**
    871     * Used internally by registration to define the requested, valid and actual locales.
    872     * @param requestedLocale the requested locale
    873     * @param validLocale the valid locale
    874     * @param actualLocale the actual locale
    875     * @internal
    876     */
    877     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    878 
    879 private:
    880 
    881     // if not owned and not a write through alias, copy the ucollator
    882     void checkOwned(void);
    883 
    884     // utility to init rule string used by checkOwned and construct
    885     void setRuleStringFromCollator();
    886 
    887     /**
    888     * Converts C's UCollationResult to EComparisonResult
    889     * @param result member of the enum UCollationResult
    890     * @return EComparisonResult equivalent of UCollationResult
    891     * @deprecated ICU 2.6. We will not need it.
    892     */
    893     Collator::EComparisonResult getEComparisonResult(
    894                                             const UCollationResult &result) const;
    895 
    896     /**
    897     * Converts C's UCollationStrength to ECollationStrength
    898     * @param strength member of the enum UCollationStrength
    899     * @return ECollationStrength equivalent of UCollationStrength
    900     */
    901     Collator::ECollationStrength getECollationStrength(
    902                                         const UCollationStrength &strength) const;
    903 
    904     /**
    905     * Converts C++'s ECollationStrength to UCollationStrength
    906     * @param strength member of the enum ECollationStrength
    907     * @return UCollationStrength equivalent of ECollationStrength
    908     */
    909     UCollationStrength getUCollationStrength(
    910       const Collator::ECollationStrength &strength) const;
    911 };
    912 
    913 // inline method implementation ---------------------------------------------
    914 
    915 // Locale overload: hands the locale's name on to the name-based setUCollator overload.
    916 inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
    917 {
    918     this->setUCollator(locale.getName(), status);
    919 }
    920 
    921 
    922 inline void RuleBasedCollator::setUCollator(UCollator *collator)
    923 {
    924     // Release the collator held so far, but only if this object owns it.
    925     if (dataIsOwned && ucollator) {
    926         ucol_close(ucollator);
    927     }
    928     isWriteThroughAlias = TRUE;   // the incoming collator is only aliased ...
    929     dataIsOwned         = FALSE;  // ... so this object must not close it later
    930     ucollator           = collator;
    931     setRuleStringFromCollator();  // re-initialise the rule string for the new collator
    932 }
    933 
    934 inline const UCollator *RuleBasedCollator::getUCollator()
    935 {
    936     return this->ucollator;  // read-only view of the underlying C collator
    937 }
    938 
    939 // Maps a C-level UCollationResult onto the C++ Collator::EComparisonResult
    940 // enum. UCOL_LESS and UCOL_EQUAL translate to LESS and EQUAL; every other
    941 // value is reported as GREATER.
    942 inline Collator::EComparisonResult
    943 RuleBasedCollator::getEComparisonResult(const UCollationResult &result) const
    944 {
    945     if (result == UCOL_LESS) {
    946         return Collator::LESS;
    947     } else if (result == UCOL_EQUAL) {
    948         return Collator::EQUAL;
    949     }
    950     return Collator::GREATER;
    951 }
    952 
    953 // Maps a C-level UCollationStrength onto the C++ Collator::ECollationStrength
    954 // enum. UCOL_PRIMARY through UCOL_QUATERNARY translate to their namesakes;
    955 // every other value is reported as IDENTICAL.
    956 inline Collator::ECollationStrength
    957 RuleBasedCollator::getECollationStrength(const UCollationStrength &strength) const
    958 {
    959     if (strength == UCOL_PRIMARY) {
    960         return Collator::PRIMARY;
    961     } else if (strength == UCOL_SECONDARY) {
    962         return Collator::SECONDARY;
    963     } else if (strength == UCOL_TERTIARY) {
    964         return Collator::TERTIARY;
    965     } else if (strength == UCOL_QUATERNARY) {
    966         return Collator::QUATERNARY;
    967     }
    968     return Collator::IDENTICAL;
    969 }
    970 
    971 // Maps a C++ Collator::ECollationStrength onto the C UCollationStrength
    972 // enum. PRIMARY through QUATERNARY translate to their UCOL_ namesakes;
    973 // every other value is reported as UCOL_IDENTICAL.
    974 inline UCollationStrength
    975 RuleBasedCollator::getUCollationStrength(const Collator::ECollationStrength &strength) const
    976 {
    977     if (strength == Collator::PRIMARY) {
    978         return UCOL_PRIMARY;
    979     } else if (strength == Collator::SECONDARY) {
    980         return UCOL_SECONDARY;
    981     } else if (strength == Collator::TERTIARY) {
    982         return UCOL_TERTIARY;
    983     } else if (strength == Collator::QUATERNARY) {
    984         return UCOL_QUATERNARY;
    985     }
    986     return UCOL_IDENTICAL;
    987 }
    988 
    989 U_NAMESPACE_END
    990 
    991 #endif /* #if !UCONFIG_NO_COLLATION */
    992 
    993 #endif
    994