Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2015, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 * 2012-2014    markus      Rewritten in C++ again.
     58 */
     59 
     60 #ifndef TBLCOLL_H
     61 #define TBLCOLL_H
     62 
     63 #include "unicode/utypes.h"
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/locid.h"
     69 #include "unicode/uiter.h"
     70 #include "unicode/ucol.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 struct CollationCacheEntry;
     75 struct CollationData;
     76 struct CollationSettings;
     77 struct CollationTailoring;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class StringSearch;
     82 /**
     83 * @stable ICU 2.0
     84 */
     85 class CollationElementIterator;
     86 class CollationKey;
     87 class SortKeyByteSink;
     88 class UnicodeSet;
     89 class UnicodeString;
     90 class UVector64;
     91 
     92 /**
     93  * The RuleBasedCollator class provides the implementation of
     94  * Collator, using data-driven tables. The user can create a customized
     95  * table-based collation.
     96  * <p>
     97  * For more information about the collation service see
     98  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
     99  * <p>
    100  * Collation service provides correct sorting orders for most locales supported in ICU.
    101  * If specific data for a locale is not available, the order eventually falls back
    102  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
    103  * <p>
    104  * Sort ordering may be customized by providing your own set of rules. For more on
    105  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
    106  * Collation Customization</a> section of the User Guide.
    107  * <p>
    108  * Note, RuleBasedCollator is not to be subclassed.
    109  * @see        Collator
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator {
    112 public:
    113     /**
    114      * RuleBasedCollator constructor. This takes the table rules and builds a
    115      * collation table out of them. Please see RuleBasedCollator class
    116      * description for more details on the collation rule syntax.
    117      * @param rules the collation rules to build the collation table from.
    118      * @param status reporting a success or an error.
    119      * @stable ICU 2.0
    120      */
    121     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    122 
    123     /**
    124      * RuleBasedCollator constructor. This takes the table rules and builds a
    125      * collation table out of them. Please see RuleBasedCollator class
    126      * description for more details on the collation rule syntax.
    127      * @param rules the collation rules to build the collation table from.
    128      * @param collationStrength strength for comparison
    129      * @param status reporting a success or an error.
    130      * @stable ICU 2.0
    131      */
    132     RuleBasedCollator(const UnicodeString& rules,
    133                        ECollationStrength collationStrength,
    134                        UErrorCode& status);
    135 
    136     /**
    137      * RuleBasedCollator constructor. This takes the table rules and builds a
    138      * collation table out of them. Please see RuleBasedCollator class
    139      * description for more details on the collation rule syntax.
    140      * @param rules the collation rules to build the collation table from.
    141      * @param decompositionMode the normalisation mode
    142      * @param status reporting a success or an error.
    143      * @stable ICU 2.0
    144      */
    145     RuleBasedCollator(const UnicodeString& rules,
    146                     UColAttributeValue decompositionMode,
    147                     UErrorCode& status);
    148 
    149     /**
    150      * RuleBasedCollator constructor. This takes the table rules and builds a
    151      * collation table out of them. Please see RuleBasedCollator class
    152      * description for more details on the collation rule syntax.
    153      * @param rules the collation rules to build the collation table from.
    154      * @param collationStrength strength for comparison
    155      * @param decompositionMode the normalisation mode
    156      * @param status reporting a success or an error.
    157      * @stable ICU 2.0
    158      */
    159     RuleBasedCollator(const UnicodeString& rules,
    160                     ECollationStrength collationStrength,
    161                     UColAttributeValue decompositionMode,
    162                     UErrorCode& status);
    163 
    164 #ifndef U_HIDE_INTERNAL_API
    165     /**
    166      * TODO: document & propose as public API
    167      * @internal
    168      */
    169     RuleBasedCollator(const UnicodeString &rules,
    170                       UParseError &parseError, UnicodeString &reason,
    171                       UErrorCode &errorCode);
    172 #endif  /* U_HIDE_INTERNAL_API */
    173 
    174     /**
    175      * Copy constructor.
    176      * @param other the RuleBasedCollator object to be copied
    177      * @stable ICU 2.0
    178      */
    179     RuleBasedCollator(const RuleBasedCollator& other);
    180 
    181 
    182     /** Opens a collator from a collator binary image created using
    183     *  cloneBinary. Binary image used in instantiation of the
    184     *  collator remains owned by the user and should stay around for
    185     *  the lifetime of the collator. The API also takes a base collator
    186     *  which must be the root collator.
    187     *  @param bin binary image owned by the user and required through the
    188     *             lifetime of the collator
    189     *  @param length size of the image. If negative, the API will try to
    190     *                figure out the length of the image
    191     *  @param base Base collator, for lookup of untailored characters.
    192     *              Must be the root collator, must not be NULL.
    193     *              The base is required to be present through the lifetime of the collator.
    194     *  @param status for catching errors
    195     *  @return newly created collator
    196     *  @see cloneBinary
    197     *  @stable ICU 3.4
    198     */
    199     RuleBasedCollator(const uint8_t *bin, int32_t length,
    200                     const RuleBasedCollator *base,
    201                     UErrorCode &status);
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     /**
    210      * Assignment operator.
    211      * @param other other RuleBasedCollator object to copy from.
    212      * @stable ICU 2.0
    213      */
    214     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    215 
    216     /**
    217      * Returns true if argument is the same as this object.
    218      * @param other Collator object to be compared.
    219      * @return true if the argument is the same as this object.
    220      * @stable ICU 2.0
    221      */
    222     virtual UBool operator==(const Collator& other) const;
    223 
    224     /**
    225      * Makes a copy of this object.
    226      * @return a copy of this object, owned by the caller
    227      * @stable ICU 2.0
    228      */
    229     virtual Collator* clone(void) const;
    230 
    231     /**
    232      * Creates a collation element iterator for the source string. The caller of
    233      * this method is responsible for the memory management of the return
    234      * pointer.
    235      * @param source the string over which the CollationElementIterator will
    236      *        iterate.
    237      * @return the collation element iterator of the source string using this as
    238      *         the base Collator.
    239      * @stable ICU 2.2
    240      */
    241     virtual CollationElementIterator* createCollationElementIterator(
    242                                            const UnicodeString& source) const;
    243 
    244     /**
    245      * Creates a collation element iterator for the source. The caller of this
    246      * method is responsible for the memory management of the returned pointer.
    247      * @param source the CharacterIterator which produces the characters over
    248      *        which the CollationElementIterator will iterate.
    249      * @return the collation element iterator of the source using this as the
    250      *         base Collator.
    251      * @stable ICU 2.2
    252      */
    253     virtual CollationElementIterator* createCollationElementIterator(
    254                                          const CharacterIterator& source) const;
    255 
    256     // Make deprecated versions of Collator::compare() visible.
    257     using Collator::compare;
    258 
    259     /**
    260     * The comparison function compares the character data stored in two
    261     * different strings. Returns information about whether a string is less
    262     * than, greater than or equal to another string.
    263     * @param source the source string to be compared with.
    264     * @param target the string that is to be compared with the source string.
    265     * @param status possible error code
    266     * @return Returns an enum value. UCOL_GREATER if source is greater
    267     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    268     * than target
    269     * @stable ICU 2.6
    270     **/
    271     virtual UCollationResult compare(const UnicodeString& source,
    272                                      const UnicodeString& target,
    273                                      UErrorCode &status) const;
    274 
    275     /**
    276     * Does the same thing as compare but limits the comparison to a specified
    277     * length
    278     * @param source the source string to be compared with.
    279     * @param target the string that is to be compared with the source string.
    280     * @param length the length the comparison is limited to
    281     * @param status possible error code
    282     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    283     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    284     *         length) is equal to target; UCOL_LESS if source (up to the specified
    285     *         length) is less than target.
    286     * @stable ICU 2.6
    287     */
    288     virtual UCollationResult compare(const UnicodeString& source,
    289                                      const UnicodeString& target,
    290                                      int32_t length,
    291                                      UErrorCode &status) const;
    292 
    293     /**
    294     * The comparison function compares the character data stored in two
    295     * different string arrays. Returns information about whether a string array
    296     * is less than, greater than or equal to another string array.
    297     * @param source the source string array to be compared with.
    298     * @param sourceLength the length of the source string array.  If this value
    299     *        is equal to -1, the string array is null-terminated.
    300     * @param target the string that is to be compared with the source string.
    301     * @param targetLength the length of the target string array.  If this value
    302     *        is equal to -1, the string array is null-terminated.
    303     * @param status possible error code
    304     * @return Returns an enum value. UCOL_GREATER if source is greater
    305     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    306     * than target
    307     * @stable ICU 2.6
    308     */
    309     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    310                                      const UChar* target, int32_t targetLength,
    311                                      UErrorCode &status) const;
    312 
    313     /**
    314      * Compares two strings using the Collator.
    315      * Returns whether the first one compares less than/equal to/greater than
    316      * the second one.
    317      * This version takes UCharIterator input.
    318      * @param sIter the first ("source") string iterator
    319      * @param tIter the second ("target") string iterator
    320      * @param status ICU status
    321      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    322      * @stable ICU 4.2
    323      */
    324     virtual UCollationResult compare(UCharIterator &sIter,
    325                                      UCharIterator &tIter,
    326                                      UErrorCode &status) const;
    327 
    328     /**
    329      * Compares two UTF-8 strings using the Collator.
    330      * Returns whether the first one compares less than/equal to/greater than
    331      * the second one.
    332      * This version takes UTF-8 input.
    333      * Note that a StringPiece can be implicitly constructed
    334      * from a std::string or a NUL-terminated const char * string.
    335      * @param source the first UTF-8 string
    336      * @param target the second UTF-8 string
    337      * @param status ICU status
    338      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    339      * @stable ICU 51
    340      */
    341     virtual UCollationResult compareUTF8(const StringPiece &source,
    342                                          const StringPiece &target,
    343                                          UErrorCode &status) const;
    344 
    345     /**
    346      * Transforms the string into a series of characters
    347      * that can be compared with CollationKey.compare().
    348      *
    349      * Note that sort keys are often less efficient than simply doing comparison.
    350      * For more details, see the ICU User Guide.
    351      *
    352      * @param source the source string.
    353      * @param key the transformed key of the source string.
    354      * @param status the error code status.
    355      * @return the transformed key.
    356      * @see CollationKey
    357      * @stable ICU 2.0
    358      */
    359     virtual CollationKey& getCollationKey(const UnicodeString& source,
    360                                           CollationKey& key,
    361                                           UErrorCode& status) const;
    362 
    363     /**
    364      * Transforms a specified region of the string into a series of characters
    365      * that can be compared with CollationKey.compare.
    366      *
    367      * Note that sort keys are often less efficient than simply doing comparison.
    368      * For more details, see the ICU User Guide.
    369      *
    370      * @param source the source string.
    371      * @param sourceLength the length of the source string.
    372      * @param key the transformed key of the source string.
    373      * @param status the error code status.
    374      * @return the transformed key.
    375      * @see CollationKey
    376      * @stable ICU 2.0
    377      */
    378     virtual CollationKey& getCollationKey(const UChar *source,
    379                                           int32_t sourceLength,
    380                                           CollationKey& key,
    381                                           UErrorCode& status) const;
    382 
    383     /**
    384      * Generates the hash code for the rule-based collation object.
    385      * @return the hash code.
    386      * @stable ICU 2.0
    387      */
    388     virtual int32_t hashCode() const;
    389 
    390     /**
    391     * Gets the locale of the Collator
    392     * @param type can be either requested, valid or actual locale. For more
    393     *             information see the definition of ULocDataLocaleType in
    394     *             uloc.h
    395     * @param status the error code status.
    396     * @return locale where the collation data lives. If the collator
    397     *         was instantiated from rules, locale is empty.
    398     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    399     */
    400     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    401 
    402     /**
    403      * Gets the tailoring rules for this collator.
    404      * @return the collation tailoring from which this collator was created
    405      * @stable ICU 2.0
    406      */
    407     const UnicodeString& getRules() const;
    408 
    409     /**
    410      * Gets the version information for a Collator.
    411      * @param info the version # information, the result will be filled in
    412      * @stable ICU 2.0
    413      */
    414     virtual void getVersion(UVersionInfo info) const;
    415 
    416 #ifndef U_HIDE_DEPRECATED_API
    417     /**
    418      * Returns the maximum length of any expansion sequences that end with the
    419      * specified comparison order.
    420      *
    421      * This is specific to the kind of collation element values and sequences
    422      * returned by the CollationElementIterator.
    423      * Call CollationElementIterator::getMaxExpansion() instead.
    424      *
    425      * @param order a collation order returned by CollationElementIterator::previous
    426      *              or CollationElementIterator::next.
    427      * @return maximum size of the expansion sequences ending with the collation
    428      *         element, or 1 if the collation element does not occur at the end of
    429      *         any expansion sequence
    430      * @see CollationElementIterator#getMaxExpansion
    431      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
    432      */
    433     int32_t getMaxExpansion(int32_t order) const;
    434 #endif  /* U_HIDE_DEPRECATED_API */
    435 
    436     /**
    437      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    438      * method is to implement a simple version of RTTI, since not all C++
    439      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    440      * methods call this method.
    441      * @return The class ID for this object. All objects of a given class have
    442      *         the same class ID. Objects of other classes have different class
    443      *         IDs.
    444      * @stable ICU 2.0
    445      */
    446     virtual UClassID getDynamicClassID(void) const;
    447 
    448     /**
    449      * Returns the class ID for this class. This is useful only for comparing to
    450      * a return value from getDynamicClassID(). For example:
    451      * <pre>
    452      * Base* polymorphic_pointer = createPolymorphicObject();
    453      * if (polymorphic_pointer->getDynamicClassID() ==
    454      *                                          Derived::getStaticClassID()) ...
    455      * </pre>
    456      * @return The class ID for all objects of this class.
    457      * @stable ICU 2.0
    458      */
    459     static UClassID U_EXPORT2 getStaticClassID(void);
    460 
    461 #ifndef U_HIDE_DEPRECATED_API
    462     /**
    463      * Do not use this method: The caller and the ICU library might use different heaps.
    464      * Use cloneBinary() instead which writes to caller-provided memory.
    465      *
    466      * Returns a binary format of this collator.
    467      * @param length Returns the length of the data, in bytes
    468      * @param status the error code status.
    469      * @return memory, owned by the caller, of size 'length' bytes.
    470      * @deprecated ICU 52. Use cloneBinary() instead.
    471      */
    472     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
    473 #endif  /* U_HIDE_DEPRECATED_API */
    474 
    475     /** Creates a binary image of a collator. This binary image can be stored and
    476     *  later used to instantiate a collator using ucol_openBinary.
    477     *  This API supports preflighting.
    478     *  @param buffer a fill-in buffer to receive the binary image
    479     *  @param capacity capacity of the destination buffer
    480     *  @param status for catching errors
    481     *  @return size of the image
    482     *  @see ucol_openBinary
    483     *  @stable ICU 3.4
    484     */
    485     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
    486 
    487     /**
    488      * Returns current rules. Delta defines whether full rules are returned or
    489      * just the tailoring.
    490      *
    491      * getRules(void) should normally be used instead.
    492      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    493      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    494      * @param buffer UnicodeString to store the result rules
    495      * @stable ICU 2.2
    496      * @see UCOL_FULL_RULES
    497      */
    498     void getRules(UColRuleOption delta, UnicodeString &buffer) const;
    499 
    500     /**
    501      * Universal attribute setter
    502      * @param attr attribute type
    503      * @param value attribute value
    504      * @param status to indicate whether the operation went on smoothly or there were errors
    505      * @stable ICU 2.2
    506      */
    507     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    508                               UErrorCode &status);
    509 
    510     /**
    511      * Universal attribute getter.
    512      * @param attr attribute type
    513      * @param status to indicate whether the operation went on smoothly or there were errors
    514      * @return attribute value
    515      * @stable ICU 2.2
    516      */
    517     virtual UColAttributeValue getAttribute(UColAttribute attr,
    518                                             UErrorCode &status) const;
    519 
    520     /**
    521      * Sets the variable top to the top of the specified reordering group.
    522      * The variable top determines the highest-sorting character
    523      * which is affected by UCOL_ALTERNATE_HANDLING.
    524      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
    525      * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
    526      *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
    527      *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
    528      * @param errorCode Standard ICU error code. Its input value must
    529      *                  pass the U_SUCCESS() test, or else the function returns
    530      *                  immediately. Check for U_FAILURE() on output or use with
    531      *                  function chaining. (See User Guide for details.)
    532      * @return *this
    533      * @see getMaxVariable
    534      * @stable ICU 53
    535      */
    536     virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
    537 
    538     /**
    539      * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
    540      * @return the maximum variable reordering group.
    541      * @see setMaxVariable
    542      * @stable ICU 53
    543      */
    544     virtual UColReorderCode getMaxVariable() const;
    545 
    546     /**
    547      * Sets the variable top to the primary weight of the specified string.
    548      *
    549      * Beginning with ICU 53, the variable top is pinned to
    550      * the top of one of the supported reordering groups,
    551      * and it must not be beyond the last of those groups.
    552      * See setMaxVariable().
    553      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    554      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    555      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    556      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    557      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    558      *    the last reordering group supported by setMaxVariable()
    559      * @return variable top primary weight
    560      * @deprecated ICU 53 Call setMaxVariable() instead.
    561      */
    562     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    563 
    564     /**
    565      * Sets the variable top to the primary weight of the specified string.
    566      *
    567      * Beginning with ICU 53, the variable top is pinned to
    568      * the top of one of the supported reordering groups,
    569      * and it must not be beyond the last of those groups.
    570      * See setMaxVariable().
    571      * @param varTop a UnicodeString of one or more UChars (more than one if a contraction) to which the variable top should be set
    572      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    573      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    574      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    575      *    the last reordering group supported by setMaxVariable()
    576      * @return variable top primary weight
    577      * @deprecated ICU 53 Call setMaxVariable() instead.
    578      */
    579     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    580 
    581     /**
    582      * Sets the variable top to the specified primary weight.
    583      *
    584      * Beginning with ICU 53, the variable top is pinned to
    585      * the top of one of the supported reordering groups,
    586      * and it must not be beyond the last of those groups.
    587      * See setMaxVariable().
    588      * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
    589      * @param status error code
    590      * @deprecated ICU 53 Call setMaxVariable() instead.
    591      */
    592     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    593 
    594     /**
    595      * Gets the variable top value of a Collator.
    596      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    597      * @return the variable top primary weight
    598      * @see getMaxVariable
    599      * @stable ICU 2.0
    600      */
    601     virtual uint32_t getVariableTop(UErrorCode &status) const;
    602 
    603     /**
    604      * Get a UnicodeSet that contains all the characters and sequences tailored in
    605      * this collator.
    606      * @param status      error code of the operation
    607      * @return a pointer to a UnicodeSet object containing all the
    608      *         code points and sequences that may sort differently than
    609      *         in the root collator. The object must be disposed of by using delete
    610      * @stable ICU 2.4
    611      */
    612     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    613 
    614     /**
    615      * Get the sort key as an array of bytes from a UnicodeString.
    616      *
    617      * Note that sort keys are often less efficient than simply doing comparison.
    618      * For more details, see the ICU User Guide.
    619      *
    620      * @param source string to be processed.
    621      * @param result buffer to store result in. If NULL, number of bytes needed
    622      *        will be returned.
    623      * @param resultLength length of the result buffer. If it is not large enough, the
    624      *        buffer will be filled to capacity.
    625      * @return Number of bytes needed for storing the sort key
    626      * @stable ICU 2.0
    627      */
    628     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    629                                int32_t resultLength) const;
    630 
    631     /**
    632      * Get the sort key as an array of bytes from a UChar buffer.
    633      *
    634      * Note that sort keys are often less efficient than simply doing comparison.
    635      * For more details, see the ICU User Guide.
    636      *
    637      * @param source string to be processed.
    638      * @param sourceLength length of string to be processed. If -1, the string
    639      *        is 0 terminated and length will be decided by the function.
    640      * @param result buffer to store result in. If NULL, number of bytes needed
    641      *        will be returned.
    642      * @param resultLength length of the result buffer. If it is not large enough, the
    643      *        buffer will be filled to capacity.
    644      * @return Number of bytes needed for storing the sort key
    645      * @stable ICU 2.2
    646      */
    647     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    648                                uint8_t *result, int32_t resultLength) const;
    649 
    650     /**
    651      * Retrieves the reordering codes for this collator.
    652      * @param dest The array to fill with the script ordering.
    653      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    654      *  will only return the length of the result without writing any codes (pre-flighting).
    655      * @param status A reference to an error code value, which must not indicate
    656      * a failure before the function call.
    657      * @return The length of the script ordering array.
    658      * @see ucol_setReorderCodes
    659      * @see Collator#getEquivalentReorderCodes
    660      * @see Collator#setReorderCodes
    661      * @stable ICU 4.8
    662      */
    663      virtual int32_t getReorderCodes(int32_t *dest,
    664                                      int32_t destCapacity,
    665                                      UErrorCode& status) const;
    666 
    667     /**
    668      * Sets the ordering of scripts for this collator.
    669      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    670      * length is also set to 0. An empty array will clear any reordering codes on the collator.
    671      * @param reorderCodesLength The length of reorderCodes.
    672      * @param status error code
    673      * @see ucol_setReorderCodes
    674      * @see Collator#getReorderCodes
    675      * @see Collator#getEquivalentReorderCodes
    676      * @stable ICU 4.8
    677      */
    678      virtual void setReorderCodes(const int32_t* reorderCodes,
    679                                   int32_t reorderCodesLength,
    680                                   UErrorCode& status) ;
    681 
    682     /**
    683      * Implements ucol_strcollUTF8().
    684      * @internal
    685      */
    686     virtual UCollationResult internalCompareUTF8(
    687             const char *left, int32_t leftLength,
    688             const char *right, int32_t rightLength,
    689             UErrorCode &errorCode) const;
    690 
    691     /** Get the short definition string for a collator. This internal API harvests the collator's
    692      *  locale and the attribute set and produces a string that can be used for opening
    693      *  a collator with the same attributes using the ucol_openFromShortString API.
    694      *  This string will be normalized.
    695      *  The structure and the syntax of the string are defined in the "Naming collators"
    696      *  section of the ICU User Guide:
    697      *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
    698      *  This function supports preflighting.
    699      *
    700      *  This is internal, and intended to be used with delegate converters.
    701      *
    702      *  @param locale a locale that will appear as the collator's locale in the resulting
    703      *                short string definition. If NULL, the locale will be harvested
    704      *                from the collator.
    705      *  @param buffer space to hold the resulting string
    706      *  @param capacity capacity of the buffer
    707      *  @param status for returning errors. All preflighting errors are reported.
    708      *  @return length of the resulting string
    709      *  @see ucol_openFromShortString
    710      *  @see ucol_normalizeShortDefinitionString
    711      *  @see ucol_getShortDefinitionString
    712      *  @internal
    713      */
    714     virtual int32_t internalGetShortDefinitionString(const char *locale,
    715                                                      char *buffer,
    716                                                      int32_t capacity,
    717                                                      UErrorCode &status) const;
    718 
    719     /**
    720      * Implements ucol_nextSortKeyPart().
    721      * @internal
    722      */
    723     virtual int32_t internalNextSortKeyPart(
    724             UCharIterator *iter, uint32_t state[2],
    725             uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
    726 
    727     /**
    728      * Only for use in ucol_openRules().
    729      * @internal
    730      */
    731     RuleBasedCollator();
    732 
    733 #ifndef U_HIDE_INTERNAL_API
    734     /**
    735      * Implements ucol_getLocaleByType().
    736      * Needed because the lifetime of the locale ID string must match that of the collator.
    737      * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper.
    738      * @internal
    739      */
    740     const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
    741 
    742     /**
    743      * Implements ucol_getContractionsAndExpansions().
    744      * Gets this collator's sets of contraction strings and/or
    745      * characters and strings that map to multiple collation elements (expansions).
    746      * If addPrefixes is TRUE, then contractions that are expressed as
    747      * prefix/pre-context rules are included.
    748      * @param contractions if not NULL, the set to hold the contractions
    749      * @param expansions if not NULL, the set to hold the expansions
    750      * @param addPrefixes include prefix contextual mappings
    751      * @param errorCode in/out ICU error code
    752      * @internal
    753      */
    754     void internalGetContractionsAndExpansions(
    755             UnicodeSet *contractions, UnicodeSet *expansions,
    756             UBool addPrefixes, UErrorCode &errorCode) const;
    757 
    758     /**
    759      * Adds the contractions that start with character c to the set.
    760      * Ignores prefixes. Used by AlphabeticIndex.
    761      * @internal
    762      */
    763     void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
    764 
    765     /**
    766      * Implements from-rule constructors, and ucol_openRules().
    767      * @internal
    768      */
    769     void internalBuildTailoring(
    770             const UnicodeString &rules,
    771             int32_t strength,
    772             UColAttributeValue decompositionMode,
    773             UParseError *outParseError, UnicodeString *outReason,
    774             UErrorCode &errorCode);
    775 
    776     /** Converts a UCollator pointer to a RuleBasedCollator pointer via fromUCollator(). @internal */
    777     static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
    778         return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));  // NULL if the object is not a RuleBasedCollator
    779     }
    780     /** Const overload: converts a const UCollator pointer to a const RuleBasedCollator pointer. @internal */
    781     static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
    782         return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));  // NULL if the object is not a RuleBasedCollator
    783     }
    784 
    785     /**
    786      * Appends the CEs for the string to the vector.
    787      * @internal for tests & tools
    788      */
    789     void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
    790 #endif  // U_HIDE_INTERNAL_API
    791 
    792 protected:
    793    /**
    794     * Used internally by registration to define the requested and valid locales.
    795     * @param requestedLocale the requested locale
    796     * @param validLocale the valid locale
    797     * @param actualLocale the actual locale
    798     * @internal
    799     */
    800     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    801 
    802 private:
    803     friend class CollationElementIterator;
    804     friend class Collator;
    805 
    806     RuleBasedCollator(const CollationCacheEntry *entry);
    807 
    808     /**
    809      * Enumeration of attributes that are relevant for short definition strings
    810      * (e.g., ucol_getShortDefinitionString()).
    811      * Effectively extends UColAttribute: numbering starts at UCOL_ATTRIBUTE_COUNT.
    812      */
    813     enum Attributes {
    814         ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,  // first value, equal to UCOL_ATTRIBUTE_COUNT
    815         ATTR_LIMIT  // ATTR_VARIABLE_TOP + 1; exclusive upper bound for attribute indexes
    816     };
    817 
    818     void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
    819 
    820     // Both lengths must be <0 or else both must be >=0.
    821     UCollationResult doCompare(const UChar *left, int32_t leftLength,
    822                                const UChar *right, int32_t rightLength,
    823                                UErrorCode &errorCode) const;
    824     UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
    825                                const uint8_t *right, int32_t rightLength,
    826                                UErrorCode &errorCode) const;
    827 
    828     void writeSortKey(const UChar *s, int32_t length,
    829                       SortKeyByteSink &sink, UErrorCode &errorCode) const;
    830 
    831     void writeIdenticalLevel(const UChar *s, const UChar *limit,
    832                              SortKeyByteSink &sink, UErrorCode &errorCode) const;
    833 
    834     const CollationSettings &getDefaultSettings() const;
    835 
    836     void setAttributeDefault(int32_t attribute) {  // Clears the "explicitly set" bit for this attribute.
    837         explicitlySetAttributes &= ~((uint32_t)1 << attribute);  // attribute is a bit index into the 32-bit mask
    838     }
    839     void setAttributeExplicitly(int32_t attribute) {  // Sets the "explicitly set" bit for this attribute.
    840         explicitlySetAttributes |= (uint32_t)1 << attribute;  // attribute is a bit index into the 32-bit mask
    841     }
    842     UBool attributeHasBeenSetExplicitly(int32_t attribute) const {  // Tests this attribute's bit in explicitlySetAttributes.
    843         // Precondition (not enforced here): 0 <= attribute && attribute < ATTR_LIMIT.
    844         return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);
    845     }
    846 
    847     /**
    848      * Tests whether a character is "unsafe" for use as a collation starting point.
    849      *
    850      * @param c code point or code unit
    851      * @return TRUE if c is unsafe
    852      * @see CollationElementIterator#setOffset(int)
    853      */
    854     UBool isUnsafe(UChar32 c) const;
    855 
    856     static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
    857     UBool initMaxExpansions(UErrorCode &errorCode) const;
    858 
    859     void setFastLatinOptions(CollationSettings &ownedSettings) const;
    860 
    861     const CollationData *data;
    862     const CollationSettings *settings;  // reference-counted
    863     const CollationTailoring *tailoring;  // alias of cacheEntry->tailoring
    864     const CollationCacheEntry *cacheEntry;  // reference-counted
    865     Locale validLocale;
    866     uint32_t explicitlySetAttributes;
    867 
    868     UBool actualLocaleIsSameAsValid;
    869 };
    870 
    871 U_NAMESPACE_END
    872 
    873 #endif  // !UCONFIG_NO_COLLATION
    874 #endif  // TBLCOLL_H
    875