Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 * Copyright (C) 1996-2016, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 ******************************************************************************
      8 */
      9 
     10 /**
     11  * \file
     12  * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
     13  */
     14 
     15 /**
     16 * File tblcoll.h
     17 *
     18 * Created by: Helena Shih
     19 *
     20 * Modification History:
     21 *
     22 *  Date        Name        Description
     23 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     24 *                          constructor which reads RuleBasedCollator object from
     25 *                          a binary file.  Added writeToFile method which streams
     26 *                          RuleBasedCollator out to a binary file.  The streamIn
     27 *                          and streamOut methods use istream and ostream objects
     28 *                          in binary mode.
     29 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     30 *                          hold invariant data.
     31 *  2/13/97     aliu        Moved several methods into this class from Collation.
     32 *                          Added a private RuleBasedCollator(Locale&) constructor,
     33 *                          to be used by Collator::createDefault().  General
     34 *                          clean up.
     35 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     36 *                          constructor and getDynamicClassID.
     37 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     38 *                          specifying whether or not binary loading is to be
     39 *                          attempted.  This is required for dynamic rule loading.
     40 * 05/07/97     helena      Added memory allocation error detection.
     41 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     42 *                          use MergeCollation::getPattern.
     43 *  6/20/97     helena      Java class name change.
     44 *  8/18/97     helena      Added internal API documentation.
     45 * 09/03/97     helena      Added createCollationKeyValues().
     46 * 02/10/98     damiba      Added compare with "length" parameter
     47 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     48 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     49 *                          Normalizer::EMode
     50 * 06/14/99     stephen     Removed kResourceBundleSuffix
     51 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     52 *                          UnicodeString construction and special case for NO_OP.
     53 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     54 *                          internal state management.
     55 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     56 *                          to implementation file.
     57 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     58 *                          (ucol.h)
     59 * 2012-2014    markus      Rewritten in C++ again.
     60 */
     61 
     62 #ifndef TBLCOLL_H
     63 #define TBLCOLL_H
     64 
     65 #include "unicode/utypes.h"
     66 
     67 #if !UCONFIG_NO_COLLATION
     68 
     69 #include "unicode/coll.h"
     70 #include "unicode/locid.h"
     71 #include "unicode/uiter.h"
     72 #include "unicode/ucol.h"
     73 
     74 U_NAMESPACE_BEGIN
     75 
     76 struct CollationCacheEntry;
     77 struct CollationData;
     78 struct CollationSettings;
     79 struct CollationTailoring;
     80 /**
     81 * @stable ICU 2.0
     82 */
     83 class StringSearch;
     84 /**
     85 * @stable ICU 2.0
     86 */
     87 class CollationElementIterator;
     88 class CollationKey;
     89 class SortKeyByteSink;
     90 class UnicodeSet;
     91 class UnicodeString;
     92 class UVector64;
     93 
     94 /**
     95  * The RuleBasedCollator class provides the implementation of
     96  * Collator, using data-driven tables. The user can create a customized
     97  * table-based collation.
     98  * <p>
     99  * For more information about the collation service see
    100  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
    101  * <p>
    102  * Collation service provides correct sorting orders for most locales supported in ICU.
    103  * If specific data for a locale is not available, the order eventually falls back
    104  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
    105  * <p>
    106  * Sort ordering may be customized by providing your own set of rules. For more on
    107  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
    108  * Collation Customization</a> section of the User Guide.
    109  * <p>
    110  * Note, RuleBasedCollator is not to be subclassed.
    111  * @see        Collator
    112  */
    113 class U_I18N_API RuleBasedCollator : public Collator {
    114 public:
    115     /**
    116      * RuleBasedCollator constructor. This takes the table rules and builds a
    117      * collation table out of them. Please see RuleBasedCollator class
    118      * description for more details on the collation rule syntax.
    119      * @param rules the collation rules to build the collation table from.
    120      * @param status reporting a success or an error.
    121      * @stable ICU 2.0
    122      */
    123     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    124 
    125     /**
    126      * RuleBasedCollator constructor. This takes the table rules and builds a
    127      * collation table out of them. Please see RuleBasedCollator class
    128      * description for more details on the collation rule syntax.
    129      * @param rules the collation rules to build the collation table from.
    130      * @param collationStrength strength for comparison
    131      * @param status reporting a success or an error.
    132      * @stable ICU 2.0
    133      */
    134     RuleBasedCollator(const UnicodeString& rules,
    135                        ECollationStrength collationStrength,
    136                        UErrorCode& status);
    137 
    138     /**
    139      * RuleBasedCollator constructor. This takes the table rules and builds a
    140      * collation table out of them. Please see RuleBasedCollator class
    141      * description for more details on the collation rule syntax.
    142      * @param rules the collation rules to build the collation table from.
    143      * @param decompositionMode the normalisation mode
    144      * @param status reporting a success or an error.
    145      * @stable ICU 2.0
    146      */
    147     RuleBasedCollator(const UnicodeString& rules,
    148                     UColAttributeValue decompositionMode,
    149                     UErrorCode& status);
    150 
    151     /**
    152      * RuleBasedCollator constructor. This takes the table rules and builds a
    153      * collation table out of them. Please see RuleBasedCollator class
    154      * description for more details on the collation rule syntax.
    155      * @param rules the collation rules to build the collation table from.
    156      * @param collationStrength strength for comparison
    157      * @param decompositionMode the normalisation mode
    158      * @param status reporting a success or an error.
    159      * @stable ICU 2.0
    160      */
    161     RuleBasedCollator(const UnicodeString& rules,
    162                     ECollationStrength collationStrength,
    163                     UColAttributeValue decompositionMode,
    164                     UErrorCode& status);
    165 
    166 #ifndef U_HIDE_INTERNAL_API
    167     /**
    168      * TODO: document & propose as public API
    169      * @internal
    170      */
    171     RuleBasedCollator(const UnicodeString &rules,
    172                       UParseError &parseError, UnicodeString &reason,
    173                       UErrorCode &errorCode);
    174 #endif  /* U_HIDE_INTERNAL_API */
    175 
    176     /**
    177      * Copy constructor.
    178      * @param other the RuleBasedCollator object to be copied
    179      * @stable ICU 2.0
    180      */
    181     RuleBasedCollator(const RuleBasedCollator& other);
    182 
    183 
    184     /** Opens a collator from a collator binary image created using
    185     *  cloneBinary. Binary image used in instantiation of the
    186     *  collator remains owned by the user and should stay around for
    187     *  the lifetime of the collator. The API also takes a base collator
    188     *  which must be the root collator.
    189     *  @param bin binary image owned by the user and required through the
    190     *             lifetime of the collator
    191     *  @param length size of the image. If negative, the API will try to
    192     *                figure out the length of the image
    193     *  @param base Base collator, for lookup of untailored characters.
    194     *              Must be the root collator, must not be NULL.
    195     *              The base is required to be present through the lifetime of the collator.
    196     *  @param status for catching errors
    197     *  @return newly created collator
    198     *  @see cloneBinary
    199     *  @stable ICU 3.4
    200     */
    201     RuleBasedCollator(const uint8_t *bin, int32_t length,
    202                     const RuleBasedCollator *base,
    203                     UErrorCode &status);
    204 
    205     /**
    206      * Destructor.
    207      * @stable ICU 2.0
    208      */
    209     virtual ~RuleBasedCollator();
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other other RuleBasedCollator object to copy from.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if the argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if the argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Makes a copy of this object.
    228      * @return a copy of this object, owned by the caller
    229      * @stable ICU 2.0
    230      */
    231     virtual Collator* clone(void) const;
    232 
    233     /**
    234      * Creates a collation element iterator for the source string. The caller of
    235      * this method is responsible for the memory management of the returned
    236      * pointer.
    237      * @param source the string over which the CollationElementIterator will
    238      *        iterate.
    239      * @return the collation element iterator of the source string using this as
    240      *         the base Collator.
    241      * @stable ICU 2.2
    242      */
    243     virtual CollationElementIterator* createCollationElementIterator(
    244                                            const UnicodeString& source) const;
    245 
    246     /**
    247      * Creates a collation element iterator for the source. The caller of this
    248      * method is responsible for the memory management of the returned pointer.
    249      * @param source the CharacterIterator which produces the characters over
    250      *        which the CollationElementIterator will iterate.
    251      * @return the collation element iterator of the source using this as the
    252      *         base Collator.
    253      * @stable ICU 2.2
    254      */
    255     virtual CollationElementIterator* createCollationElementIterator(
    256                                          const CharacterIterator& source) const;
    257 
    258     // Make deprecated versions of Collator::compare() visible.
    259     using Collator::compare;
    260 
    261     /**
    262     * The comparison function compares the character data stored in two
    263     * different strings. Returns information about whether a string is less
    264     * than, greater than or equal to another string.
    265     * @param source the source string to be compared with.
    266     * @param target the string that is to be compared with the source string.
    267     * @param status possible error code
    268     * @return Returns an enum value. UCOL_GREATER if source is greater
    269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    270     * than target
    271     * @stable ICU 2.6
    272     **/
    273     virtual UCollationResult compare(const UnicodeString& source,
    274                                      const UnicodeString& target,
    275                                      UErrorCode &status) const;
    276 
    277     /**
    278     * Does the same thing as compare but limits the comparison to a specified
    279     * length.
    280     * @param source the source string to be compared with.
    281     * @param target the string that is to be compared with the source string.
    282     * @param length the length the comparison is limited to
    283     * @param status possible error code
    284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    285     *         length) is greater than target; UCOL_EQUAL if source (up to the specified
    286     *         length) is equal to target; UCOL_LESS if source (up to the specified
    287     *         length) is less than target.
    288     * @stable ICU 2.6
    289     */
    290     virtual UCollationResult compare(const UnicodeString& source,
    291                                      const UnicodeString& target,
    292                                      int32_t length,
    293                                      UErrorCode &status) const;
    294 
    295     /**
    296     * The comparison function compares the character data stored in two
    297     * different string arrays. Returns information about whether a string array
    298     * is less than, greater than or equal to another string array.
    299     * @param source the source string array to be compared with.
    300     * @param sourceLength the length of the source string array.  If this value
    301     *        is equal to -1, the string array is null-terminated.
    302     * @param target the string that is to be compared with the source string.
    303     * @param targetLength the length of the target string array.  If this value
    304     *        is equal to -1, the string array is null-terminated.
    305     * @param status possible error code
    306     * @return Returns an enum value. UCOL_GREATER if source is greater
    307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    308     * than target
    309     * @stable ICU 2.6
    310     */
    311     virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
    312                                      const char16_t* target, int32_t targetLength,
    313                                      UErrorCode &status) const;
    314 
    315     /**
    316      * Compares two strings using the Collator.
    317      * Returns whether the first one compares less than/equal to/greater than
    318      * the second one.
    319      * This version takes UCharIterator input.
    320      * @param sIter the first ("source") string iterator
    321      * @param tIter the second ("target") string iterator
    322      * @param status ICU status
    323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    324      * @stable ICU 4.2
    325      */
    326     virtual UCollationResult compare(UCharIterator &sIter,
    327                                      UCharIterator &tIter,
    328                                      UErrorCode &status) const;
    329 
    330     /**
    331      * Compares two UTF-8 strings using the Collator.
    332      * Returns whether the first one compares less than/equal to/greater than
    333      * the second one.
    334      * This version takes UTF-8 input.
    335      * Note that a StringPiece can be implicitly constructed
    336      * from a std::string or a NUL-terminated const char * string.
    337      * @param source the first UTF-8 string
    338      * @param target the second UTF-8 string
    339      * @param status ICU status
    340      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    341      * @stable ICU 51
    342      */
    343     virtual UCollationResult compareUTF8(const StringPiece &source,
    344                                          const StringPiece &target,
    345                                          UErrorCode &status) const;
    346 
    347     /**
    348      * Transforms the string into a series of characters
    349      * that can be compared with CollationKey.compare().
    350      *
    351      * Note that sort keys are often less efficient than simply doing comparison.
    352      * For more details, see the ICU User Guide.
    353      *
    354      * @param source the source string.
    355      * @param key the transformed key of the source string.
    356      * @param status the error code status.
    357      * @return the transformed key.
    358      * @see CollationKey
    359      * @stable ICU 2.0
    360      */
    361     virtual CollationKey& getCollationKey(const UnicodeString& source,
    362                                           CollationKey& key,
    363                                           UErrorCode& status) const;
    364 
    365     /**
    366      * Transforms a specified region of the string into a series of characters
    367      * that can be compared with CollationKey.compare.
    368      *
    369      * Note that sort keys are often less efficient than simply doing comparison.
    370      * For more details, see the ICU User Guide.
    371      *
    372      * @param source the source string.
    373      * @param sourceLength the length of the source string.
    374      * @param key the transformed key of the source string.
    375      * @param status the error code status.
    376      * @return the transformed key.
    377      * @see CollationKey
    378      * @stable ICU 2.0
    379      */
    380     virtual CollationKey& getCollationKey(const char16_t *source,
    381                                           int32_t sourceLength,
    382                                           CollationKey& key,
    383                                           UErrorCode& status) const;
    384 
    385     /**
    386      * Generates the hash code for the rule-based collation object.
    387      * @return the hash code.
    388      * @stable ICU 2.0
    389      */
    390     virtual int32_t hashCode() const;
    391 
    392     /**
    393     * Gets the locale of the Collator
    394     * @param type can be either requested, valid or actual locale. For more
    395     *             information see the definition of ULocDataLocaleType in
    396     *             uloc.h
    397     * @param status the error code status.
    398     * @return locale where the collation data lives. If the collator
    399     *         was instantiated from rules, locale is empty.
    400     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    401     */
    402     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    403 
    404     /**
    405      * Gets the tailoring rules for this collator.
    406      * @return the collation tailoring from which this collator was created
    407      * @stable ICU 2.0
    408      */
    409     const UnicodeString& getRules() const;
    410 
    411     /**
    412      * Gets the version information for a Collator.
    413      * @param info the version # information, the result will be filled in
    414      * @stable ICU 2.0
    415      */
    416     virtual void getVersion(UVersionInfo info) const;
    417 
    418 #ifndef U_HIDE_DEPRECATED_API
    419     /**
    420      * Returns the maximum length of any expansion sequences that end with the
    421      * specified comparison order.
    422      *
    423      * This is specific to the kind of collation element values and sequences
    424      * returned by the CollationElementIterator.
    425      * Call CollationElementIterator::getMaxExpansion() instead.
    426      *
    427      * @param order a collation order returned by CollationElementIterator::previous
    428      *              or CollationElementIterator::next.
    429      * @return maximum size of the expansion sequences ending with the collation
    430      *         element, or 1 if the collation element does not occur at the end of
    431      *         any expansion sequence
    432      * @see CollationElementIterator#getMaxExpansion
    433      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
    434      */
    435     int32_t getMaxExpansion(int32_t order) const;
    436 #endif  /* U_HIDE_DEPRECATED_API */
    437 
    438     /**
    439      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    440      * method is to implement a simple version of RTTI, since not all C++
    441      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    442      * methods call this method.
    443      * @return The class ID for this object. All objects of a given class have
    444      *         the same class ID. Objects of other classes have different class
    445      *         IDs.
    446      * @stable ICU 2.0
    447      */
    448     virtual UClassID getDynamicClassID(void) const;
    449 
    450     /**
    451      * Returns the class ID for this class. This is useful only for comparing to
    452      * a return value from getDynamicClassID(). For example:
    453      * <pre>
    454      * Base* polymorphic_pointer = createPolymorphicObject();
    455      * if (polymorphic_pointer->getDynamicClassID() ==
    456      *                                          Derived::getStaticClassID()) ...
    457      * </pre>
    458      * @return The class ID for all objects of this class.
    459      * @stable ICU 2.0
    460      */
    461     static UClassID U_EXPORT2 getStaticClassID(void);
    462 
    463 #ifndef U_HIDE_DEPRECATED_API
    464     /**
    465      * Do not use this method: The caller and the ICU library might use different heaps.
    466      * Use cloneBinary() instead which writes to caller-provided memory.
    467      *
    468      * Returns a binary format of this collator.
    469      * @param length Returns the length of the data, in bytes
    470      * @param status the error code status.
    471      * @return memory, owned by the caller, of size 'length' bytes.
    472      * @deprecated ICU 52. Use cloneBinary() instead.
    473      */
    474     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
    475 #endif  /* U_HIDE_DEPRECATED_API */
    476 
    477     /** Creates a binary image of a collator. This binary image can be stored and
    478     *  later used to instantiate a collator using ucol_openBinary.
    479     *  This API supports preflighting.
    480     *  @param buffer a fill-in buffer to receive the binary image
    481     *  @param capacity capacity of the destination buffer
    482     *  @param status for catching errors
    483     *  @return size of the image
    484     *  @see ucol_openBinary
    485     *  @stable ICU 3.4
    486     */
    487     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
    488 
    489     /**
    490      * Returns current rules. Delta defines whether full rules are returned or
    491      * just the tailoring.
    492      *
    493      * getRules(void) should normally be used instead.
    494      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    495      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    496      * @param buffer UnicodeString to store the result rules
    497      * @stable ICU 2.2
    498      * @see UCOL_FULL_RULES
    499      */
    500     void getRules(UColRuleOption delta, UnicodeString &buffer) const;
    501 
    502     /**
    503      * Universal attribute setter
    504      * @param attr attribute type
    505      * @param value attribute value
    506      * @param status to indicate whether the operation went on smoothly or there were errors
    507      * @stable ICU 2.2
    508      */
    509     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    510                               UErrorCode &status);
    511 
    512     /**
    513      * Universal attribute getter.
    514      * @param attr attribute type
    515      * @param status to indicate whether the operation went on smoothly or there were errors
    516      * @return attribute value
    517      * @stable ICU 2.2
    518      */
    519     virtual UColAttributeValue getAttribute(UColAttribute attr,
    520                                             UErrorCode &status) const;
    521 
    522     /**
    523      * Sets the variable top to the top of the specified reordering group.
    524      * The variable top determines the highest-sorting character
    525      * which is affected by UCOL_ALTERNATE_HANDLING.
    526      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
    527      * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
    528      *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
    529      *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
    530      * @param errorCode Standard ICU error code. Its input value must
    531      *                  pass the U_SUCCESS() test, or else the function returns
    532      *                  immediately. Check for U_FAILURE() on output or use with
    533      *                  function chaining. (See User Guide for details.)
    534      * @return *this
    535      * @see getMaxVariable
    536      * @stable ICU 53
    537      */
    538     virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
    539 
    540     /**
    541      * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
    542      * @return the maximum variable reordering group.
    543      * @see setMaxVariable
    544      * @stable ICU 53
    545      */
    546     virtual UColReorderCode getMaxVariable() const;
    547 
    548     /**
    549      * Sets the variable top to the primary weight of the specified string.
    550      *
    551      * Beginning with ICU 53, the variable top is pinned to
    552      * the top of one of the supported reordering groups,
    553      * and it must not be beyond the last of those groups.
    554      * See setMaxVariable().
    555      * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
    556      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    557      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    558      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    559      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    560      *    the last reordering group supported by setMaxVariable()
    561      * @return variable top primary weight
    562      * @deprecated ICU 53 Call setMaxVariable() instead.
    563      */
    564     virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status);
    565 
    566     /**
    567      * Sets the variable top to the primary weight of the specified string.
    568      *
    569      * Beginning with ICU 53, the variable top is pinned to
    570      * the top of one of the supported reordering groups,
    571      * and it must not be beyond the last of those groups.
    572      * See setMaxVariable().
    573      * @param varTop a UnicodeString of one or more (if contraction) char16_ts to which the variable top should be set
    574      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    575      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    576      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    577      *    the last reordering group supported by setMaxVariable()
    578      * @return variable top primary weight
    579      * @deprecated ICU 53 Call setMaxVariable() instead.
    580      */
    581     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    582 
    583     /**
    584      * Sets the variable top to the specified primary weight.
    585      *
    586      * Beginning with ICU 53, the variable top is pinned to
    587      * the top of one of the supported reordering groups,
    588      * and it must not be beyond the last of those groups.
    589      * See setMaxVariable().
    590      * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
    591      * @param status error code
    592      * @deprecated ICU 53 Call setMaxVariable() instead.
    593      */
    594     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    595 
    596     /**
    597      * Gets the variable top value of a Collator.
    598      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    599      * @return the variable top primary weight
    600      * @see getMaxVariable
    601      * @stable ICU 2.0
    602      */
    603     virtual uint32_t getVariableTop(UErrorCode &status) const;
    604 
    605     /**
    606      * Get a UnicodeSet that contains all the characters and sequences tailored in
    607      * this collator.
    608      * @param status      error code of the operation
    609      * @return a pointer to a UnicodeSet object containing all the
    610      *         code points and sequences that may sort differently than
    611      *         in the root collator. The object must be disposed of by using delete
    612      * @stable ICU 2.4
    613      */
    614     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    615 
    616     /**
    617      * Get the sort key as an array of bytes from a UnicodeString.
    618      *
    619      * Note that sort keys are often less efficient than simply doing comparison.
    620      * For more details, see the ICU User Guide.
    621      *
    622      * @param source string to be processed.
    623      * @param result buffer to store result in. If NULL, number of bytes needed
    624      *        will be returned.
    625      * @param resultLength length of the result buffer. If it is not large enough, the
    626      *        buffer will be filled to capacity.
    627      * @return Number of bytes needed for storing the sort key
    628      * @stable ICU 2.0
    629      */
    630     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    631                                int32_t resultLength) const;
    632 
    633     /**
    634      * Get the sort key as an array of bytes from a char16_t buffer.
    635      *
    636      * Note that sort keys are often less efficient than simply doing comparison.
    637      * For more details, see the ICU User Guide.
    638      *
    639      * @param source string to be processed.
    640      * @param sourceLength length of string to be processed. If -1, the string
    641      *        is NUL-terminated and its length will be determined by the function.
    642      * @param result buffer to store result in. If NULL, the number of bytes needed
    643      *        will be returned.
    644      * @param resultLength length of the result buffer. If it is not large enough, the
    645      *        buffer will be filled to capacity.
    646      * @return Number of bytes needed for storing the sort key
    647      * @stable ICU 2.2
    648      */
    649     virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength,
    650                                uint8_t *result, int32_t resultLength) const;
    651 
    652     /**
    653      * Retrieves the reordering codes for this collator.
    654      * @param dest The array to fill with the script ordering.
    655      * @param destCapacity The capacity of dest, in int32_t elements. If it is 0, then dest may be NULL and the function
    656      *  will only return the length of the result without writing any codes (pre-flighting).
    657      * @param status A reference to an error code value, which must not indicate
    658      * a failure before the function call.
    659      * @return The length of the script ordering array.
    660      * @see ucol_setReorderCodes
    661      * @see Collator#getEquivalentReorderCodes
    662      * @see Collator#setReorderCodes
    663      * @stable ICU 4.8
    664      */
    665      virtual int32_t getReorderCodes(int32_t *dest,
    666                                      int32_t destCapacity,
    667                                      UErrorCode& status) const;
    668 
    669     /**
    670      * Sets the ordering of scripts for this collator.
    671      * @param reorderCodes An array of script codes in the new order. This can be NULL if
    672      * reorderCodesLength is also set to 0. An empty array will clear any reordering codes on the collator.
    673      * @param reorderCodesLength The number of codes in reorderCodes.
    674      * @param status error code
    675      * @see ucol_setReorderCodes
    676      * @see Collator#getReorderCodes
    677      * @see Collator#getEquivalentReorderCodes
    678      * @stable ICU 4.8
    679      */
    680      virtual void setReorderCodes(const int32_t* reorderCodes,
    681                                   int32_t reorderCodesLength,
    682                                   UErrorCode& status) ;
    683 
    684     /**
    685      * Implements ucol_strcollUTF8(): takes left and right as const char buffers with their lengths, and an in/out error code.
    686      * @internal
    687      */
    688     virtual UCollationResult internalCompareUTF8(
    689             const char *left, int32_t leftLength,
    690             const char *right, int32_t rightLength,
    691             UErrorCode &errorCode) const;
    692 
    693     /** Get the short definition string for a collator. This internal API harvests the collator's
    694      *  locale and the attribute set and produces a string that can be used for opening
    695      *  a collator with the same attributes using the ucol_openFromShortString API.
    696      *  This string will be normalized.
    697      *  The structure and the syntax of the string are defined in the "Naming collators"
    698      *  section of the User Guide:
    699      *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
    700      *  This function supports preflighting.
    701      *
    702      *  This is internal, and intended to be used with delegate converters.
    703      *
    704      *  @param locale a locale that will appear as the collator's locale in the resulting
    705      *                short string definition. If NULL, the locale will be harvested
    706      *                from the collator.
    707      *  @param buffer space to hold the resulting string
    708      *  @param capacity capacity of the buffer
    709      *  @param status for returning errors. All the preflighting errors are reported.
    710      *  @return length of the resulting string
    711      *  @see ucol_openFromShortString
    712      *  @see ucol_normalizeShortDefinitionString
    713      *  @see ucol_getShortDefinitionString
    714      *  @internal
    715      */
    716     virtual int32_t internalGetShortDefinitionString(const char *locale,
    717                                                      char *buffer,
    718                                                      int32_t capacity,
    719                                                      UErrorCode &status) const;
    720 
    721     /**
    722      * Implements ucol_nextSortKeyPart(); see that function for the meaning of iter, state, dest and count.
    723      * @internal
    724      */
    725     virtual int32_t internalNextSortKeyPart(
    726             UCharIterator *iter, uint32_t state[2],
    727             uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
    728 
    729     // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API; it is declared unconditionally.
    730     /**
    731      * Default constructor. Only for use in ucol_openRules().
    732      * @internal
    733      */
    734     RuleBasedCollator();
    735 
    736 #ifndef U_HIDE_INTERNAL_API
    737     /**
    738      * Implements ucol_getLocaleByType().
    739      * Needed because the lifetime of the returned locale ID string must match that of the collator.
    740      * getLocale() returns a copy of a Locale, which would have only a minimal lifetime in a C wrapper.
    741      * @internal
    742      */
    743     const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
    744 
    745     /**
    746      * Implements ucol_getContractionsAndExpansions().
    747      * Gets this collator's sets of contraction strings and/or
    748      * characters and strings that map to multiple collation elements (expansions).
    749      * If addPrefixes is TRUE, then contractions that are expressed as
    750      * prefix/pre-context rules are included.
    751      * @param contractions if not NULL, the set to hold the contractions
    752      * @param expansions if not NULL, the set to hold the expansions
    753      * @param addPrefixes if TRUE, include prefix contextual mappings
    754      * @param errorCode in/out ICU error code
    755      * @internal
    756      */
    757     void internalGetContractionsAndExpansions(
    758             UnicodeSet *contractions, UnicodeSet *expansions,
    759             UBool addPrefixes, UErrorCode &errorCode) const;
    760 
    761     /**
    762      * Adds the contractions that start with character c to the given set.
    763      * Ignores prefixes. Used by AlphabeticIndex.
    764      * @internal
    765      */
    766     void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
    767 
    768     /**
    769      * Implements the from-rules constructors and ucol_openRules(). NOTE(review): outParseError/outReason are presumably optional outputs — confirm.
    770      * @internal
    771      */
    772     void internalBuildTailoring(
    773             const UnicodeString &rules,
    774             int32_t strength,
    775             UColAttributeValue decompositionMode,
    776             UParseError *outParseError, UnicodeString *outReason,
    777             UErrorCode &errorCode);
    778 
    779     /** Converts a UCollator pointer via fromUCollator() and dynamic_casts it to RuleBasedCollator; the cast yields NULL if the object is not a RuleBasedCollator. @internal */
    780     static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
    781         return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
    782     }
    783     /** Const overload: converts via fromUCollator() and dynamic_casts to const RuleBasedCollator; the cast yields NULL if the object is not a RuleBasedCollator. @internal */
    784     static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
    785         return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
    786     }
    787 
    788     /**
    789      * Appends the CEs (collation elements) for the string str to the vector ces.
    790      * @internal for tests & tools
    791      */
    792     void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
    793 #endif  // U_HIDE_INTERNAL_API
    794 
    795 protected:
    796    /**
    797     * Used internally by registration to define the requested, valid and actual locales.
    798     * @param requestedLocale the requested locale
    799     * @param validLocale the valid locale
    800     * @param actualLocale the actual locale
    801     * @internal
    802     */
    803     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    804 
    805 private:
    806     friend class CollationElementIterator;  // may access this class's private members
    807     friend class Collator;  // may access this class's private members
    808 
    809     RuleBasedCollator(const CollationCacheEntry *entry);  // private; NOTE(review): presumably shares the reference-counted entry (see cacheEntry) — confirm
    810 
    811     /**
    812      * Enumeration of attributes that are relevant for short definition strings
    813      * (e.g., ucol_getShortDefinitionString()).
    814      * Effectively extends UColAttribute: the first value here is UCOL_ATTRIBUTE_COUNT.
    815      */
    816     enum Attributes {
    817         ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,  // presumably the first value past the UColAttribute constants — confirm
    818         ATTR_LIMIT  // one more than ATTR_VARIABLE_TOP; exclusive upper bound for attribute numbers
    819     };
    820 
    821     void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);  // NOTE(review): name suggests ownership of t is taken — confirm in the implementation
    822 
    823     // Overloads for char16_t and uint8_t buffers. Both lengths must be <0 or else both must be >=0.
    824     UCollationResult doCompare(const char16_t *left, int32_t leftLength,
    825                                const char16_t *right, int32_t rightLength,
    826                                UErrorCode &errorCode) const;
    827     UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
    828                                const uint8_t *right, int32_t rightLength,
    829                                UErrorCode &errorCode) const;
    830 
    831     void writeSortKey(const char16_t *s, int32_t length,  // input: char16_t buffer s with its length
    832                       SortKeyByteSink &sink, UErrorCode &errorCode) const;  // NOTE(review): sink presumably receives the sort key bytes — confirm
    833 
    834     void writeIdenticalLevel(const char16_t *s, const char16_t *limit,  // input given as a pointer pair; NOTE(review): limit presumably points one past the end — confirm
    835                              SortKeyByteSink &sink, UErrorCode &errorCode) const;  // NOTE(review): sink presumably receives the output bytes — confirm
    836 
    837     const CollationSettings &getDefaultSettings() const;  // returns a read-only reference; NOTE(review): presumably the tailoring's default settings — confirm
    838 
    839     void setAttributeDefault(int32_t attribute) {  // clears this attribute's bit in explicitlySetAttributes
    840         explicitlySetAttributes = explicitlySetAttributes & ~(static_cast<uint32_t>(1) << attribute);
    841     }
    842     void setAttributeExplicitly(int32_t attribute) {  // sets this attribute's bit in explicitlySetAttributes
    843         explicitlySetAttributes = explicitlySetAttributes | (static_cast<uint32_t>(1) << attribute);
    844     }
    845     UBool attributeHasBeenSetExplicitly(int32_t attribute) const {  // expects 0 <= attribute && attribute < ATTR_LIMIT (not checked)
    846         const uint32_t attributeBit = static_cast<uint32_t>(1) << attribute;
    847         return static_cast<UBool>((explicitlySetAttributes & attributeBit) != 0);
    848     }
    849 
    850     /**
    851      * Tests whether a character is "unsafe" for use as a collation starting point.
    852      *
    853      * @param c code point or code unit
    854      * @return TRUE if c is unsafe, FALSE otherwise
    855      * @see CollationElementIterator#setOffset(int)
    856      */
    857     UBool isUnsafe(UChar32 c) const;
    858 
    859     static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);  // static, U_CALLCONV; NOTE(review): presumably used as a callback — confirm
    860     UBool initMaxExpansions(UErrorCode &errorCode) const;  // NOTE(review): presumably triggers computeMaxExpansions() and reports success — confirm
    861 
    862     void setFastLatinOptions(CollationSettings &ownedSettings) const;  // const method; ownedSettings is a non-const reference, presumably the object that gets updated — confirm
    863 
    864     const CollationData *data;  // NOTE(review): ownership/aliasing not visible in this header — confirm
    865     const CollationSettings *settings;  // reference-counted
    866     const CollationTailoring *tailoring;  // alias of cacheEntry->tailoring
    867     const CollationCacheEntry *cacheEntry;  // reference-counted
    868     Locale validLocale;  // NOTE(review): presumably the valid locale given to setLocales() — confirm
    869     uint32_t explicitlySetAttributes;  // bit set: bit n is set by setAttributeExplicitly(n) and cleared by setAttributeDefault(n)
    870 
    871     UBool actualLocaleIsSameAsValid;  // NOTE(review): presumably TRUE when the actual locale equals validLocale — confirm
    872 };
    873 
    874 U_NAMESPACE_END
    875 
    876 #endif  // !UCONFIG_NO_COLLATION
    877 #endif  // TBLCOLL_H
    878