Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2014, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 * 2012-2014    markus      Rewritten in C++ again.
     58 */
     59 
     60 #ifndef TBLCOLL_H
     61 #define TBLCOLL_H
     62 
     63 #include "unicode/utypes.h"
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/locid.h"
     69 #include "unicode/uiter.h"
     70 #include "unicode/ucol.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 struct CollationData;
     75 struct CollationSettings;
     76 struct CollationTailoring;
     77 /**
     78 * @stable ICU 2.0
     79 */
     80 class StringSearch;
     81 /**
     82 * @stable ICU 2.0
     83 */
     84 class CollationElementIterator;
     85 class CollationKey;
     86 class SortKeyByteSink;
     87 class UnicodeSet;
     88 class UnicodeString;
     89 class UVector64;
     90 
     91 /**
     92  * The RuleBasedCollator class provides the implementation of
     93  * Collator, using data-driven tables. The user can create a customized
     94  * table-based collation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
    105  * Collation Customization</a> section of the User Guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  */
    110 class U_I18N_API RuleBasedCollator : public Collator {
    111 public:
    112     /**
    113      * RuleBasedCollator constructor. This takes the table rules and builds a
    114      * collation table out of them. Please see RuleBasedCollator class
    115      * description for more details on the collation rule syntax.
    116      * @param rules the collation rules to build the collation table from.
    117      * @param status reporting a success or an error.
    118      * @stable ICU 2.0
    119      */
    120     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    121 
    122     /**
    123      * RuleBasedCollator constructor. This takes the table rules and builds a
    124      * collation table out of them. Please see RuleBasedCollator class
    125      * description for more details on the collation rule syntax.
    126      * @param rules the collation rules to build the collation table from.
    127      * @param collationStrength strength for comparison
    128      * @param status reporting a success or an error.
    129      * @stable ICU 2.0
    130      */
    131     RuleBasedCollator(const UnicodeString& rules,
    132                        ECollationStrength collationStrength,
    133                        UErrorCode& status);
    134 
    135     /**
    136      * RuleBasedCollator constructor. This takes the table rules and builds a
    137      * collation table out of them. Please see RuleBasedCollator class
    138      * description for more details on the collation rule syntax.
    139      * @param rules the collation rules to build the collation table from.
    140      * @param decompositionMode the normalisation mode
    141      * @param status reporting a success or an error.
    142      * @stable ICU 2.0
    143      */
    144     RuleBasedCollator(const UnicodeString& rules,
    145                     UColAttributeValue decompositionMode,
    146                     UErrorCode& status);
    147 
    148     /**
    149      * RuleBasedCollator constructor. This takes the table rules and builds a
    150      * collation table out of them. Please see RuleBasedCollator class
    151      * description for more details on the collation rule syntax.
    152      * @param rules the collation rules to build the collation table from.
    153      * @param collationStrength strength for comparison
    154      * @param decompositionMode the normalisation mode
    155      * @param status reporting a success or an error.
    156      * @stable ICU 2.0
    157      */
    158     RuleBasedCollator(const UnicodeString& rules,
    159                     ECollationStrength collationStrength,
    160                     UColAttributeValue decompositionMode,
    161                     UErrorCode& status);
    162 
    163 #ifndef U_HIDE_INTERNAL_API
    164     /**
    165      * TODO: document & propose as public API
    166      * @internal
    167      */
    168     RuleBasedCollator(const UnicodeString &rules,
    169                       UParseError &parseError, UnicodeString &reason,
    170                       UErrorCode &errorCode);
    171 #endif  /* U_HIDE_INTERNAL_API */
    172 
    173     /**
    174      * Copy constructor.
    175      * @param other the RuleBasedCollator object to be copied
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. Binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which usually should be the root collator.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually root. The base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201 
    202     /**
    203      * Destructor.
    204      * @stable ICU 2.0
    205      */
    206     virtual ~RuleBasedCollator();
    207 
    208     /**
    209      * Assignment operator.
    210      * @param other other RuleBasedCollator object to copy from.
    211      * @stable ICU 2.0
    212      */
    213     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    214 
    215     /**
    216      * Returns true if the argument is the same as this object.
    217      * @param other Collator object to be compared.
    218      * @return true if the argument is the same as this object.
    219      * @stable ICU 2.0
    220      */
    221     virtual UBool operator==(const Collator& other) const;
    222 
    223     /**
    224      * Makes a copy of this object.
    225      * @return a copy of this object, owned by the caller
    226      * @stable ICU 2.0
    227      */
    228     virtual Collator* clone(void) const;
    229 
    230     /**
    231      * Creates a collation element iterator for the source string. The caller of
    232      * this method is responsible for the memory management of the returned
    233      * pointer.
    234      * @param source the string over which the CollationElementIterator will
    235      *        iterate.
    236      * @return the collation element iterator of the source string using this as
    237      *         the base Collator.
    238      * @stable ICU 2.2
    239      */
    240     virtual CollationElementIterator* createCollationElementIterator(
    241                                            const UnicodeString& source) const;
    242 
    243     /**
    244      * Creates a collation element iterator for the source. The caller of this
    245      * method is responsible for the memory management of the returned pointer.
    246      * @param source the CharacterIterator which produces the characters over
    247      *        which the CollationElementIterator will iterate.
    248      * @return the collation element iterator of the source using this as the
    249      *         base Collator.
    250      * @stable ICU 2.2
    251      */
    252     virtual CollationElementIterator* createCollationElementIterator(
    253                                          const CharacterIterator& source) const;
    254 
    255     // Make deprecated versions of Collator::compare() visible.
    256     using Collator::compare;
    257 
    258     /**
    259     * The comparison function compares the character data stored in two
    260     * different strings. Returns information about whether a string is less
    261     * than, greater than or equal to another string.
    262     * @param source the source string to be compared with.
    263     * @param target the string that is to be compared with the source string.
    264     * @param status possible error code
    265     * @return Returns an enum value. UCOL_GREATER if source is greater
    266     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    267     * than target
    268     * @stable ICU 2.6
    269     **/
    270     virtual UCollationResult compare(const UnicodeString& source,
    271                                      const UnicodeString& target,
    272                                      UErrorCode &status) const;
    273 
    274     /**
    275     * Does the same thing as compare but limits the comparison to a specified
    276     * length
    277     * @param source the source string to be compared with.
    278     * @param target the string that is to be compared with the source string.
    279     * @param length the length the comparison is limited to
    280     * @param status possible error code
    281     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    282     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    283     *         length) is equal to target; UCOL_LESS if source (up to the specified
    284     *         length) is less  than target.
    285     * @stable ICU 2.6
    286     */
    287     virtual UCollationResult compare(const UnicodeString& source,
    288                                      const UnicodeString& target,
    289                                      int32_t length,
    290                                      UErrorCode &status) const;
    291 
    292     /**
    293     * The comparison function compares the character data stored in two
    294     * different string arrays. Returns information about whether a string array
    295     * is less than, greater than or equal to another string array.
    296     * @param source the source string array to be compared with.
    297     * @param sourceLength the length of the source string array.  If this value
    298     *        is equal to -1, the string array is null-terminated.
    299     * @param target the string that is to be compared with the source string.
    300     * @param targetLength the length of the target string array.  If this value
    301     *        is equal to -1, the string array is null-terminated.
    302     * @param status possible error code
    303     * @return Returns an enum value. UCOL_GREATER if source is greater
    304     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    305     * than target
    306     * @stable ICU 2.6
    307     */
    308     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    309                                      const UChar* target, int32_t targetLength,
    310                                      UErrorCode &status) const;
    311 
    312     /**
    313      * Compares two strings using the Collator.
    314      * Returns whether the first one compares less than/equal to/greater than
    315      * the second one.
    316      * This version takes UCharIterator input.
    317      * @param sIter the first ("source") string iterator
    318      * @param tIter the second ("target") string iterator
    319      * @param status ICU status
    320      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    321      * @stable ICU 4.2
    322      */
    323     virtual UCollationResult compare(UCharIterator &sIter,
    324                                      UCharIterator &tIter,
    325                                      UErrorCode &status) const;
    326 
    327     /**
    328      * Compares two UTF-8 strings using the Collator.
    329      * Returns whether the first one compares less than/equal to/greater than
    330      * the second one.
    331      * This version takes UTF-8 input.
    332      * Note that a StringPiece can be implicitly constructed
    333      * from a std::string or a NUL-terminated const char * string.
    334      * @param source the first UTF-8 string
    335      * @param target the second UTF-8 string
    336      * @param status ICU status
    337      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    338      * @stable ICU 51
    339      */
    340     virtual UCollationResult compareUTF8(const StringPiece &source,
    341                                          const StringPiece &target,
    342                                          UErrorCode &status) const;
    343 
    344     /**
    345     * Transforms a specified region of the string into a series of characters
    346     * that can be compared with CollationKey.compare. Use a CollationKey when
    347     * you need to do repeated comparisons on the same string. For a single
    348     * comparison the compare method will be faster.
    349     * @param source the source string.
    350     * @param key the transformed key of the source string.
    351     * @param status the error code status.
    352     * @return the transformed key.
    353     * @see CollationKey
    354     * @stable ICU 2.0
    355     */
    356     virtual CollationKey& getCollationKey(const UnicodeString& source,
    357                                           CollationKey& key,
    358                                           UErrorCode& status) const;
    359 
    360     /**
    361     * Transforms a specified region of the string into a series of characters
    362     * that can be compared with CollationKey.compare. Use a CollationKey when
    363     * you need to do repeated comparisons on the same string. For a single
    364     * comparison the compare method will be faster.
    365     * @param source the source string.
    366     * @param sourceLength the length of the source string.
    367     * @param key the transformed key of the source string.
    368     * @param status the error code status.
    369     * @return the transformed key.
    370     * @see CollationKey
    371     * @stable ICU 2.0
    372     */
    373     virtual CollationKey& getCollationKey(const UChar *source,
    374                                           int32_t sourceLength,
    375                                           CollationKey& key,
    376                                           UErrorCode& status) const;
    377 
    378     /**
    379      * Generates the hash code for the rule-based collation object.
    380      * @return the hash code.
    381      * @stable ICU 2.0
    382      */
    383     virtual int32_t hashCode() const;
    384 
    385     /**
    386     * Gets the locale of the Collator
    387     * @param type can be either requested, valid or actual locale. For more
    388     *             information see the definition of ULocDataLocaleType in
    389     *             uloc.h
    390     * @param status the error code status.
    391     * @return locale where the collation data lives. If the collator
    392     *         was instantiated from rules, locale is empty.
    393     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    394     */
    395     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    396 
    397     /**
    398      * Gets the tailoring rules for this collator.
    399      * @return the collation tailoring from which this collator was created
    400      * @stable ICU 2.0
    401      */
    402     const UnicodeString& getRules() const;
    403 
    404     /**
    405      * Gets the version information for a Collator.
    406      * @param info the version # information, the result will be filled in
    407      * @stable ICU 2.0
    408      */
    409     virtual void getVersion(UVersionInfo info) const;
    410 
    411 #ifndef U_HIDE_DEPRECATED_API
    412     /**
    413      * Returns the maximum length of any expansion sequences that end with the
    414      * specified comparison order.
    415      *
    416      * This is specific to the kind of collation element values and sequences
    417      * returned by the CollationElementIterator.
    418      * Call CollationElementIterator::getMaxExpansion() instead.
    419      *
    420      * @param order a collation order returned by CollationElementIterator::previous
    421      *              or CollationElementIterator::next.
    422      * @return maximum size of the expansion sequences ending with the collation
    423      *         element, or 1 if the collation element does not occur at the end of
    424      *         any expansion sequence
    425      * @see CollationElementIterator#getMaxExpansion
    426      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
    427      */
    428     int32_t getMaxExpansion(int32_t order) const;
    429 #endif  /* U_HIDE_DEPRECATED_API */
    430 
    431     /**
    432      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    433      * method is to implement a simple version of RTTI, since not all C++
    434      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    435      * methods call this method.
    436      * @return The class ID for this object. All objects of a given class have
    437      *         the same class ID. Objects of other classes have different class
    438      *         IDs.
    439      * @stable ICU 2.0
    440      */
    441     virtual UClassID getDynamicClassID(void) const;
    442 
    443     /**
    444      * Returns the class ID for this class. This is useful only for comparing to
    445      * a return value from getDynamicClassID(). For example:
    446      * <pre>
    447      * Base* polymorphic_pointer = createPolymorphicObject();
    448      * if (polymorphic_pointer->getDynamicClassID() ==
    449      *                                          Derived::getStaticClassID()) ...
    450      * </pre>
    451      * @return The class ID for all objects of this class.
    452      * @stable ICU 2.0
    453      */
    454     static UClassID U_EXPORT2 getStaticClassID(void);
    455 
    456 #ifndef U_HIDE_DEPRECATED_API
    457     /**
    458      * Do not use this method: The caller and the ICU library might use different heaps.
    459      * Use cloneBinary() instead which writes to caller-provided memory.
    460      *
    461      * Returns a binary format of this collator.
    462      * @param length Returns the length of the data, in bytes
    463      * @param status the error code status.
    464      * @return memory, owned by the caller, of size 'length' bytes.
    465      * @deprecated ICU 52. Use cloneBinary() instead.
    466      */
    467     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
    468 #endif  /* U_HIDE_DEPRECATED_API */
    469 
    470     /** Creates a binary image of a collator. This binary image can be stored and
    471     *  later used to instantiate a collator using ucol_openBinary.
    472     *  This API supports preflighting.
    473     *  @param buffer a fill-in buffer to receive the binary image
    474     *  @param capacity capacity of the destination buffer
    475     *  @param status for catching errors
    476     *  @return size of the image
    477     *  @see ucol_openBinary
    478     *  @stable ICU 3.4
    479     */
    480     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
    481 
    482     /**
    483      * Returns current rules. Delta defines whether full rules are returned or
    484      * just the tailoring.
    485      *
    486      * getRules(void) should normally be used instead.
    487      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    488      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    489      * @param buffer UnicodeString to store the result rules
    490      * @stable ICU 2.2
    491      * @see UCOL_FULL_RULES
    492      */
    493     void getRules(UColRuleOption delta, UnicodeString &buffer) const;
    494 
    495     /**
    496      * Universal attribute setter
    497      * @param attr attribute type
    498      * @param value attribute value
    499      * @param status to indicate whether the operation went on smoothly or there were errors
    500      * @stable ICU 2.2
    501      */
    502     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    503                               UErrorCode &status);
    504 
    505     /**
    506      * Universal attribute getter.
    507      * @param attr attribute type
    508      * @param status to indicate whether the operation went on smoothly or there were errors
    509      * @return attribute value
    510      * @stable ICU 2.2
    511      */
    512     virtual UColAttributeValue getAttribute(UColAttribute attr,
    513                                             UErrorCode &status) const;
    514 
    515     /**
    516      * Sets the variable top to the top of the specified reordering group.
    517      * The variable top determines the highest-sorting character
    518      * which is affected by UCOL_ALTERNATE_HANDLING.
    519      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
    520      * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
    521      *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
    522      *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
    523      * @param errorCode Standard ICU error code. Its input value must
    524      *                  pass the U_SUCCESS() test, or else the function returns
    525      *                  immediately. Check for U_FAILURE() on output or use with
    526      *                  function chaining. (See User Guide for details.)
    527      * @return *this
    528      * @see getMaxVariable
    529      * @draft ICU 53
    530      */
    531     virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
    532 
    533     /**
    534      * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
    535      * @return the maximum variable reordering group.
    536      * @see setMaxVariable
    537      * @draft ICU 53
    538      */
    539     virtual UColReorderCode getMaxVariable() const;
    540 
    541     /**
    542      * Sets the variable top to the primary weight of the specified string.
    543      *
    544      * Beginning with ICU 53, the variable top is pinned to
    545      * the top of one of the supported reordering groups,
    546      * and it must not be beyond the last of those groups.
    547      * See setMaxVariable().
    548      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    549      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    550      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    551      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    552      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    553      *    the last reordering group supported by setMaxVariable()
    554      * @return variable top primary weight
    555      * @deprecated ICU 53 Call setMaxVariable() instead.
    556      */
    557     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    558 
    559     /**
    560      * Sets the variable top to the primary weight of the specified string.
    561      *
    562      * Beginning with ICU 53, the variable top is pinned to
    563      * the top of one of the supported reordering groups,
    564      * and it must not be beyond the last of those groups.
    565      * See setMaxVariable().
    566      * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
    567      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    568      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    569      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
    570      *    the last reordering group supported by setMaxVariable()
    571      * @return variable top primary weight
    572      * @deprecated ICU 53 Call setMaxVariable() instead.
    573      */
    574     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    575 
    576     /**
    577      * Sets the variable top to the specified primary weight.
    578      *
    579      * Beginning with ICU 53, the variable top is pinned to
    580      * the top of one of the supported reordering groups,
    581      * and it must not be beyond the last of those groups.
    582      * See setMaxVariable().
    583      * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
    584      * @param status error code
    585      * @deprecated ICU 53 Call setMaxVariable() instead.
    586      */
    587     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    588 
    589     /**
    590      * Gets the variable top value of a Collator.
    591      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    592      * @return the variable top primary weight
    593      * @see getMaxVariable
    594      * @stable ICU 2.0
    595      */
    596     virtual uint32_t getVariableTop(UErrorCode &status) const;
    597 
    598     /**
    599      * Get a UnicodeSet that contains all the characters and sequences tailored in
    600      * this collator.
    601      * @param status      error code of the operation
    602      * @return a pointer to a UnicodeSet object containing all the
    603      *         code points and sequences that may sort differently than
    604      *         in the root collator. The object must be disposed of by using delete
    605      * @stable ICU 2.4
    606      */
    607     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    608 
    609     /**
    610      * Get the sort key as an array of bytes from a UnicodeString.
    611      * @param source string to be processed.
    612      * @param result buffer to store result in. If NULL, number of bytes needed
    613      *        will be returned.
    614      * @param resultLength length of the result buffer. If it is not large enough, the
    615      *        buffer will be filled to capacity.
    616      * @return Number of bytes needed for storing the sort key
    617      * @stable ICU 2.0
    618      */
    619     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    620                                int32_t resultLength) const;
    621 
    622     /**
    623      * Get the sort key as an array of bytes from a UChar buffer.
    624      * @param source string to be processed.
    625      * @param sourceLength length of string to be processed. If -1, the string
    626      *        is 0 terminated and length will be decided by the function.
    627      * @param result buffer to store result in. If NULL, number of bytes needed
    628      *        will be returned.
    629      * @param resultLength length of the result buffer. If it is not large enough, the
    630      *        buffer will be filled to capacity.
    631      * @return Number of bytes needed for storing the sort key
    632      * @stable ICU 2.2
    633      */
    634     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    635                                uint8_t *result, int32_t resultLength) const;
    636 
    637     /**
    638      * Retrieves the reordering codes for this collator.
    639      * @param dest The array to fill with the script ordering.
    640      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    641      *  will only return the length of the result without writing any of the result codes (pre-flighting).
    642      * @param status A reference to an error code value, which must not indicate
    643      * a failure before the function call.
    644      * @return The length of the script ordering array.
    645      * @see ucol_setReorderCodes
    646      * @see Collator#getEquivalentReorderCodes
    647      * @see Collator#setReorderCodes
    648      * @stable ICU 4.8
    649      */
    650      virtual int32_t getReorderCodes(int32_t *dest,
    651                                      int32_t destCapacity,
    652                                      UErrorCode& status) const;
    653 
    654     /**
    655      * Sets the ordering of scripts for this collator.
    656      * @param reorderCodes An array of script codes in the new order. This can be NULL if
    657      * reorderCodesLength is also set to 0. An empty array will clear any reordering codes on the collator.
    658      * @param reorderCodesLength The length of reorderCodes.
    659      * @param status error code
    660      * @see Collator#getReorderCodes
    661      * @see Collator#getEquivalentReorderCodes
    662      * @stable ICU 4.8
    663      */
    664      virtual void setReorderCodes(const int32_t* reorderCodes,
    665                                   int32_t reorderCodesLength,
    666                                   UErrorCode& status) ;
    667 
    668     /**
    669      * Implements ucol_strcollUTF8().
             * Compares two strings that are passed as char buffers with explicit lengths.
             * @param left the left-hand string (presumably UTF-8, per the function name)
             * @param leftLength length of left; NOTE(review): whether a negative length
             *        means "NUL-terminated" is not visible in this header -- confirm
             * @param right the right-hand string, same conventions as left
             * @param rightLength length of right, same conventions as leftLength
             * @param errorCode ICU error code
             * @return the comparison result as a UCollationResult
    670      * @internal
    671      */
    672     virtual UCollationResult internalCompareUTF8(
    673             const char *left, int32_t leftLength,
    674             const char *right, int32_t rightLength,
    675             UErrorCode &errorCode) const;
    676 
    677     /** Gets the short definition string for a collator. This internal API harvests the collator's
    678      *  locale and the attribute set and produces a string that can be used for opening
    679      *  a collator with the same attributes using the ucol_openFromShortString API.
    680      *  This string will be normalized.
    681      *  The structure and the syntax of the string are defined in the "Naming collators"
    682      *  section of the User Guide:
    683      *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
    684      *  This function supports preflighting.
    685      *
    686      *  This is internal, and intended to be used with delegate converters.
    687      *
    688      *  @param locale a locale that will appear as the collator's locale in the resulting
    689      *                short string definition. If NULL, the locale will be harvested
    690      *                from the collator.
    691      *  @param buffer space to hold the resulting string
    692      *  @param capacity capacity of the buffer
    693      *  @param status for returning errors. All the preflighting errors are reported.
    694      *  @return length of the resulting string
    695      *  @see ucol_openFromShortString
    696      *  @see ucol_normalizeShortDefinitionString
    697      *  @see ucol_getShortDefinitionString
    698      *  @internal
    699      */
    700     virtual int32_t internalGetShortDefinitionString(const char *locale,
    701                                                      char *buffer,
    702                                                      int32_t capacity,
    703                                                      UErrorCode &status) const;
    704 
    705     /**
    706      * Implements ucol_nextSortKeyPart().
             * @param iter the UCharIterator that supplies the input text
             * @param state two-element state array; NOTE(review): presumably carries the
             *        position between successive calls -- confirm against ucol_nextSortKeyPart()
             * @param dest output byte buffer
             * @param count NOTE(review): presumably the number of bytes that may be written
             *        to dest -- confirm
             * @param errorCode ICU error code
             * @return an int32_t; NOTE(review): presumably the number of bytes written -- confirm
    707      * @internal
    708      */
    709     virtual int32_t internalNextSortKeyPart(
    710             UCharIterator *iter, uint32_t state[2],
    711             uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
    712 
    713 #ifndef U_HIDE_INTERNAL_API
    714     /**
    715      * Default constructor. Only for use in ucol_openRules().
    716      * @internal
    717      */
    718     RuleBasedCollator();
    719 
    720     /**
    721      * Implements ucol_getLocaleByType().
    722      * Needed because the lifetime of the locale ID string must match that of the collator.
    723      * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper.
             * @param type selects which kind of locale ID is requested (a ULocDataLocaleType value)
             * @param errorCode ICU error code
             * @return the locale ID as a C string; per the note above, its lifetime
             *         must match that of the collator
    724      * @internal
    725      */
    726     const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
    727 
    728     /**
    729      * Implements ucol_getContractionsAndExpansions().
    730      * Gets this collator's set of contraction strings and/or its set of
    731      * characters and strings that map to multiple collation elements (expansions).
    732      * If addPrefixes is TRUE, then contractions that are expressed as
    733      * prefix/pre-context rules are included.
    734      * @param contractions if not NULL, the set to hold the contractions
    735      * @param expansions if not NULL, the set to hold the expansions
    736      * @param addPrefixes include prefix contextual mappings
    737      * @param errorCode in/out ICU error code
    738      * @internal
    739      */
    740     void internalGetContractionsAndExpansions(
    741             UnicodeSet *contractions, UnicodeSet *expansions,
    742             UBool addPrefixes, UErrorCode &errorCode) const;
    743 
    744     /**
    745      * Adds the contractions that start with character c to the set.
    746      * Ignores prefixes. Used by AlphabeticIndex.
             * @param c the character with which the added contractions start
             * @param set the set to which the contractions are added
             * @param errorCode ICU error code
    747      * @internal
    748      */
    749     void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
    750 
    751     /**
    752      * Implements from-rule constructors, and ucol_openRules().
             * @param rules the collation rule string from which the tailoring is built
             * @param strength the collation strength, passed as an int32_t;
             *        NOTE(review): the accepted values are not visible in this header -- confirm
             * @param decompositionMode the decomposition mode, as a UColAttributeValue
             * @param outParseError NOTE(review): presumably receives parse error details on
             *        failure, and may presumably be NULL -- confirm
             * @param outReason NOTE(review): presumably receives a textual reason on
             *        failure, and may presumably be NULL -- confirm
             * @param errorCode ICU error code
    753      * @internal
    754      */
    755     void internalBuildTailoring(
    756             const UnicodeString &rules,
    757             int32_t strength,
    758             UColAttributeValue decompositionMode,
    759             UParseError *outParseError, UnicodeString *outReason,
    760             UErrorCode &errorCode);
    761 
    762     /**
             * Converts a UCollator pointer to a RuleBasedCollator pointer by applying
             * dynamic_cast to the result of fromUCollator(uc). Because this is a
             * pointer dynamic_cast, the result is NULL if the object is not a
             * RuleBasedCollator.
             * @internal
             */
    763     static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
    764         return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
    765     }
    766     /**
             * Const overload: converts a const UCollator pointer to a const
             * RuleBasedCollator pointer by applying dynamic_cast to the result of
             * fromUCollator(uc). The result is NULL if the object is not a
             * RuleBasedCollator.
             * @internal
             */
    767     static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
    768         return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
    769     }
    770 
    771     /**
    772      * Appends the CEs (collation elements) for the string to the vector.
             * @param str the input string
             * @param ces the vector to which the CEs are appended
             * @param errorCode ICU error code
    773      * @internal for tests & tools
    774      */
    775     void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
    776 #endif  // U_HIDE_INTERNAL_API
    777 
    778 protected:
    779    /**
    780     * Used internally by registration to define the requested, valid and actual locales.
    781     * @param requestedLocale the requested locale
    782     * @param validLocale the valid locale
    783     * @param actualLocale the actual locale
    784     * @internal
    785     */
    786     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    787 
    788 private:
    789     friend class CollationElementIterator;  // granted access to the private members of this class
    790     friend class Collator;  // granted access to the private members of this class
    791 
    792     RuleBasedCollator(const CollationTailoring *t, const Locale &vl);  // private constructor from a tailoring; NOTE(review): vl is presumably the valid locale (cf. the validLocale member) -- confirm
    793 
    794     /**
    795      * Enumeration of attributes that are relevant for short definition strings
    796      * (e.g., ucol_getShortDefinitionString()).
    797      * Effectively extends UColAttribute.
    798      */
    799     enum Attributes {
    800         ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,  // starts at UCOL_ATTRIBUTE_COUNT, i.e. right after the UColAttribute values
    801         ATTR_LIMIT  // ATTR_VARIABLE_TOP + 1: exclusive upper bound for attribute indexes
    802     };
    803 
    804     void adoptTailoring(CollationTailoring *t);  // NOTE(review): "adopt" suggests this collator takes over t (the members below are reference-counted) -- confirm in the implementation
    805 
    806     // Both lengths must be <0 or else both must be >=0.
    807     UCollationResult doCompare(const UChar *left, int32_t leftLength,
    808                                const UChar *right, int32_t rightLength,
    809                                UErrorCode &errorCode) const;  // overload for UChar buffers
    810     UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
    811                                const uint8_t *right, int32_t rightLength,
    812                                UErrorCode &errorCode) const;  // overload for byte buffers; NOTE(review): presumably UTF-8 (cf. internalCompareUTF8) and subject to the same length rule -- confirm
    813 
    814     void writeSortKey(const UChar *s, int32_t length,
    815                       SortKeyByteSink &sink, UErrorCode &errorCode) const;  // output goes to sink; NOTE(review): presumably writes the sort key for s/length (cf. getSortKey) -- confirm, including negative length handling
    816 
    817     void writeIdenticalLevel(const UChar *s, const UChar *limit,
    818                              SortKeyByteSink &sink, UErrorCode &errorCode) const;  // input is delimited by the s and limit pointers, output goes to sink; NOTE(review): presumably the identical-level part of a sort key -- confirm
    819 
    820     const CollationSettings &getDefaultSettings() const;  // returns a const reference to a CollationSettings object; NOTE(review): presumably the tailoring's defaults, as opposed to the current `settings` member -- confirm
    821 
    822     void setAttributeDefault(int32_t attribute) {  // marks the attribute as not explicitly set
    823         explicitlySetAttributes = explicitlySetAttributes & ~((uint32_t)1 << attribute);  // clear bit number `attribute`
    824     }
    825     void setAttributeExplicitly(int32_t attribute) {  // marks the attribute as explicitly set
    826         explicitlySetAttributes = explicitlySetAttributes | ((uint32_t)1 << attribute);  // set bit number `attribute`
    827     }
    828     UBool attributeHasBeenSetExplicitly(int32_t attribute) const {  // reports whether bit number `attribute` is set
    829         // Precondition (not enforced): 0 <= attribute && attribute < ATTR_LIMIT.
    830         return (UBool)(((explicitlySetAttributes >> attribute) & (uint32_t)1) != 0);
    831     }
    832 
    833     /**
    834      * Tests whether a character is "unsafe" for use as a collation starting point.
    835      *
    836      * @param c the code point or code unit to test
    837      * @return TRUE if c is unsafe
    838      * @see CollationElementIterator#setOffset(int)
    839      */
    840     UBool isUnsafe(UChar32 c) const;
    841 
    842     static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);  // static: operates on the given tailoring rather than on a collator instance
    843     UBool initMaxExpansions(UErrorCode &errorCode) const;  // NOTE(review): presumably one-time setup of max-expansions data, returning whether it succeeded -- confirm in the implementation
    844 
    845     void setFastLatinOptions(CollationSettings &ownedSettings) const;  // const method: writes to the passed-in settings object, not to this collator; NOTE(review): "owned" presumably means a non-shared copy -- confirm
    846 
    847     const CollationData *data;  // not annotated as reference-counted, unlike the two pointers below; NOTE(review): confirm who owns it
    848     const CollationSettings *settings;  // reference-counted
    849     const CollationTailoring *tailoring;  // reference-counted
    850     Locale validLocale;  // NOTE(review): presumably set from the private constructor's vl argument and by setLocales() -- confirm
    851     uint32_t explicitlySetAttributes;  // bit set, one bit per attribute index; see setAttributeExplicitly() and setAttributeDefault()
    852 
    853     UBool actualLocaleIsSameAsValid;  // per the name: whether the actual locale is the same as validLocale
    854 };
    855 
    856 U_NAMESPACE_END
    857 
    858 #endif  // !UCONFIG_NO_COLLATION
    859 #endif  // TBLCOLL_H
    860