Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2012, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 */
     58 
     59 #ifndef TBLCOLL_H
     60 #define TBLCOLL_H
     61 
     62 #include "unicode/utypes.h"
     63 
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/ucol.h"
     69 #include "unicode/sortkey.h"
     70 #include "unicode/normlzr.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 /**
     75 * @stable ICU 2.0
     76 */
     77 class StringSearch;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class CollationElementIterator;
     82 
     83 /**
     84  * The RuleBasedCollator class provides the simple implementation of
     85  * Collator, using data-driven tables. The user can create a customized
     86  * table-based collation.
     87  * <P>
     88  * <em>Important: </em>The ICU collation service has been reimplemented
     89  * in order to achieve better performance and UCA compliance.
     90  * For details, see the
     91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     92  * collation design document</a>.
     93  * <p>
     94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
    105  * Collation customization</a> section of the users guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  * @version    2.0 11/15/2001
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator
    112 {
    113 public:
    114 
    115   // constructor -------------------------------------------------------------
    116 
    117     /**
    118      * RuleBasedCollator constructor. This takes the table rules and builds a
    119      * collation table out of them. Please see RuleBasedCollator class
    120      * description for more details on the collation rule syntax.
    121      * @param rules the collation rules to build the collation table from.
    122      * @param status reporting a success or an error.
    123      * @see Locale
    124      * @stable ICU 2.0
    125      */
    126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    127 
    128     /**
    129      * RuleBasedCollator constructor. This takes the table rules and builds a
    130      * collation table out of them. Please see RuleBasedCollator class
    131      * description for more details on the collation rule syntax.
    132      * @param rules the collation rules to build the collation table from.
    133      * @param collationStrength default strength for comparison
    134      * @param status reporting a success or an error.
    135      * @see Locale
    136      * @stable ICU 2.0
    137      */
    138     RuleBasedCollator(const UnicodeString& rules,
    139                        ECollationStrength collationStrength,
    140                        UErrorCode& status);
    141 
    142     /**
    143      * RuleBasedCollator constructor. This takes the table rules and builds a
    144      * collation table out of them. Please see RuleBasedCollator class
    145      * description for more details on the collation rule syntax.
    146      * @param rules the collation rules to build the collation table from.
    147      * @param decompositionMode the normalisation mode
    148      * @param status reporting a success or an error.
    149      * @see Locale
    150      * @stable ICU 2.0
    151      */
    152     RuleBasedCollator(const UnicodeString& rules,
    153                     UColAttributeValue decompositionMode,
    154                     UErrorCode& status);
    155 
    156     /**
    157      * RuleBasedCollator constructor. This takes the table rules and builds a
    158      * collation table out of them. Please see RuleBasedCollator class
    159      * description for more details on the collation rule syntax.
    160      * @param rules the collation rules to build the collation table from.
    161      * @param collationStrength default strength for comparison
    162      * @param decompositionMode the normalisation mode
    163      * @param status reporting a success or an error.
    164      * @see Locale
    165      * @stable ICU 2.0
    166      */
    167     RuleBasedCollator(const UnicodeString& rules,
    168                     ECollationStrength collationStrength,
    169                     UColAttributeValue decompositionMode,
    170                     UErrorCode& status);
    171 
    172     /**
    173      * Copy constructor.
    174      * @param other the RuleBasedCollator object to be copied
    175      * @see Locale
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. Binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which usually should be UCA.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually UCA. Base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201     // destructor --------------------------------------------------------------
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     // public methods ----------------------------------------------------------
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other the RuleBasedCollator object to be copied.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if the argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if the argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Makes a copy of this object.
    228      * @return a copy of this object, owned by the caller
    229      * @stable ICU 2.0
    230      */
    231     virtual Collator* clone(void) const;
    232 
    233     /**
    234      * Creates a collation element iterator for the source string. The caller of
    235      * this method is responsible for the memory management of the returned
    236      * pointer.
    237      * @param source the string over which the CollationElementIterator will
    238      *        iterate.
    239      * @return the collation element iterator of the source string using this as
    240      *         the base Collator.
    241      * @stable ICU 2.2
    242      */
    243     virtual CollationElementIterator* createCollationElementIterator(
    244                                            const UnicodeString& source) const;
    245 
    246     /**
    247      * Creates a collation element iterator for the source. The caller of this
    248      * method is responsible for the memory management of the returned pointer.
    249      * @param source the CharacterIterator which produces the characters over
    250      *        which the CollationElementIterator will iterate.
    251      * @return the collation element iterator of the source using this as the
    252      *         base Collator.
    253      * @stable ICU 2.2
    254      */
    255     virtual CollationElementIterator* createCollationElementIterator(
    256                                          const CharacterIterator& source) const;
    257 
    258     // Make deprecated versions of Collator::compare() visible.
    259     using Collator::compare;
    260 
    261     /**
    262     * The comparison function compares the character data stored in two
    263     * different strings. Returns information about whether a string is less
    264     * than, greater than or equal to another string.
    265     * @param source the source string to be compared with.
    266     * @param target the string that is to be compared with the source string.
    267     * @param status possible error code
    268     * @return Returns an enum value. UCOL_GREATER if source is greater
    269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    270     * than target
    271     * @stable ICU 2.6
    272     **/
    273     virtual UCollationResult compare(const UnicodeString& source,
    274                                       const UnicodeString& target,
    275                                       UErrorCode &status) const;
    276 
    277     /**
    278     * Does the same thing as compare but limits the comparison to a specified
    279     * length
    280     * @param source the source string to be compared with.
    281     * @param target the string that is to be compared with the source string.
    282     * @param length the length the comparison is limited to
    283     * @param status possible error code
    284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    285     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    286     *         length) is equal to target; UCOL_LESS if source (up to the specified
    287     *         length) is less  than target.
    288     * @stable ICU 2.6
    289     */
    290     virtual UCollationResult compare(const UnicodeString& source,
    291                                       const UnicodeString& target,
    292                                       int32_t length,
    293                                       UErrorCode &status) const;
    294 
    295     /**
    296     * The comparison function compares the character data stored in two
    297     * different string arrays. Returns information about whether a string array
    298     * is less than, greater than or equal to another string array.
    299     * @param source the source string array to be compared with.
    300     * @param sourceLength the length of the source string array.  If this value
    301     *        is equal to -1, the string array is null-terminated.
    302     * @param target the string that is to be compared with the source string.
    303     * @param targetLength the length of the target string array.  If this value
    304     *        is equal to -1, the string array is null-terminated.
    305     * @param status possible error code
    306     * @return Returns an enum value. UCOL_GREATER if source is greater
    307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    308     * than target
    309     * @stable ICU 2.6
    310     */
    311     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    312                                       const UChar* target, int32_t targetLength,
    313                                       UErrorCode &status) const;
    314 
    315     /**
    316      * Compares two strings using the Collator.
    317      * Returns whether the first one compares less than/equal to/greater than
    318      * the second one.
    319      * This version takes UCharIterator input.
    320      * @param sIter the first ("source") string iterator
    321      * @param tIter the second ("target") string iterator
    322      * @param status ICU status
    323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    324      * @stable ICU 4.2
    325      */
    326     virtual UCollationResult compare(UCharIterator &sIter,
    327                                      UCharIterator &tIter,
    328                                      UErrorCode &status) const;
    329 
    330     /**
    331     * Transforms a specified region of the string into a series of characters
    332     * that can be compared with CollationKey.compare. Use a CollationKey when
    333     * you need to do repeated comparisons on the same string. For a single
    334     * comparison the compare method will be faster.
    335     * @param source the source string.
    336     * @param key the transformed key of the source string.
    337     * @param status the error code status.
    338     * @return the transformed key.
    339     * @see CollationKey
    340     * @stable ICU 2.0
    341     */
    342     virtual CollationKey& getCollationKey(const UnicodeString& source,
    343                                           CollationKey& key,
    344                                           UErrorCode& status) const;
    345 
    346     /**
    347     * Transforms a specified region of the string into a series of characters
    348     * that can be compared with CollationKey.compare. Use a CollationKey when
    349     * you need to do repeated comparisons on the same string. For a single
    350     * comparison the compare method will be faster.
    351     * @param source the source string.
    352     * @param sourceLength the length of the source string.
    353     * @param key the transformed key of the source string.
    354     * @param status the error code status.
    355     * @return the transformed key.
    356     * @see CollationKey
    357     * @stable ICU 2.0
    358     */
    359     virtual CollationKey& getCollationKey(const UChar *source,
    360                                           int32_t sourceLength,
    361                                           CollationKey& key,
    362                                           UErrorCode& status) const;
    363 
    364     /**
    365      * Generates the hash code for the rule-based collation object.
    366      * @return the hash code.
    367      * @stable ICU 2.0
    368      */
    369     virtual int32_t hashCode(void) const;
    370 
    371     /**
    372     * Gets the locale of the Collator
    373     * @param type can be either requested, valid or actual locale. For more
    374     *             information see the definition of ULocDataLocaleType in
    375     *             uloc.h
    376     * @param status the error code status.
    377     * @return locale where the collation data lives. If the collator
    378     *         was instantiated from rules, locale is empty.
    379     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    380     */
    381     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    382 
    383     /**
    384      * Gets the tailoring rules for this collator.
    385      * @return the collation tailoring from which this collator was created
    386      * @stable ICU 2.0
    387      */
    388     const UnicodeString& getRules(void) const;
    389 
    390     /**
    391      * Gets the version information for a Collator.
    392      * @param info the version # information, the result will be filled in
    393      * @stable ICU 2.0
    394      */
    395     virtual void getVersion(UVersionInfo info) const;
    396 
    397     /**
    398      * Return the maximum length of any expansion sequences that end with the
    399      * specified comparison order.
    400      * @param order a collation order returned by previous or next.
    401      * @return maximum size of the expansion sequences ending with the collation
    402      *         element or 1 if collation element does not occur at the end of
    403      *         any expansion sequence
    404      * @see CollationElementIterator#getMaxExpansion
    405      * @stable ICU 2.0
    406      */
    407     int32_t getMaxExpansion(int32_t order) const;
    408 
    409     /**
    410      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    411      * method is to implement a simple version of RTTI, since not all C++
    412      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    413      * methods call this method.
    414      * @return The class ID for this object. All objects of a given class have
    415      *         the same class ID. Objects of other classes have different class
    416      *         IDs.
    417      * @stable ICU 2.0
    418      */
    419     virtual UClassID getDynamicClassID(void) const;
    420 
    421     /**
    422      * Returns the class ID for this class. This is useful only for comparing to
    423      * a return value from getDynamicClassID(). For example:
    424      * <pre>
    425      * Base* polymorphic_pointer = createPolymorphicObject();
    426      * if (polymorphic_pointer->getDynamicClassID() ==
    427      *                                          Derived::getStaticClassID()) ...
    428      * </pre>
    429      * @return The class ID for all objects of this class.
    430      * @stable ICU 2.0
    431      */
    432     static UClassID U_EXPORT2 getStaticClassID(void);
    433 
    434     /**
    435      * Returns the binary format of the class's rules. The format is that of
    436      * .col files.
    437      * @param length Returns the length of the data, in bytes
    438      * @param status the error code status.
    439      * @return memory, owned by the caller, of size 'length' bytes.
    440      * @stable ICU 2.2
    441      */
    442     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
    443 
    444 
    445     /** Creates a binary image of a collator. This binary image can be stored and
    446     *  later used to instantiate a collator using ucol_openBinary.
    447     *  This API supports preflighting.
    448     *  @param buffer a fill-in buffer to receive the binary image
    449     *  @param capacity capacity of the destination buffer
    450     *  @param status for catching errors
    451     *  @return size of the image
    452     *  @see ucol_openBinary
    453     *  @stable ICU 3.4
    454     */
    455     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
    456 
    457     /**
    458      * Returns current rules. Delta defines whether full rules are returned or
    459      * just the tailoring.
    460      *
    461      * getRules(void) should normally be used instead.
    462      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    463      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    464      * @param buffer UnicodeString to store the result rules
    465      * @stable ICU 2.2
    466      * @see UCOL_FULL_RULES
    467      */
    468     void getRules(UColRuleOption delta, UnicodeString &buffer);
    469 
    470     /**
    471      * Universal attribute setter
    472      * @param attr attribute type
    473      * @param value attribute value
    474      * @param status to indicate whether the operation went on smoothly or there were errors
    475      * @stable ICU 2.2
    476      */
    477     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    478                               UErrorCode &status);
    479 
    480     /**
    481      * Universal attribute getter.
    482      * @param attr attribute type
    483      * @param status to indicate whether the operation went on smoothly or there were errors
    484      * @return attribute value
    485      * @stable ICU 2.2
    486      */
    487     virtual UColAttributeValue getAttribute(UColAttribute attr,
    488                                             UErrorCode &status) const;
    489 
    490     /**
    491      * Sets the variable top to the collation element value of the supplied string.
    492      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    493      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    494      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    495      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    496      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    497      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    498      * @stable ICU 2.0
    499      */
    500     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    501 
    502     /**
    503      * Sets the variable top to the collation element value of the supplied string.
    504      * @param varTop a UnicodeString of one or more UChars (more than one if a contraction) to which the variable top should be set
    505      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    506      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    507      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    508      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    509      * @stable ICU 2.0
    510      */
    511     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    512 
    513     /**
    514      * Sets the variable top to the supplied collation element value. Variable top is set to the upper 16 bits.
    515      * Lower 16 bits are ignored.
    516      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    517      * @param status error code (not changed by function)
    518      * @stable ICU 2.0
    519      */
    520     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    521 
    522     /**
    523      * Gets the variable top value of a Collator.
    524      * Lower 16 bits are undefined and should be ignored.
    525      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    526      * @stable ICU 2.0
    527      */
    528     virtual uint32_t getVariableTop(UErrorCode &status) const;
    529 
    530     /**
    531      * Get an UnicodeSet that contains all the characters and sequences tailored in
    532      * this collator.
    533      * @param status      error code of the operation
    534      * @return a pointer to a UnicodeSet object containing all the
    535      *         code points and sequences that may sort differently than
    536      *         in the UCA. The object must be disposed of by using delete
    537      * @stable ICU 2.4
    538      */
    539     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    540 
    541     /**
    542      * Get the sort key as an array of bytes from a UnicodeString.
    543      * @param source string to be processed.
    544      * @param result buffer to store result in. If NULL, number of bytes needed
    545      *        will be returned.
    546      * @param resultLength length of the result buffer. If it is not large enough, the
    547      *        buffer will be filled to capacity.
    548      * @return Number of bytes needed for storing the sort key
    549      * @stable ICU 2.0
    550      */
    551     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    552                                int32_t resultLength) const;
    553 
    554     /**
    555      * Get the sort key as an array of bytes from a UChar buffer.
    556      * @param source string to be processed.
    557      * @param sourceLength length of string to be processed. If -1, the string
    558      *        is 0 terminated and length will be decided by the function.
    559      * @param result buffer to store result in. If NULL, number of bytes needed
    560      *        will be returned.
    561      * @param resultLength length of the result buffer. If it is not large enough, the
    562      *        buffer will be filled to capacity.
    563      * @return Number of bytes needed for storing the sort key
    564      * @stable ICU 2.2
    565      */
    566     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    567                                uint8_t *result, int32_t resultLength) const;
    568 
    569     /**
    570      * Retrieves the reordering codes for this collator.
    571      * @param dest The array to fill with the script ordering.
    572      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    573      *  will only return the length of the result without writing any of the result string (pre-flighting).
    574      * @param status A reference to an error code value, which must not indicate
    575      * a failure before the function call.
    576      * @return The length of the script ordering array.
    577      * @see ucol_setReorderCodes
    578      * @see Collator#getEquivalentReorderCodes
    579      * @see Collator#setReorderCodes
    580      * @stable ICU 4.8
    581      */
    582      virtual int32_t getReorderCodes(int32_t *dest,
    583                                      int32_t destCapacity,
    584                                      UErrorCode& status) const;
    585 
    586     /**
    587      * Sets the ordering of scripts for this collator.
    588      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    589      * length is also set to 0. An empty array will clear any reordering codes on the collator.
    590      * @param reorderCodesLength The length of reorderCodes.
    591      * @param status error code
    592      * @see Collator#getReorderCodes
    593      * @see Collator#getEquivalentReorderCodes
    594      * @stable ICU 4.8
    595      */
    596      virtual void setReorderCodes(const int32_t* reorderCodes,
    597                                   int32_t reorderCodesLength,
    598                                   UErrorCode& status) ;
    599 
    600     /**
    601      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    602      * codes will be grouped and must reorder together.
    603      * @param reorderCode The reorder code to determine equivalence for.
    604      * @param dest The array to fill with the script equivalence reordering codes.
    605      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
    606      * function will only return the length of the result without writing any of the result
    607      * string (pre-flighting).
    608      * @param status A reference to an error code value, which must not indicate
    609      * a failure before the function call.
    610      * @return The length of the reordering code equivalence array.
    611      * @see ucol_setReorderCodes
    612      * @see Collator#getReorderCodes
    613      * @see Collator#setReorderCodes
    614      * @stable ICU 4.8
    615      */
    616     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
    617                                 int32_t* dest,
    618                                 int32_t destCapacity,
    619                                 UErrorCode& status);
    620 
    621 private:
    622 
    623     // private static constants -----------------------------------------------
    624 
    625     enum {
    626         /* need to look up in .commit() */
    627         CHARINDEX = 0x70000000,
    628         /* expand index follows */
    629         EXPANDCHARINDEX = 0x7E000000,
    630         /* contract index follows */
    631         CONTRACTCHARINDEX = 0x7F000000,
    632         /* unmapped character values */
    633         UNMAPPED = 0xFFFFFFFF,
    634         /* primary strength increment */
    635         PRIMARYORDERINCREMENT = 0x00010000,
    636         /* secondary strength increment */
    637         SECONDARYORDERINCREMENT = 0x00000100,
    638         /* tertiary strength increment */
    639         TERTIARYORDERINCREMENT = 0x00000001,
    640         /* mask off anything but primary order */
    641         PRIMARYORDERMASK = 0xffff0000,
    642         /* mask off anything but secondary order */
    643         SECONDARYORDERMASK = 0x0000ff00,
    644         /* mask off anything but tertiary order */
    645         TERTIARYORDERMASK = 0x000000ff,
    646         /* mask off ignorable char order */
    647         IGNORABLEMASK = 0x0000ffff,
    648         /* use only the primary difference */
    649         PRIMARYDIFFERENCEONLY = 0xffff0000,
    650         /* use only the primary and secondary difference */
    651         SECONDARYDIFFERENCEONLY = 0xffffff00,
    652         /* primary order shift */
    653         PRIMARYORDERSHIFT = 16,
    654         /* secondary order shift */
    655         SECONDARYORDERSHIFT = 8,
    656         /* starting value for collation elements */
    657         COLELEMENTSTART = 0x02020202,
    658         /* testing mask for primary low element */
    659         PRIMARYLOWZEROMASK = 0x00FF0000,
    660         /* resetting value for secondaries and tertiaries */
    661         RESETSECONDARYTERTIARY = 0x00000202,
    662         /* resetting value for tertiaries */
    663         RESETTERTIARY = 0x00000002,
    664         /* NOTE(review): undocumented; same numeric value as RESETSECONDARYTERTIARY - confirm intended meaning */
    665         PRIMIGNORABLE = 0x0202
    666     };
    667 
    668     // private data members ---------------------------------------------------
    669 
    670     UBool dataIsOwned;  // when TRUE, setUCollator(UCollator*) calls ucol_close() on ucollator before replacing it
    671 
    672     UBool isWriteThroughAlias;  // set to TRUE by setUCollator(UCollator*) when ucollator is a caller-supplied collator
    673 
    674     /**
    675     * C struct for collation. All initialisation for it has to be done through
    676     * setUCollator().
    677     */
    678     UCollator *ucollator;
    679 
    680     /**
    681     * Rule string, held as a UnicodeString
    682     */
    683     UnicodeString urulestring;
    684 
    685     // friend classes --------------------------------------------------------
    686 
    687     /**
    688     * Used to iterate over collation elements in a character source.
    689     */
    690     friend class CollationElementIterator;
    691 
    692     /**
    693     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
    694     *                                                       UErrorCode&)
    695     */
    696     friend class Collator;
    697 
    698     /**
    699     * Searching over collation elements in a character source
    700     */
    701     friend class StringSearch;
    702 
    703     // private constructors --------------------------------------------------
    704 
    705     /**
    706      * Default constructor. Private, so only this class and its friend classes can call it.
    707      */
    708     RuleBasedCollator();
    709 
    710     /**
    711      * RuleBasedCollator constructor. This constructor takes a locale. The
    712      * only caller of this constructor should be Collator::createInstance(). If
    713      * createInstance() happens to know that the requested locale's collation is
    714      * implemented as a RuleBasedCollator, it can then call this constructor.
    715      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
    716      * COLLATION TABLE. It does this by falling back to defaults.
    717      * @param desiredLocale locale used
    718      * @param status error code status
    719      */
    720     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
    721 
    722     /**
    723      * Common constructor implementation.
    724      *
    725      * @param rules the collation rules to build the collation table from.
    726      * @param collationStrength default strength for comparison
    727      * @param decompositionMode the normalisation mode
    728      * @param status reports success or an error.
    729      */
    730     void
    731     construct(const UnicodeString& rules,
    732               UColAttributeValue collationStrength,
    733               UColAttributeValue decompositionMode,
    734               UErrorCode& status);
    735 
    736     // private methods -------------------------------------------------------
    737 
    738     /**
    739     * Creates the C struct for ucollator. Inline; forwards locale.getName() to the const char* overload.
    740     * @param locale desired locale
    741     * @param status error status
    742     */
    743     void setUCollator(const Locale& locale, UErrorCode& status);
    744 
    745     /**
    746     * Creates the C struct for ucollator from a locale name.
    747     * @param locale desired locale name
    748     * @param status error status
    749     */
    750     void setUCollator(const char* locale, UErrorCode& status);
    751 
    752     /**
    753     * Sets the C struct for ucollator. This is used internally by StringSearch.
    754     * Hence the responsibility of cleaning up the ucollator is not done by
    755     * this RuleBasedCollator. The dataIsOwned flag is set to FALSE and isWriteThroughAlias to TRUE.
    756     * @param collator new ucollator data
    757     */
    758     void setUCollator(UCollator *collator);
    759 
    760 public:
    761 #ifndef U_HIDE_INTERNAL_API
    762     /**
    763     * Get UCollator data struct. Used only by StringSearch & intltest.
    764     * @return the UCollator pointer held by this object (the pointer itself, not a copy)
    765     * @internal
    766     */
    767     const UCollator * getUCollator();
    768 #endif  /* U_HIDE_INTERNAL_API */
    769 
    770 protected:
    771    /**
    772     * Used internally by registration to define the requested and valid locales.
    773     * @param requestedLocale the requested locale
    774     * @param validLocale the valid locale
    775     * @param actualLocale the actual locale
    776     * @internal
    777     */
    778     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    779 
    780 private:
    781     // if the ucollator is not owned and is not a write-through alias, copy it
    782     void checkOwned(void);
    783 
    784     // utility to init the rule string; used by checkOwned, construct and setUCollator(UCollator*)
    785     void setRuleStringFromCollator();
    786 
    787 public:
    788     /** Get the short definition string for a collator. This internal API harvests the collator's
    789      *  locale and the attribute set and produces a string that can be used for opening
    790      *  a collator with the same properties using the ucol_openFromShortString API.
    791      *  This string will be normalized.
    792      *  The structure and the syntax of the string are defined in the "Naming collators"
    793      *  section of the User Guide:
    794      *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
    795      *  This function supports preflighting.
    796      *
    797      *  This is internal, and intended to be used with delegate converters.
    798      *
    799      *  @param locale a locale that will appear as a collator's locale in the resulting
    800      *                short string definition. If NULL, the locale will be harvested
    801      *                from the collator.
    802      *  @param buffer space to hold the resulting string
    803      *  @param capacity capacity of the buffer
    804      *  @param status for returning errors. All the preflighting errors are featured.
    805      *  @return length of the resulting string
    806      *  @see ucol_openFromShortString
    807      *  @see ucol_normalizeShortDefinitionString
    808      *  @see ucol_getShortDefinitionString
    809      *  @internal
    810      */
    811     virtual int32_t internalGetShortDefinitionString(const char *locale,
    812                                                      char *buffer,
    813                                                      int32_t capacity,
    814                                                      UErrorCode &status) const;
    815 };
    816 
    817 // inline method implementation ---------------------------------------------
    818 
    819 inline void RuleBasedCollator::setUCollator(const Locale &locale,
    820                                                UErrorCode &status)
    821 {
    822     setUCollator(locale.getName(), status);  // delegate to the const char* overload using the locale's name
    823 }
    824 
    825 
    826 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
    827 {
    828     // Release the collator held so far, but only if this object owns it.
    829     if (ucollator && dataIsOwned) {
    830         ucol_close(ucollator);  // NOTE(review): also runs when collator == ucollator; confirm callers never pass the owned pointer
    831     }
    832     ucollator   = collator;         // store the caller's pointer as-is
    833     dataIsOwned = FALSE;            // so a later call here will not ucol_close() this collator
    834     isWriteThroughAlias = TRUE;
    835     setRuleStringFromCollator();    // re-initialise the rule string for the new collator
    836 }
    837 
    838 #ifndef U_HIDE_INTERNAL_API
    839 inline const UCollator * RuleBasedCollator::getUCollator()
    840 {
    841     return ucollator;  // returns the stored pointer itself; no copy is made
    842 }
    843 #endif
    844 
    845 U_NAMESPACE_END
    846 
    847 #endif /* #if !UCONFIG_NO_COLLATION */
    848 
    849 #endif
    850