Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2013, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 */
     58 
     59 #ifndef TBLCOLL_H
     60 #define TBLCOLL_H
     61 
     62 #include "unicode/utypes.h"
     63 
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/ucol.h"
     69 #include "unicode/sortkey.h"
     70 #include "unicode/normlzr.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 /**
     75 * @stable ICU 2.0
     76 */
     77 class StringSearch;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class CollationElementIterator;
     82 
     83 /**
     84  * The RuleBasedCollator class provides the simple implementation of
     85  * Collator, using data-driven tables. The user can create a customized
     86  * table-based collation.
     87  * <P>
     88  * <em>Important: </em>The ICU collation service has been reimplemented
     89  * in order to achieve better performance and UCA compliance.
     90  * For details, see the
     91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     92  * collation design document</a>.
     93  * <p>
     94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
    105  * Collation customization</a> section of the users guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  * @version    2.0 11/15/2001
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator
    112 {
    113 public:
    114 
    115   // constructor -------------------------------------------------------------
    116 
    117     /**
    118      * RuleBasedCollator constructor. This takes the table rules and builds a
    119      * collation table out of them. Please see RuleBasedCollator class
    120      * description for more details on the collation rule syntax.
    121      * @param rules the collation rules to build the collation table from.
    122      * @param status reporting a success or an error.
    123      * @see Locale
    124      * @stable ICU 2.0
    125      */
    126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    127 
    128     /**
    129      * RuleBasedCollator constructor. This takes the table rules and builds a
    130      * collation table out of them. Please see RuleBasedCollator class
    131      * description for more details on the collation rule syntax.
    132      * @param rules the collation rules to build the collation table from.
    133      * @param collationStrength default strength for comparison
    134      * @param status reporting a success or an error.
    135      * @see Locale
    136      * @stable ICU 2.0
    137      */
    138     RuleBasedCollator(const UnicodeString& rules,
    139                        ECollationStrength collationStrength,
    140                        UErrorCode& status);
    141 
    142     /**
    143      * RuleBasedCollator constructor. This takes the table rules and builds a
    144      * collation table out of them. Please see RuleBasedCollator class
    145      * description for more details on the collation rule syntax.
    146      * @param rules the collation rules to build the collation table from.
    147      * @param decompositionMode the normalisation mode
    148      * @param status reporting a success or an error.
    149      * @see Locale
    150      * @stable ICU 2.0
    151      */
    152     RuleBasedCollator(const UnicodeString& rules,
    153                     UColAttributeValue decompositionMode,
    154                     UErrorCode& status);
    155 
    156     /**
    157      * RuleBasedCollator constructor. This takes the table rules and builds a
    158      * collation table out of them. Please see RuleBasedCollator class
    159      * description for more details on the collation rule syntax.
    160      * @param rules the collation rules to build the collation table from.
    161      * @param collationStrength default strength for comparison
    162      * @param decompositionMode the normalisation mode
    163      * @param status reporting a success or an error.
    164      * @see Locale
    165      * @stable ICU 2.0
    166      */
    167     RuleBasedCollator(const UnicodeString& rules,
    168                     ECollationStrength collationStrength,
    169                     UColAttributeValue decompositionMode,
    170                     UErrorCode& status);
    171 
    172     /**
    173      * Copy constructor.
    174      * @param other the RuleBasedCollator object to be copied
    175      * @see Locale
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. Binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which usually should be UCA.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually UCA. Base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201     // destructor --------------------------------------------------------------
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     // public methods ----------------------------------------------------------
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other the RuleBasedCollator object to be copied.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Makes a copy of this object.
    228      * @return a copy of this object, owned by the caller
    229      * @stable ICU 2.0
    230      */
    231     virtual Collator* clone(void) const;
    232 
    233     /**
    234      * Creates a collation element iterator for the source string. The caller of
    235      * this method is responsible for the memory management of the return
    236      * pointer.
    237      * @param source the string over which the CollationElementIterator will
    238      *        iterate.
    239      * @return the collation element iterator of the source string using this as
    240      *         the base Collator.
    241      * @stable ICU 2.2
    242      */
    243     virtual CollationElementIterator* createCollationElementIterator(
    244                                            const UnicodeString& source) const;
    245 
    246     /**
    247      * Creates a collation element iterator for the source. The caller of this
    248      * method is responsible for the memory management of the returned pointer.
    249      * @param source the CharacterIterator which produces the characters over
    250      *        which the CollationElementIterator will iterate.
    251      * @return the collation element iterator of the source using this as the
    252      *         base Collator.
    253      * @stable ICU 2.2
    254      */
    255     virtual CollationElementIterator* createCollationElementIterator(
    256                                          const CharacterIterator& source) const;
    257 
    258     // Make deprecated versions of Collator::compare() visible.
    259     using Collator::compare;
    260 
    261     /**
    262     * The comparison function compares the character data stored in two
    263     * different strings. Returns information about whether a string is less
    264     * than, greater than or equal to another string.
    265     * @param source the source string to be compared with.
    266     * @param target the string that is to be compared with the source string.
    267     * @param status possible error code
    268     * @return Returns an enum value. UCOL_GREATER if source is greater
    269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    270     * than target
    271     * @stable ICU 2.6
    272     **/
    273     virtual UCollationResult compare(const UnicodeString& source,
    274                                       const UnicodeString& target,
    275                                       UErrorCode &status) const;
    276 
    277     /**
    278     * Does the same thing as compare but limits the comparison to a specified
    279     * length
    280     * @param source the source string to be compared with.
    281     * @param target the string that is to be compared with the source string.
    282     * @param length the length the comparison is limited to
    283     * @param status possible error code
    284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    285     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    286     *         length) is equal to target; UCOL_LESS if source (up to the specified
    287     *         length) is less  than target.
    288     * @stable ICU 2.6
    289     */
    290     virtual UCollationResult compare(const UnicodeString& source,
    291                                       const UnicodeString& target,
    292                                       int32_t length,
    293                                       UErrorCode &status) const;
    294 
    295     /**
    296     * The comparison function compares the character data stored in two
    297     * different string arrays. Returns information about whether a string array
    298     * is less than, greater than or equal to another string array.
    299     * @param source the source string array to be compared with.
    300     * @param sourceLength the length of the source string array.  If this value
    301     *        is equal to -1, the string array is null-terminated.
    302     * @param target the string that is to be compared with the source string.
    303     * @param targetLength the length of the target string array.  If this value
    304     *        is equal to -1, the string array is null-terminated.
    305     * @param status possible error code
    306     * @return Returns an enum value. UCOL_GREATER if source is greater
    307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    308     * than target
    309     * @stable ICU 2.6
    310     */
    311     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    312                                       const UChar* target, int32_t targetLength,
    313                                       UErrorCode &status) const;
    314 
    315     /**
    316      * Compares two strings using the Collator.
    317      * Returns whether the first one compares less than/equal to/greater than
    318      * the second one.
    319      * This version takes UCharIterator input.
    320      * @param sIter the first ("source") string iterator
    321      * @param tIter the second ("target") string iterator
    322      * @param status ICU status
    323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    324      * @stable ICU 4.2
    325      */
    326     virtual UCollationResult compare(UCharIterator &sIter,
    327                                      UCharIterator &tIter,
    328                                      UErrorCode &status) const;
    329 
    330     /**
    331     * Transforms a specified region of the string into a series of characters
    332     * that can be compared with CollationKey.compare. Use a CollationKey when
    333     * you need to do repeated comparisons on the same string. For a single
    334     * comparison the compare method will be faster.
    335     * @param source the source string.
    336     * @param key the transformed key of the source string.
    337     * @param status the error code status.
    338     * @return the transformed key.
    339     * @see CollationKey
    340     * @stable ICU 2.0
    341     */
    342     virtual CollationKey& getCollationKey(const UnicodeString& source,
    343                                           CollationKey& key,
    344                                           UErrorCode& status) const;
    345 
    346     /**
    347     * Transforms a specified region of the string into a series of characters
    348     * that can be compared with CollationKey.compare. Use a CollationKey when
    349     * you need to do repeated comparisons on the same string. For a single
    350     * comparison the compare method will be faster.
    351     * @param source the source string.
    352     * @param sourceLength the length of the source string.
    353     * @param key the transformed key of the source string.
    354     * @param status the error code status.
    355     * @return the transformed key.
    356     * @see CollationKey
    357     * @stable ICU 2.0
    358     */
    359     virtual CollationKey& getCollationKey(const UChar *source,
    360                                           int32_t sourceLength,
    361                                           CollationKey& key,
    362                                           UErrorCode& status) const;
    363 
    364     /**
    365      * Generates the hash code for the rule-based collation object.
    366      * @return the hash code.
    367      * @stable ICU 2.0
    368      */
    369     virtual int32_t hashCode(void) const;
    370 
    371     /**
    372     * Gets the locale of the Collator
    373     * @param type can be either requested, valid or actual locale. For more
    374     *             information see the definition of ULocDataLocaleType in
    375     *             uloc.h
    376     * @param status the error code status.
    377     * @return locale where the collation data lives. If the collator
    378     *         was instantiated from rules, locale is empty.
    379     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    380     */
    381     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    382 
    383     /**
    384      * Gets the tailoring rules for this collator.
    385      * @return the collation tailoring from which this collator was created
    386      * @stable ICU 2.0
    387      */
    388     const UnicodeString& getRules(void) const;
    389 
    390     /**
    391      * Gets the version information for a Collator.
    392      * @param info the version # information, the result will be filled in
    393      * @stable ICU 2.0
    394      */
    395     virtual void getVersion(UVersionInfo info) const;
    396 
    397 #ifndef U_HIDE_DEPRECATED_API
    398     /**
    399      * Returns the maximum length of any expansion sequences that end with the
    400      * specified comparison order.
    401      *
    402      * This is specific to the kind of collation element values and sequences
    403      * returned by the CollationElementIterator.
    404      * Call CollationElementIterator::getMaxExpansion() instead.
    405      *
    406      * @param order a collation order returned by CollationElementIterator::previous
    407      *              or CollationElementIterator::next.
    408      * @return maximum size of the expansion sequences ending with the collation
    409      *         element, or 1 if the collation element does not occur at the end of
    410      *         any expansion sequence
    411      * @see CollationElementIterator#getMaxExpansion
    412      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
    413      */
    414     int32_t getMaxExpansion(int32_t order) const;
    415 #endif  /* U_HIDE_DEPRECATED_API */
    416 
    417     /**
    418      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    419      * method is to implement a simple version of RTTI, since not all C++
    420      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    421      * methods call this method.
    422      * @return The class ID for this object. All objects of a given class have
    423      *         the same class ID. Objects of other classes have different class
    424      *         IDs.
    425      * @stable ICU 2.0
    426      */
    427     virtual UClassID getDynamicClassID(void) const;
    428 
    429     /**
    430      * Returns the class ID for this class. This is useful only for comparing to
    431      * a return value from getDynamicClassID(). For example:
    432      * <pre>
    433      * Base* polymorphic_pointer = createPolymorphicObject();
    434      * if (polymorphic_pointer->getDynamicClassID() ==
    435      *                                          Derived::getStaticClassID()) ...
    436      * </pre>
    437      * @return The class ID for all objects of this class.
    438      * @stable ICU 2.0
    439      */
    440     static UClassID U_EXPORT2 getStaticClassID(void);
    441 
    442     /**
    443      * Returns the binary format of the class's rules. The format is that of
    444      * .col files.
    445      * @param length Returns the length of the data, in bytes
    446      * @param status the error code status.
    447      * @return memory, owned by the caller, of size 'length' bytes.
    448      * @stable ICU 2.2
    449      */
    450     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
    451 
    452 
    453     /** Creates a binary image of a collator. This binary image can be stored and
    454     *  later used to instantiate a collator using ucol_openBinary.
    455     *  This API supports preflighting.
    456     *  @param buffer a fill-in buffer to receive the binary image
    457     *  @param capacity capacity of the destination buffer
    458     *  @param status for catching errors
    459     *  @return size of the image
    460     *  @see ucol_openBinary
    461     *  @stable ICU 3.4
    462     */
    463     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
    464 
    465     /**
    466      * Returns current rules. Delta defines whether full rules are returned or
    467      * just the tailoring.
    468      *
    469      * getRules(void) should normally be used instead.
    470      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    471      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    472      * @param buffer UnicodeString to store the result rules
    473      * @stable ICU 2.2
    474      * @see UCOL_FULL_RULES
    475      */
    476     void getRules(UColRuleOption delta, UnicodeString &buffer);
    477 
    478     /**
    479      * Universal attribute setter
    480      * @param attr attribute type
    481      * @param value attribute value
    482      * @param status to indicate whether the operation went on smoothly or there were errors
    483      * @stable ICU 2.2
    484      */
    485     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    486                               UErrorCode &status);
    487 
    488     /**
    489      * Universal attribute getter.
    490      * @param attr attribute type
    491      * @param status to indicate whether the operation went on smoothly or there were errors
    492      * @return attribute value
    493      * @stable ICU 2.2
    494      */
    495     virtual UColAttributeValue getAttribute(UColAttribute attr,
    496                                             UErrorCode &status) const;
    497 
    498     /**
    499      * Sets the variable top to a collation element value of a string supplied.
    500      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    501      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    502      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    503      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    504      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    505      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    506      * @stable ICU 2.0
    507      */
    508     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    509 
    510     /**
    511      * Sets the variable top to a collation element value of a string supplied.
    512      * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
    513      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    514      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    515      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    516      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    517      * @stable ICU 2.0
    518      */
    519     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    520 
    521     /**
    522      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
    523      * Lower 16 bits are ignored.
    524      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    525      * @param status error code (not changed by function)
    526      * @stable ICU 2.0
    527      */
    528     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    529 
    530     /**
    531      * Gets the variable top value of a Collator.
    532      * Lower 16 bits are undefined and should be ignored.
    533      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    534      * @stable ICU 2.0
    535      */
    536     virtual uint32_t getVariableTop(UErrorCode &status) const;
    537 
    538     /**
    539      * Get an UnicodeSet that contains all the characters and sequences tailored in
    540      * this collator.
    541      * @param status      error code of the operation
    542      * @return a pointer to a UnicodeSet object containing all the
    543      *         code points and sequences that may sort differently than
    544      *         in the UCA. The object must be disposed of by using delete
    545      * @stable ICU 2.4
    546      */
    547     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    548 
    549     /**
    550      * Get the sort key as an array of bytes from an UnicodeString.
    551      * @param source string to be processed.
    552      * @param result buffer to store result in. If NULL, number of bytes needed
    553      *        will be returned.
    554      * @param resultLength length of the result buffer. If it is not enough, the
    555      *        buffer will be filled to capacity.
    556      * @return Number of bytes needed for storing the sort key
    557      * @stable ICU 2.0
    558      */
    559     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    560                                int32_t resultLength) const;
    561 
    562     /**
    563      * Get the sort key as an array of bytes from an UChar buffer.
    564      * @param source string to be processed.
    565      * @param sourceLength length of string to be processed. If -1, the string
    566      *        is 0 terminated and length will be decided by the function.
    567      * @param result buffer to store result in. If NULL, number of bytes needed
    568      *        will be returned.
    569      * @param resultLength length of the result buffer. If it is not enough, the
    570      *        buffer will be filled to capacity.
    571      * @return Number of bytes needed for storing the sort key
    572      * @stable ICU 2.2
    573      */
    574     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    575                                uint8_t *result, int32_t resultLength) const;
    576 
    577     /**
    578      * Retrieves the reordering codes for this collator.
    579      * @param dest The array to fill with the script ordering.
    580      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    581      *  will only return the length of the result without writing any of the result string (pre-flighting).
    582      * @param status A reference to an error code value, which must not indicate
    583      * a failure before the function call.
    584      * @return The length of the script ordering array.
    585      * @see ucol_setReorderCodes
    586      * @see Collator#getEquivalentReorderCodes
    587      * @see Collator#setReorderCodes
    588      * @stable ICU 4.8
    589      */
    590      virtual int32_t getReorderCodes(int32_t *dest,
    591                                      int32_t destCapacity,
    592                                      UErrorCode& status) const;
    593 
    594     /**
    595      * Sets the ordering of scripts for this collator.
    596      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    597      * length is also set to 0. An empty array will clear any reordering codes on the collator.
    598      * @param reorderCodesLength The length of reorderCodes.
    599      * @param status error code
    600      * @see Collator#getReorderCodes
    601      * @see Collator#getEquivalentReorderCodes
    602      * @stable ICU 4.8
    603      */
    604      virtual void setReorderCodes(const int32_t* reorderCodes,
    605                                   int32_t reorderCodesLength,
    606                                   UErrorCode& status) ;
    607 
    608     /**
    609      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    610      * codes will be grouped and must reorder together.
    611      * @param reorderCode The reorder code to determine equivalence for.
    612      * @param dest The array to fill with the script equivalence reordering codes.
    613      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
    614      * function will only return the length of the result without writing any of the result
    615      * string (pre-flighting).
    616      * @param status A reference to an error code value, which must not indicate
    617      * a failure before the function call.
    618      * @return The length of the of the reordering code equivalence array.
    619      * @see ucol_setReorderCodes
    620      * @see Collator#getReorderCodes
    621      * @see Collator#setReorderCodes
    622      * @stable ICU 4.8
    623      */
    624     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
    625                                 int32_t* dest,
    626                                 int32_t destCapacity,
    627                                 UErrorCode& status);
    628 
    629 private:
    630 
    631     // private static constants -----------------------------------------------
    632 
    633     enum {
    634         /* needs a lookup in .commit() */
    635         CHARINDEX = 0x70000000,
    636         /* expand index follows */
    637         EXPANDCHARINDEX = 0x7E000000,
    638         /* contract index follows */
    639         CONTRACTCHARINDEX = 0x7F000000,
    640         /* unmapped character values (all 32 bits set) */
    641         UNMAPPED = 0xFFFFFFFF,
    642         /* primary strength increment (1 << PRIMARYORDERSHIFT) */
    643         PRIMARYORDERINCREMENT = 0x00010000,
    644         /* secondary strength increment (1 << SECONDARYORDERSHIFT) */
    645         SECONDARYORDERINCREMENT = 0x00000100,
    646         /* tertiary strength increment */
    647         TERTIARYORDERINCREMENT = 0x00000001,
    648         /* mask off anything but primary order (upper 16 bits) */
    649         PRIMARYORDERMASK = 0xffff0000,
    650         /* mask off anything but secondary order (bits 8-15) */
    651         SECONDARYORDERMASK = 0x0000ff00,
    652         /* mask off anything but tertiary order (low 8 bits) */
    653         TERTIARYORDERMASK = 0x000000ff,
    654         /* mask off ignorable char order (low 16 bits) */
    655         IGNORABLEMASK = 0x0000ffff,
    656         /* use only the primary difference (same value as PRIMARYORDERMASK) */
    657         PRIMARYDIFFERENCEONLY = 0xffff0000,
    658         /* use only the primary and secondary difference (upper 24 bits) */
    659         SECONDARYDIFFERENCEONLY = 0xffffff00,
    660         /* primary order shift, in bits */
    661         PRIMARYORDERSHIFT = 16,
    662         /* secondary order shift, in bits */
    663         SECONDARYORDERSHIFT = 8,
    664         /* starting value for collation elements */
    665         COLELEMENTSTART = 0x02020202,
    666         /* testing mask for primary low element (bits 16-23) */
    667         PRIMARYLOWZEROMASK = 0x00FF0000,
    668         /* resetting value for secondaries and tertiaries */
    669         RESETSECONDARYTERTIARY = 0x00000202,
    670         /* resetting value for tertiaries */
    671         RESETTERTIARY = 0x00000002,
    672         /* NOTE(review): undocumented; presumably the primary-ignorable value - confirm */
    673         PRIMIGNORABLE = 0x0202
    674     };
    675 
    676     // private data members ---------------------------------------------------
    677 
    678     UBool dataIsOwned;  /* when TRUE, setUCollator(UCollator*) closes the old ucollator */
    679     /* Set to TRUE by setUCollator(UCollator*); see the condition noted on checkOwned(). */
    680     UBool isWriteThroughAlias;
    681 
    682     /**
    683     * c struct for collation. All initialisation for it has to be done through
    684     * setUCollator().
    685     */
    686     UCollator *ucollator;
    687 
    688     /**
    689     * Rule UnicodeString
    690     */
    691     UnicodeString urulestring;
    692 
    693     // friend classes --------------------------------------------------------
    694 
    695     /**
    696     * Used to iterate over collation elements in a character source.
    697     */
    698     friend class CollationElementIterator;
    699 
    700     /**
    701     * Collator ONLY needs access to RuleBasedCollator(const Locale&,
    702     *                                                       UErrorCode&)
    703     */
    704     friend class Collator;
    705 
    706     /**
    707     * Used to search over collation elements in a character source.
    708     */
    709     friend class StringSearch;
    710 
    711     // private constructors --------------------------------------------------
    712 
    713     /**
    714      * Default constructor (private: usable only by this class and its friends).
    715      */
    716     RuleBasedCollator();
    717 
    718     /**
    719      * RuleBasedCollator constructor. This constructor takes a locale. The
    720      * only caller of this constructor should be Collator::createInstance(). If
    721      * createInstance() happens to know that the requested locale's collation is
    722      * implemented as a RuleBasedCollator, it can then call this constructor.
    723      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
    724      * COLLATION TABLE. It does this by falling back to defaults.
    725      * @param desiredLocale locale used
    726      * @param status error code status
    727      */
    728     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
    729 
    730     /**
    731      * Common implementation shared by the constructors.
    732      *
    733      * @param rules the collation rules to build the collation table from.
    734      * @param collationStrength default strength for comparison
    735      * @param decompositionMode the normalisation mode
    736      * @param status reports success or an error.
    737      */
    738     void
    739     construct(const UnicodeString& rules,
    740               UColAttributeValue collationStrength,
    741               UColAttributeValue decompositionMode,
    742               UErrorCode& status);
    743 
    744     // private methods -------------------------------------------------------
    745 
    746     /**
    747     * Creates the c struct for ucollator; forwards locale.getName() to the locale-name overload.
    748     * @param locale desired locale
    749     * @param status error status
    750     */
    751     void setUCollator(const Locale& locale, UErrorCode& status);
    752 
    753     /**
    754     * Creates the c struct for ucollator from a locale name.
    755     * @param locale desired locale name
    756     * @param status error status
    757     */
    758     void setUCollator(const char* locale, UErrorCode& status);
    759 
    760     /**
    761     * Installs an externally supplied c struct as ucollator. This is used internally
    762     * by StringSearch. Hence the responsibility of cleaning up the ucollator is not
    763     * taken by this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
    764     * @param collator new ucollator data
    765     */
    766     void setUCollator(UCollator *collator);
    767 
    768 public:
    769 #ifndef U_HIDE_INTERNAL_API
    770     /**
    771     * Get UCollator data struct. Used only by StringSearch & intltest.
    772     * @return UCollator data struct (the ucollator member)
    773     * @internal Declared only when U_HIDE_INTERNAL_API is not defined.
    774     */
    775     const UCollator * getUCollator();
    776 #endif  /* U_HIDE_INTERNAL_API */
    777 
    778 protected:
    779    /**
    780     * Used internally by registration to define the requested, valid and actual locales.
    781     * @param requestedLocale the requested locale
    782     * @param validLocale the valid locale
    783     * @param actualLocale the actual locale
    784     * @internal
    785     */
    786     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    787 
    788 private:
    789     // If the ucollator is not owned and is not a write-through alias, copy it.
    790     void checkOwned(void);
    791 
    792     // Utility to init the rule string; used by checkOwned, construct and setUCollator(UCollator*).
    793     void setRuleStringFromCollator();
    794 
    795 public:
    796     /** Get the short definition string for a collator. This internal API harvests the collator's
    797      *  locale and the attribute set and produces a string that can be used for opening
    798      *  a collator with the same properties using the ucol_openFromShortString API.
    799      *  This string will be normalized.
    800      *  The structure and the syntax of the string are defined in the "Naming collators"
    801      *  section of the User's Guide:
    802      *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
    803      *  This function supports preflighting.
    804      *
    805      *  This is internal, and intended to be used with delegate converters.
    806      *
    807      *  @param locale a locale that will appear as the collator's locale in the resulting
    808      *                short string definition. If NULL, the locale will be harvested
    809      *                from the collator.
    810      *  @param buffer space to hold the resulting string
    811      *  @param capacity capacity of the buffer
    812      *  @param status for returning errors. All the preflighting errors are featured
    813      *  @return length of the resulting string
    814      *  @see ucol_openFromShortString
    815      *  @see ucol_normalizeShortDefinitionString
    816      *  @see ucol_getShortDefinitionString
    817      *  @internal
    818      */
    819     virtual int32_t internalGetShortDefinitionString(const char *locale,
    820                                                      char *buffer,
    821                                                      int32_t capacity,
    822                                                      UErrorCode &status) const;
    823 };
    824 
    825 // inline method implementation ---------------------------------------------
    826 
    827 inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
    828 {
    829     // Forward to the locale-name overload, passing the name this Locale reports.
    830     this->setUCollator(locale.getName(), status);
    831 }
    832 
    833 
    834 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
    835 {
    836     // Release the old struct only if we own it and it is not the one being installed.
    837     if (ucollator && dataIsOwned && ucollator != collator) {
    838         ucol_close(ucollator);  // closing `collator` itself would leave ucollator dangling
    839     }
    840     ucollator   = collator;
    841     dataIsOwned = FALSE;           // cleaning up `collator` is no longer done by this object
    842     isWriteThroughAlias = TRUE;
    843     setRuleStringFromCollator();   // re-initialise the rule string for the installed struct
    844 }
    845 
    846 inline const UCollator * RuleBasedCollator::getUCollator()
    847 {
    848     return ucollator;
    849 }
    850 
    851 U_NAMESPACE_END
    852 
    853 #endif /* #if !UCONFIG_NO_COLLATION */
    854 
    855 #endif
    856