Home | History | Annotate | Download | only in unicode
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1996-2013, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 ******************************************************************************
      6 */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
     11  */
     12 
     13 /**
     14 * File tblcoll.h
     15 *
     16 * Created by: Helena Shih
     17 *
     18 * Modification History:
     19 *
     20 *  Date        Name        Description
     21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     22 *                          constructor which reads RuleBasedCollator object from
     23 *                          a binary file.  Added writeToFile method which streams
     24 *                          RuleBasedCollator out to a binary file.  The streamIn
     25 *                          and streamOut methods use istream and ostream objects
     26 *                          in binary mode.
     27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     28 *                          hold invariant data.
     29 *  2/13/97     aliu        Moved several methods into this class from Collation.
     30 *                          Added a private RuleBasedCollator(Locale&) constructor,
     31 *                          to be used by Collator::createDefault().  General
     32 *                          clean up.
     33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     34 *                          constructor and getDynamicClassID.
     35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
     36 *                          specifying whether or not binary loading is to be
     37 *                          attempted.  This is required for dynamic rule loading.
     38 * 05/07/97     helena      Added memory allocation error detection.
     39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
     40 *                          use MergeCollation::getPattern.
     41 *  6/20/97     helena      Java class name change.
     42 *  8/18/97     helena      Added internal API documentation.
     43 * 09/03/97     helena      Added createCollationKeyValues().
     44 * 02/10/98     damiba      Added compare with "length" parameter
     45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
     47 *                          Normalizer::EMode
     48 * 06/14/99     stephen     Removed kResourceBundleSuffix
     49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
     50 *                          UnicodeString construction and special case for NO_OP.
     51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
     52 *                          internal state management.
     53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     54 *                          to implementation file.
     55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
     56 *                          (ucol.h)
     57 */
     58 
     59 #ifndef TBLCOLL_H
     60 #define TBLCOLL_H
     61 
     62 #include "unicode/utypes.h"
     63 
     64 
     65 #if !UCONFIG_NO_COLLATION
     66 
     67 #include "unicode/coll.h"
     68 #include "unicode/ucol.h"
     69 #include "unicode/sortkey.h"
     70 #include "unicode/normlzr.h"
     71 
     72 U_NAMESPACE_BEGIN
     73 
     74 /**
     75 * @stable ICU 2.0
     76 */
     77 class StringSearch;
     78 /**
     79 * @stable ICU 2.0
     80 */
     81 class CollationElementIterator;
     82 
     83 /**
     84  * The RuleBasedCollator class provides the simple implementation of
     85  * Collator, using data-driven tables. The user can create a customized
     86  * table-based collation.
     87  * <P>
     88  * <em>Important: </em>The ICU collation service has been reimplemented
     89  * in order to achieve better performance and UCA compliance.
     90  * For details, see the
     91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
     92  * collation design document</a>.
     93  * <p>
     94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
     95  * <p>
     96  * For more information about the collation service see
     97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
     98  * <p>
     99  * Collation service provides correct sorting orders for most locales supported in ICU.
    100  * If specific data for a locale is not available, the order eventually falls back
    101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
    102  * <p>
    103  * Sort ordering may be customized by providing your own set of rules. For more on
    104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
    105  * Collation customization</a> section of the users guide.
    106  * <p>
    107  * Note, RuleBasedCollator is not to be subclassed.
    108  * @see        Collator
    109  * @version    2.0 11/15/2001
    110  */
    111 class U_I18N_API RuleBasedCollator : public Collator
    112 {
    113 public:
    114 
    115   // constructor -------------------------------------------------------------
    116 
    117     /**
    118      * RuleBasedCollator constructor. This takes the table rules and builds a
    119      * collation table out of them. Please see RuleBasedCollator class
    120      * description for more details on the collation rule syntax.
    121      * @param rules the collation rules to build the collation table from.
    122      * @param status reporting a success or an error.
    123      * @see Locale
    124      * @stable ICU 2.0
    125      */
    126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
    127 
    128     /**
    129      * RuleBasedCollator constructor. This takes the table rules and builds a
    130      * collation table out of them. Please see RuleBasedCollator class
    131      * description for more details on the collation rule syntax.
    132      * @param rules the collation rules to build the collation table from.
    133      * @param collationStrength default strength for comparison
    134      * @param status reporting a success or an error.
    135      * @see Locale
    136      * @stable ICU 2.0
    137      */
    138     RuleBasedCollator(const UnicodeString& rules,
    139                        ECollationStrength collationStrength,
    140                        UErrorCode& status);
    141 
    142     /**
    143      * RuleBasedCollator constructor. This takes the table rules and builds a
    144      * collation table out of them. Please see RuleBasedCollator class
    145      * description for more details on the collation rule syntax.
    146      * @param rules the collation rules to build the collation table from.
    147      * @param decompositionMode the normalisation mode
    148      * @param status reporting a success or an error.
    149      * @see Locale
    150      * @stable ICU 2.0
    151      */
    152     RuleBasedCollator(const UnicodeString& rules,
    153                     UColAttributeValue decompositionMode,
    154                     UErrorCode& status);
    155 
    156     /**
    157      * RuleBasedCollator constructor. This takes the table rules and builds a
    158      * collation table out of them. Please see RuleBasedCollator class
    159      * description for more details on the collation rule syntax.
    160      * @param rules the collation rules to build the collation table from.
    161      * @param collationStrength default strength for comparison
    162      * @param decompositionMode the normalisation mode
    163      * @param status reporting a success or an error.
    164      * @see Locale
    165      * @stable ICU 2.0
    166      */
    167     RuleBasedCollator(const UnicodeString& rules,
    168                     ECollationStrength collationStrength,
    169                     UColAttributeValue decompositionMode,
    170                     UErrorCode& status);
    171 
    172     /**
    173      * Copy constructor.
    174      * @param other the RuleBasedCollator object to be copied
    175      * @see Locale
    176      * @stable ICU 2.0
    177      */
    178     RuleBasedCollator(const RuleBasedCollator& other);
    179 
    180 
    181     /** Opens a collator from a collator binary image created using
    182     *  cloneBinary. Binary image used in instantiation of the
    183     *  collator remains owned by the user and should stay around for
    184     *  the lifetime of the collator. The API also takes a base collator
    185     *  which usually should be UCA.
    186     *  @param bin binary image owned by the user and required through the
    187     *             lifetime of the collator
    188     *  @param length size of the image. If negative, the API will try to
    189     *                figure out the length of the image
    190     *  @param base fallback collator, usually UCA. Base is required to be
    191     *              present through the lifetime of the collator. Currently
    192     *              it cannot be NULL.
    193     *  @param status for catching errors
    194     *  @return newly created collator
    195     *  @see cloneBinary
    196     *  @stable ICU 3.4
    197     */
    198     RuleBasedCollator(const uint8_t *bin, int32_t length,
    199                     const RuleBasedCollator *base,
    200                     UErrorCode &status);
    201     // destructor --------------------------------------------------------------
    202 
    203     /**
    204      * Destructor.
    205      * @stable ICU 2.0
    206      */
    207     virtual ~RuleBasedCollator();
    208 
    209     // public methods ----------------------------------------------------------
    210 
    211     /**
    212      * Assignment operator.
    213      * @param other the RuleBasedCollator object to be assigned from.
    214      * @stable ICU 2.0
    215      */
    216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
    217 
    218     /**
    219      * Returns true if argument is the same as this object.
    220      * @param other Collator object to be compared.
    221      * @return true if the argument is the same as this object.
    222      * @stable ICU 2.0
    223      */
    224     virtual UBool operator==(const Collator& other) const;
    225 
    226     /**
    227      * Makes a copy of this object.
    228      * @return a copy of this object, owned by the caller
    229      * @stable ICU 2.0
    230      */
    231     virtual Collator* clone(void) const;
    232 
    233     /**
    234      * Creates a collation element iterator for the source string. The caller of
    235      * this method is responsible for the memory management of the return
    236      * pointer.
    237      * @param source the string over which the CollationElementIterator will
    238      *        iterate.
    239      * @return the collation element iterator of the source string using this as
    240      *         the base Collator.
    241      * @stable ICU 2.2
    242      */
    243     virtual CollationElementIterator* createCollationElementIterator(
    244                                            const UnicodeString& source) const;
    245 
    246     /**
    247      * Creates a collation element iterator for the source. The caller of this
    248      * method is responsible for the memory management of the returned pointer.
    249      * @param source the CharacterIterator which produces the characters over
    250      *        which the CollationElementIterator will iterate.
    251      * @return the collation element iterator of the source using this as the
    252      *         base Collator.
    253      * @stable ICU 2.2
    254      */
    255     virtual CollationElementIterator* createCollationElementIterator(
    256                                          const CharacterIterator& source) const;
    257 
    258     // Make deprecated versions of Collator::compare() visible.
    259     using Collator::compare;
    260 
    261     /**
    262     * The comparison function compares the character data stored in two
    263     * different strings. Returns information about whether a string is less
    264     * than, greater than or equal to another string.
    265     * @param source the source string to be compared with.
    266     * @param target the string that is to be compared with the source string.
    267     * @param status possible error code
    268     * @return Returns an enum value. UCOL_GREATER if source is greater
    269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    270     * than target
    271     * @stable ICU 2.6
    272     **/
    273     virtual UCollationResult compare(const UnicodeString& source,
    274                                       const UnicodeString& target,
    275                                       UErrorCode &status) const;
    276 
    277     /**
    278     * Does the same thing as compare but limits the comparison to a specified
    279     * length
    280     * @param source the source string to be compared with.
    281     * @param target the string that is to be compared with the source string.
    282     * @param length the length the comparison is limited to
    283     * @param status possible error code
    284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
    285     *         length) is greater than target; UCOL_EQUAL if source (up to specified
    286     *         length) is equal to target; UCOL_LESS if source (up to the specified
    287     *         length) is less  than target.
    288     * @stable ICU 2.6
    289     */
    290     virtual UCollationResult compare(const UnicodeString& source,
    291                                       const UnicodeString& target,
    292                                       int32_t length,
    293                                       UErrorCode &status) const;
    294 
    295     /**
    296     * The comparison function compares the character data stored in two
    297     * different string arrays. Returns information about whether a string array
    298     * is less than, greater than or equal to another string array.
    299     * @param source the source string array to be compared with.
    300     * @param sourceLength the length of the source string array.  If this value
    301     *        is equal to -1, the string array is null-terminated.
    302     * @param target the string that is to be compared with the source string.
    303     * @param targetLength the length of the target string array.  If this value
    304     *        is equal to -1, the string array is null-terminated.
    305     * @param status possible error code
    306     * @return Returns an enum value. UCOL_GREATER if source is greater
    307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
    308     * than target
    309     * @stable ICU 2.6
    310     */
    311     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
    312                                       const UChar* target, int32_t targetLength,
    313                                       UErrorCode &status) const;
    314 
    315     /**
    316      * Compares two strings using the Collator.
    317      * Returns whether the first one compares less than/equal to/greater than
    318      * the second one.
    319      * This version takes UCharIterator input.
    320      * @param sIter the first ("source") string iterator
    321      * @param tIter the second ("target") string iterator
    322      * @param status ICU status
    323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
    324      * @stable ICU 4.2
    325      */
    326     virtual UCollationResult compare(UCharIterator &sIter,
    327                                      UCharIterator &tIter,
    328                                      UErrorCode &status) const;
    329 
    330     /**
    331     * Transforms a specified region of the string into a series of characters
    332     * that can be compared with CollationKey.compare. Use a CollationKey when
    333     * you need to do repeated comparisons on the same string. For a single
    334     * comparison the compare method will be faster.
    335     * @param source the source string.
    336     * @param key the transformed key of the source string.
    337     * @param status the error code status.
    338     * @return the transformed key.
    339     * @see CollationKey
    340     * @stable ICU 2.0
    341     */
    342     virtual CollationKey& getCollationKey(const UnicodeString& source,
    343                                           CollationKey& key,
    344                                           UErrorCode& status) const;
    345 
    346     /**
    347     * Transforms a specified region of the string into a series of characters
    348     * that can be compared with CollationKey.compare. Use a CollationKey when
    349     * you need to do repeated comparisons on the same string. For a single
    350     * comparison the compare method will be faster.
    351     * @param source the source string.
    352     * @param sourceLength the length of the source string.
    353     * @param key the transformed key of the source string.
    354     * @param status the error code status.
    355     * @return the transformed key.
    356     * @see CollationKey
    357     * @stable ICU 2.0
    358     */
    359     virtual CollationKey& getCollationKey(const UChar *source,
    360                                           int32_t sourceLength,
    361                                           CollationKey& key,
    362                                           UErrorCode& status) const;
    363 
    364     /**
    365      * Generates the hash code for the rule-based collation object.
    366      * @return the hash code.
    367      * @stable ICU 2.0
    368      */
    369     virtual int32_t hashCode(void) const;
    370 
    371     /**
    372     * Gets the locale of the Collator
    373     * @param type can be either requested, valid or actual locale. For more
    374     *             information see the definition of ULocDataLocaleType in
    375     *             uloc.h
    376     * @param status the error code status.
    377     * @return locale where the collation data lives. If the collator
    378     *         was instantiated from rules, locale is empty.
    379     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
    380     */
    381     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
    382 
    383     /**
    384      * Gets the tailoring rules for this collator.
    385      * @return the collation tailoring from which this collator was created
    386      * @stable ICU 2.0
    387      */
    388     const UnicodeString& getRules(void) const;
    389 
    390     /**
    391      * Gets the version information for a Collator.
    392      * @param info the version # information, the result will be filled in
    393      * @stable ICU 2.0
    394      */
    395     virtual void getVersion(UVersionInfo info) const;
    396 
    397 #ifndef U_HIDE_DEPRECATED_API
    398     /**
    399      * Returns the maximum length of any expansion sequences that end with the
    400      * specified comparison order.
    401      *
    402      * This is specific to the kind of collation element values and sequences
    403      * returned by the CollationElementIterator.
    404      * Call CollationElementIterator::getMaxExpansion() instead.
    405      *
    406      * @param order a collation order returned by CollationElementIterator::previous
    407      *              or CollationElementIterator::next.
    408      * @return maximum size of the expansion sequences ending with the collation
    409      *         element, or 1 if the collation element does not occur at the end of
    410      *         any expansion sequence
    411      * @see CollationElementIterator#getMaxExpansion
    412      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
    413      */
    414     int32_t getMaxExpansion(int32_t order) const;
    415 #endif  /* U_HIDE_DEPRECATED_API */
    416 
    417     /**
    418      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
    419      * method is to implement a simple version of RTTI, since not all C++
    420      * compilers support genuine RTTI. Polymorphic operator==() and clone()
    421      * methods call this method.
    422      * @return The class ID for this object. All objects of a given class have
    423      *         the same class ID. Objects of other classes have different class
    424      *         IDs.
    425      * @stable ICU 2.0
    426      */
    427     virtual UClassID getDynamicClassID(void) const;
    428 
    429     /**
    430      * Returns the class ID for this class. This is useful only for comparing to
    431      * a return value from getDynamicClassID(). For example:
    432      * <pre>
    433      * Base* polymorphic_pointer = createPolymorphicObject();
    434      * if (polymorphic_pointer->getDynamicClassID() ==
    435      *                                          Derived::getStaticClassID()) ...
    436      * </pre>
    437      * @return The class ID for all objects of this class.
    438      * @stable ICU 2.0
    439      */
    440     static UClassID U_EXPORT2 getStaticClassID(void);
    441 
    442 #ifndef U_HIDE_DEPRECATED_API
    443     /**
    444      * Do not use this method: The caller and the ICU library might use different heaps.
    445      * Use cloneBinary() instead which writes to caller-provided memory.
    446      *
    447      * Returns a binary format of this collator.
    448      * @param length Returns the length of the data, in bytes
    449      * @param status the error code status.
    450      * @return memory, owned by the caller, of size 'length' bytes.
    451      * @deprecated ICU 52. Use cloneBinary() instead.
    452      */
    453     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
    454 #endif  /* U_HIDE_DEPRECATED_API */
    455 
    456     /** Creates a binary image of a collator. This binary image can be stored and
    457     *  later used to instantiate a collator using ucol_openBinary.
    458     *  This API supports preflighting.
    459     *  @param buffer a fill-in buffer to receive the binary image
    460     *  @param capacity capacity of the destination buffer
    461     *  @param status for catching errors
    462     *  @return size of the image
    463     *  @see ucol_openBinary
    464     *  @stable ICU 3.4
    465     */
    466     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
    467 
    468     /**
    469      * Returns current rules. Delta defines whether full rules are returned or
    470      * just the tailoring.
    471      *
    472      * getRules(void) should normally be used instead.
    473      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
    474      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
    475      * @param buffer UnicodeString to store the result rules
    476      * @stable ICU 2.2
    477      * @see UCOL_FULL_RULES
    478      */
    479     void getRules(UColRuleOption delta, UnicodeString &buffer);
    480 
    481     /**
    482      * Universal attribute setter
    483      * @param attr attribute type
    484      * @param value attribute value
    485      * @param status to indicate whether the operation went on smoothly or there were errors
    486      * @stable ICU 2.2
    487      */
    488     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
    489                               UErrorCode &status);
    490 
    491     /**
    492      * Universal attribute getter.
    493      * @param attr attribute type
    494      * @param status to indicate whether the operation went on smoothly or there were errors
    495      * @return attribute value
    496      * @stable ICU 2.2
    497      */
    498     virtual UColAttributeValue getAttribute(UColAttribute attr,
    499                                             UErrorCode &status) const;
    500 
    501     /**
    502      * Sets the variable top to a collation element value of a string supplied.
    503      * @param varTop one or more (if contraction) UChars to which the variable top should be set
    504      * @param len length of variable top string. If -1 it is considered to be zero terminated.
    505      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    506      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    507      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    508      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    509      * @stable ICU 2.0
    510      */
    511     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
    512 
    513     /**
    514      * Sets the variable top to a collation element value of a string supplied.
    515      * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
    516      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
    517      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
    518      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
    519      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
    520      * @stable ICU 2.0
    521      */
    522     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
    523 
    524     /**
    525      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
    526      * Lower 16 bits are ignored.
    527      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
    528      * @param status error code (not changed by function)
    529      * @stable ICU 2.0
    530      */
    531     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
    532 
    533     /**
    534      * Gets the variable top value of a Collator.
    535      * Lower 16 bits are undefined and should be ignored.
    536      * @param status error code (not changed by function). If error code is set, the return value is undefined.
    537      * @stable ICU 2.0
    538      */
    539     virtual uint32_t getVariableTop(UErrorCode &status) const;
    540 
    541     /**
    542      * Get an UnicodeSet that contains all the characters and sequences tailored in
    543      * this collator.
    544      * @param status      error code of the operation
    545      * @return a pointer to a UnicodeSet object containing all the
    546      *         code points and sequences that may sort differently than
    547      *         in the UCA. The object must be disposed of by using delete
    548      * @stable ICU 2.4
    549      */
    550     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
    551 
    552     /**
    553      * Get the sort key as an array of bytes from an UnicodeString.
    554      * @param source string to be processed.
    555      * @param result buffer to store result in. If NULL, number of bytes needed
    556      *        will be returned.
    557      * @param resultLength length of the result buffer. If it is not large enough, the
    558      *        buffer will be filled to capacity.
    559      * @return Number of bytes needed for storing the sort key
    560      * @stable ICU 2.0
    561      */
    562     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
    563                                int32_t resultLength) const;
    564 
    565     /**
    566      * Get the sort key as an array of bytes from an UChar buffer.
    567      * @param source string to be processed.
    568      * @param sourceLength length of string to be processed. If -1, the string
    569      *        is 0 terminated and length will be decided by the function.
    570      * @param result buffer to store result in. If NULL, number of bytes needed
    571      *        will be returned.
    572      * @param resultLength length of the result buffer. If it is not large enough, the
    573      *        buffer will be filled to capacity.
    574      * @return Number of bytes needed for storing the sort key
    575      * @stable ICU 2.2
    576      */
    577     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
    578                                uint8_t *result, int32_t resultLength) const;
    579 
    580     /**
    581      * Retrieves the reordering codes for this collator.
    582      * @param dest The array to fill with the script ordering.
    583      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
    584      *  will only return the length of the result without writing any of the result string (pre-flighting).
    585      * @param status A reference to an error code value, which must not indicate
    586      * a failure before the function call.
    587      * @return The length of the script ordering array.
    588      * @see ucol_setReorderCodes
    589      * @see Collator#getEquivalentReorderCodes
    590      * @see Collator#setReorderCodes
    591      * @stable ICU 4.8
    592      */
    593      virtual int32_t getReorderCodes(int32_t *dest,
    594                                      int32_t destCapacity,
    595                                      UErrorCode& status) const;
    596 
    597     /**
    598      * Sets the ordering of scripts for this collator.
    599      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
    600      * length is also set to 0. An empty array will clear any reordering codes on the collator.
    601      * @param reorderCodesLength The length of reorderCodes.
    602      * @param status error code
    603      * @see Collator#getReorderCodes
    604      * @see Collator#getEquivalentReorderCodes
    605      * @stable ICU 4.8
    606      */
    607      virtual void setReorderCodes(const int32_t* reorderCodes,
    608                                   int32_t reorderCodesLength,
    609                                   UErrorCode& status) ;
    610 
    611     /**
    612      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
    613      * codes will be grouped and must reorder together.
    614      * @param reorderCode The reorder code to determine equivalence for.
    615      * @param dest The array to fill with the script equivalence reordering codes.
    616      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
    617      * function will only return the length of the result without writing any of the result
    618      * string (pre-flighting).
    619      * @param status A reference to an error code value, which must not indicate
    620      * a failure before the function call.
    621      * @return The length of the of the reordering code equivalence array.
    622      * @see ucol_setReorderCodes
    623      * @see Collator#getReorderCodes
    624      * @see Collator#setReorderCodes
    625      * @stable ICU 4.8
    626      */
    627     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
    628                                 int32_t* dest,
    629                                 int32_t destCapacity,
    630                                 UErrorCode& status);
    631 
    632 private:
    633 
    634     // private static constants -----------------------------------------------
    635 
    636     enum {
    637         /* needs look-up in .commit() */
    638         CHARINDEX = 0x70000000,
    639         /* expand index follows */
    640         EXPANDCHARINDEX = 0x7E000000,
    641         /* contract indexes follow */
    642         CONTRACTCHARINDEX = 0x7F000000,
    643         /* unmapped character values */
    644         UNMAPPED = 0xFFFFFFFF,
    645         /* primary strength increment */
    646         PRIMARYORDERINCREMENT = 0x00010000,
    647         /* secondary strength increment */
    648         SECONDARYORDERINCREMENT = 0x00000100,
    649         /* tertiary strength increment */
    650         TERTIARYORDERINCREMENT = 0x00000001,
    651         /* mask off anything but primary order (upper 16 bits) */
    652         PRIMARYORDERMASK = 0xffff0000,
    653         /* mask off anything but secondary order (bits 8-15) */
    654         SECONDARYORDERMASK = 0x0000ff00,
    655         /* mask off anything but tertiary order (low 8 bits) */
    656         TERTIARYORDERMASK = 0x000000ff,
    657         /* mask off ignorable char order (low 16 bits: secondary and tertiary masks combined) */
    658         IGNORABLEMASK = 0x0000ffff,
    659         /* use only the primary difference (same bits as PRIMARYORDERMASK) */
    660         PRIMARYDIFFERENCEONLY = 0xffff0000,
    661         /* use only the primary and secondary difference (upper 24 bits) */
    662         SECONDARYDIFFERENCEONLY = 0xffffff00,
    663         /* primary order shift */
    664         PRIMARYORDERSHIFT = 16,
    665         /* secondary order shift */
    666         SECONDARYORDERSHIFT = 8,
    667         /* starting value for collation elements */
    668         COLELEMENTSTART = 0x02020202,
    669         /* testing mask for primary low element */
    670         PRIMARYLOWZEROMASK = 0x00FF0000,
    671         /* resetting value for secondaries and tertiaries */
    672         RESETSECONDARYTERTIARY = 0x00000202,
    673         /* resetting value for tertiaries */
    674         RESETTERTIARY = 0x00000002,
    675         /* NOTE(review): undocumented; numerically equal to RESETSECONDARYTERTIARY -- confirm intent */
    676         PRIMIGNORABLE = 0x0202
    677     };
    678 
    679     // private data members ---------------------------------------------------
    680 
    681     UBool dataIsOwned;  // when set, setUCollator(UCollator*) calls ucol_close() on the old ucollator before replacing it
    682 
    683     UBool isWriteThroughAlias;  // set to TRUE when setUCollator(UCollator*) installs a caller-supplied collator
    684 
    685     /**
    686     * C struct for collation. All initialisation for it has to be done through
    687     * setUCollator().
    688     */
    689     UCollator *ucollator;
    690 
    691     /**
    692     * Rule UnicodeString
    693     */
    694     UnicodeString urulestring;
    695 
    696     // friend classes --------------------------------------------------------
    697 
    698     /**
    699     * Iterates over collation elements in a character source.
    700     */
    701     friend class CollationElementIterator;
    702 
    703     /**
    704     * Collator ONLY needs access to the private constructor
    705     * RuleBasedCollator(const Locale&, UErrorCode&).
    706     */
    707     friend class Collator;
    708 
    709     /**
    710     * Searches over collation elements in a character source.
    711     */
    712     friend class StringSearch;
    713 
    714     // private constructors --------------------------------------------------
    715 
    716     /**
    717      * Default constructor. Declared private.
    718      */
    719     RuleBasedCollator();
    720 
    721     /**
    722      * RuleBasedCollator constructor. This constructor takes a locale. The
    723      * only caller of this constructor should be Collator::createInstance(). If
    724      * createInstance() happens to know that the requested locale's collation is
    725      * implemented as a RuleBasedCollator, it can then call this constructor.
    726      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
    727      * COLLATION TABLE. It does this by falling back to defaults.
    728      * @param desiredLocale locale used
    729      * @param status error code status
    730      */
    731     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
    732 
    733     /**
    734      * Common constructor implementation.
    735      *
    736      * @param rules the collation rules to build the collation table from.
    737      * @param collationStrength default strength for comparison
    738      * @param decompositionMode the normalisation mode
    739      * @param status reports success or an error.
    740      */
    741     void
    742     construct(const UnicodeString& rules,
    743               UColAttributeValue collationStrength,
    744               UColAttributeValue decompositionMode,
    745               UErrorCode& status);
    746 
    747     // private methods -------------------------------------------------------
    748 
    749     /**
    750     * Creates the C struct for ucollator; forwards locale.getName() to the const char* overload.
    751     * @param locale desired locale
    752     * @param status error status
    753     */
    754     void setUCollator(const Locale& locale, UErrorCode& status);
    755 
    756     /**
    757     * Creates the C struct for ucollator. The Locale overload delegates to this one.
    758     * @param locale desired locale name
    759     * @param status error status
    760     */
    761     void setUCollator(const char* locale, UErrorCode& status);
    762 
    763     /**
    764     * Installs an existing C struct as this object's ucollator. This is used internally by
    765     * StringSearch. A previously owned ucollator is closed first. The new ucollator is not cleaned
    766     * up by this RuleBasedCollator: dataIsOwned is set to FALSE and isWriteThroughAlias to TRUE.
    767     * @param collator new ucollator data
    768     */
    769     void setUCollator(UCollator *collator);
    770 
    771 public:
    772 #ifndef U_HIDE_INTERNAL_API
    773     /**
    774     * Get UCollator data struct. Used only by StringSearch & intltest.
    775     * @return the ucollator member as a pointer-to-const; the struct is not copied
    776     * @internal
    777     */
    778     const UCollator * getUCollator();
    779 #endif  /* U_HIDE_INTERNAL_API */
    780 
    781 protected:
    782    /**
    783     * Used internally by registration to define the requested, valid and actual locales.
    784     * @param requestedLocale the requested locale
    785     * @param validLocale the valid locale
    786     * @param actualLocale the actual locale
    787     * @internal
    788     */
    789     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
    790 
    791 private:
    792     // If the ucollator is not owned and not a write-through alias, copy the ucollator.
    793     void checkOwned(void);
    794 
    795     // Utility to init the rule string; used by checkOwned, construct and the inline setUCollator(UCollator*).
    796     void setRuleStringFromCollator();
    797 
    798 public:
    799     /** Get the short definition string for a collator. This internal API harvests the collator's
    800      *  locale and the attribute set and produces a string that can be used for opening
    801      *  a collator with the same properties using the ucol_openFromShortString API.
    802      *  This string will be normalized.
    803      *  The structure and the syntax of the string are defined in the "Naming collators"
    804      *  section of the users guide:
    805      *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
    806      *  This function supports preflighting.
    807      *
    808      *  This is internal, and intended to be used with delegate converters.
    809      *
    810      *  @param locale a locale that will appear as the collator's locale in the resulting
    811      *                short string definition. If NULL, the locale will be harvested
    812      *                from the collator.
    813      *  @param buffer space to hold the resulting string
    814      *  @param capacity capacity of the buffer
    815      *  @param status for returning errors. All the preflighting errors are reported
    816      *  @return length of the resulting string
    817      *  @see ucol_openFromShortString
    818      *  @see ucol_normalizeShortDefinitionString
    819      *  @see ucol_getShortDefinitionString
    820      *  @internal
    821      */
    822     virtual int32_t internalGetShortDefinitionString(const char *locale,
    823                                                      char *buffer,
    824                                                      int32_t capacity,
    825                                                      UErrorCode &status) const;
    826 };
    827 
    828 // inline method implementation ---------------------------------------------
    829 
    830 inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
    831 {
    832     const char *localeName = locale.getName();  // delegate by name to the const char* overload
    833     setUCollator(localeName, status);
    834 }
    835 
    836 
    837 inline void RuleBasedCollator::setUCollator(UCollator *collator)
    838 {
    839     // Close the previous collator only when one exists and this object owns it.
    840     if (dataIsOwned && ucollator) { ucol_close(ucollator); }
    841 
    842     // The incoming collator is recorded as a non-owned write-through alias.
    843     isWriteThroughAlias = TRUE;
    844     dataIsOwned         = FALSE;
    845     ucollator           = collator;
    846     setRuleStringFromCollator();
    847 }
    848 
    849 #ifndef U_HIDE_INTERNAL_API
    850 inline const UCollator *RuleBasedCollator::getUCollator()
    851 {
    852     return this->ucollator;  // hands out the stored pointer itself; nothing is copied
    853 }
    854 #endif  /* U_HIDE_INTERNAL_API */
    855 
    856 U_NAMESPACE_END
    857 
    858 #endif /* #if !UCONFIG_NO_COLLATION */
    859 
    860 #endif
    861