Home | History | Annotate | Download | only in unicode
      1 /*
      2  *****************************************************************************
      3  * Copyright (C) 1996-2006, International Business Machines Corporation and others.
      4  * All Rights Reserved.
      5  *****************************************************************************
      6  *
      7  * File sortkey.h
      8  *
      9  * Created by: Helena Shih
     10  *
     11  * Modification History:
     12  *
     13  *  Date         Name          Description
     14  *
     15  *  6/20/97     helena      Java class name change.
     16  *  8/18/97     helena      Added internal API documentation.
     17  *  6/26/98     erm         Changed to use byte arrays and memcmp.
     18  *****************************************************************************
     19  */
     20 
     21 #ifndef SORTKEY_H
     22 #define SORTKEY_H
     23 
     24 #include "unicode/utypes.h"
     25 
     26 /**
     27  * \file
     28  * \brief C++ API: Keys for comparing strings multiple times.
     29  */
     30 
     31 #if !UCONFIG_NO_COLLATION
     32 
     33 #include "unicode/uobject.h"
     34 #include "unicode/unistr.h"
     35 #include "unicode/coll.h"
     36 
     37 U_NAMESPACE_BEGIN
     38 
     39 /* forward declaration */
     40 class RuleBasedCollator;
     41 
     42 /**
     43  *
     44  * Collation keys are generated by the Collator class.  Use the CollationKey objects
     45  * instead of Collator to compare strings multiple times.  A CollationKey
     46  * preprocesses the comparison information from the Collator object to
     47  * make the comparison faster.  If you are not going to be comparing strings
     48  * multiple times, then using the Collator object is generally faster,
     49  * since it only processes as much of the string as needed to make a
     50  * comparison.
     51  * <p> For example (with strength == tertiary)
     52  * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
     53  * only needs to process a couple of characters, while a comparison
     54  * with CollationKeys will process all of the characters.  On the other hand,
     55  * if you are doing a sort of a number of fields, it is much faster to use
     56  * CollationKeys, since you will be comparing strings multiple times.
     57  * <p>Typical uses of CollationKeys are in databases, where you store a CollationKey
     58  * in a hidden field, and use it for sorting or indexing.
     59  *
     60  * <p>Example of use:
     61  * <pre>
     62  * \code
     63  *     UErrorCode success = U_ZERO_ERROR;
     64  *     Collator* myCollator = Collator::createInstance(success);
     65  *     CollationKey* keys = new CollationKey [3];
     66  *     myCollator->getCollationKey("Tom", keys[0], success );
     67  *     myCollator->getCollationKey("Dick", keys[1], success );
     68  *     myCollator->getCollationKey("Harry", keys[2], success );
     69  *
     70  *     // Inside body of sort routine, compare keys this way:
     71  *     CollationKey tmp;
     72  *     if(keys[0].compareTo( keys[1] ) > 0 ) {
     73  *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
     74  *     }
     75  *     //...
     76  * \endcode
     77  * </pre>
     78  * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
     79  * long lists of words by retrieving collation keys with Collator::getCollationKey().
     80  * You can then cache the collation keys and compare them using CollationKey::compareTo().
     81  * <p>
     82  * <strong>Note:</strong> <code>Collator</code>s with different Locale,
     83  * CollationStrength and DecompositionMode settings will return different
     84  * CollationKeys for the same set of strings. Locales have specific
     85  * collation rules, and the way in which secondary and tertiary differences
     86  * are taken into account, for example, will result in different CollationKeys
     87  * for the same strings.
     88  * <p>
     89 
     90  * @see          Collator
     91  * @see          RuleBasedCollator
     92  * @version      1.3 12/18/96
     93  * @author       Helena Shih
     94  * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
     95  */
     96 class U_I18N_API CollationKey : public UObject {
     97 public:
     98     /**
     99     * This creates an empty collation key based on the null string.  An empty
    100     * collation key contains no sorting information.  When comparing two empty
    101     * collation keys, the result is Collator::EQUAL.  Comparing an empty collation
    102     * key with a non-empty collation key is always Collator::LESS.
    103     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    104     */
    105     CollationKey();
    106 
    107 
    108     /**
    109     * Creates a collation key based on the collation key values.
    110     * @param values the collation key values
    111     * @param count number of collation key values, including trailing nulls.
    112     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    113     */
    114     CollationKey(const  uint8_t*    values,
    115                 int32_t     count);
    116 
    117     /**
    118     * Copy constructor.
    119     * @param other    the object to be copied.
    120     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    121     */
    122     CollationKey(const CollationKey& other);
    123 
    124     /**
    125     * Sort key destructor.
    126     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    127     */
    128     virtual ~CollationKey();
    129 
    130     /**
    131     * Assignment operator.
    132     * @param other    the object to be copied.
    133     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    134     */
    135     const   CollationKey&   operator=(const CollationKey& other);
    136 
    137     /**
    138     * Compare if two collation keys are the same.
    139     * @param source the collation key to compare to.
    140     * @return Returns TRUE if two collation keys are equal, FALSE otherwise.
    141     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    142     */
    143     UBool                   operator==(const CollationKey& source) const;
    144 
    145     /**
    146     * Compare if two collation keys are not the same.
    147     * @param source the collation key to compare to.
    148     * @return Returns TRUE if two collation keys are different, FALSE otherwise.
    149     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    150     */
    151     UBool                   operator!=(const CollationKey& source) const;
    152 
    153 
    154     /**
    155     * Test to see if the key is in an invalid state. The key will be in an
    156     * invalid state if it couldn't allocate memory for some operation.
    157     * @return Returns TRUE if the key is in an invalid state, FALSE otherwise.
    158     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    159     */
    160     UBool                   isBogus(void) const;
    161 
    162     /**
    163     * Returns a pointer to the collation key values. The storage is owned
    164     * by the collation key and the pointer will become invalid if the key
    165     * is deleted.
    166     * @param count the output parameter of number of collation key values,
    167     * including any trailing nulls.
    168     * @return a pointer to the collation key values.
    169     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    170     */
    171     const    uint8_t*       getByteArray(int32_t& count) const;
    172 
    173 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    174     /**
    175     * Extracts the collation key values into a new array. The caller owns
    176     * this storage and should free it.
    177     * @param count the output parameter of number of collation key values,
    178     * including any trailing nulls.
    179     * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
    180     */
    181     uint8_t*                toByteArray(int32_t& count) const;
    182 #endif
    183 
    184     /**
    185     * Convenience method which does a string (bit-wise) comparison of the
    186     * two collation keys.
    187     * @param target target collation key to be compared with
    188     * @return Returns Collator::LESS if sourceKey &lt; targetKey,
    189     * Collator::GREATER if sourceKey &gt; targetKey and Collator::EQUAL
    190     * otherwise.
    191     * @deprecated ICU 2.6 use the overload with error code
    192     */
    193     Collator::EComparisonResult compareTo(const CollationKey& target) const;
    194 
    195     /**
    196     * Convenience method which does a string (bit-wise) comparison of the
    197     * two collation keys.
    198     * @param target target collation key to be compared with
    199     * @param status error code
    200     * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
    201     * UCOL_GREATER if sourceKey &gt; targetKey and UCOL_EQUAL
    202     * otherwise.
    203     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    204     */
    205     UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
    206 
    207     /**
    208     * Creates an integer that is unique to the collation key.  NOTE: this
    209     * is not the same as String.hashCode.
    210     * <p>Example of use:
    211     * <pre>
    212     * .    UErrorCode status = U_ZERO_ERROR;
    213     * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
    214     * .    if (U_FAILURE(status)) return;
    215     * .    CollationKey key1, key2;
    216     * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
    217     * .    myCollation->getCollationKey("abc", key1, status1);
    218     * .    if (U_FAILURE(status1)) { delete myCollation; return; }
    219     * .    myCollation->getCollationKey("ABC", key2, status2);
    220     * .    if (U_FAILURE(status2)) { delete myCollation; return; }
    221     * .    // key1.hashCode() != key2.hashCode()
    222     * </pre>
    223     * @return the hash value based on the string's collation order.
    224     * @see UnicodeString#hashCode
    225     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    226     */
    227     int32_t                 hashCode(void) const;
    228 
    229     /**
    230      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    231      * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    232      */
    233     virtual UClassID getDynamicClassID() const;
    234 
    235     /**
    236      * ICU "poor man's RTTI", returns a UClassID for this class.
    237      * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
    238      */
    239     static UClassID U_EXPORT2 getStaticClassID();
    240 
    241 private:
    242     /**
    243     * Receives a byte array and its length; nothing is returned (void).
    244     * NOTE(review): presumably this key takes ownership of the storage, as
    245     * the name suggests; the definition is not in this header -- confirm.
    246     * @param values the collation key bytes handed to this key
    247     * @param count the number of collation key bytes in values
    248     */
    249     void adopt(uint8_t *values, int32_t count);
    250 
    251     /*
    252     * Orphaned comment ("Creates a collation key with a string."): no declaration follows.
    253     */
    254 
    255     /**
    256     * If this CollationKey has capacity less than newSize,
    257     * its internal capacity will be increased to newSize.
    258     * @param newSize minimum size this CollationKey has to have
    259     * @return this CollationKey
    260     */
    261     CollationKey&           ensureCapacity(int32_t newSize);
    262     /**
    263     * Set the CollationKey to a "bogus" or invalid state
    264     * @return this CollationKey
    265     */
    266     CollationKey&           setToBogus(void);
    267     /**
    268     * Resets this CollationKey to an empty state
    269     * @return this CollationKey
    270     */
    271     CollationKey&           reset(void);
    272 
    273     /**
    274     * Allow private access to RuleBasedCollator
    275     */
    276     friend  class           RuleBasedCollator;
    277     /**
    278     * Bogus status; this is the value reported by isBogus().
    279     */
    280     UBool                   fBogus;
    281     /**
    282     * Number of bytes of fBytes used to store the sortkey, i.e. up to the
    283     * null-termination. Reported as the count by getByteArray().
    284     */
    285     int32_t                 fCount;
    286     /**
    287     * Full size of the fBytes
    288     */
    289     int32_t                 fCapacity;
    290     /**
    291     * Unique hash value of this CollationKey
    292     */
    293     int32_t                 fHashCode;
    294     /**
    295     * Array to store the sortkey; this is the pointer returned by getByteArray().
    296     */
    297     uint8_t*                fBytes;
    298 
    299 };
    300 
    301 // Inequality is defined as the logical negation of operator==.
    302 inline UBool CollationKey::operator!=(const CollationKey& other) const {
    303     const UBool sameKey = (*this == other);
    304     return !sameKey;
    305 }
    306 
    307 // Reports the invalid-state flag stored in fBogus.
    308 inline UBool CollationKey::isBogus() const {
    309     const UBool bogusFlag = fBogus;
    310     return bogusFlag;
    311 }
    312 
    313 // Writes fCount into count and returns the internal fBytes pointer (no copy).
    314 inline const uint8_t* CollationKey::getByteArray(int32_t &count) const {
    315     count = fCount;
    316     const uint8_t* const keyBytes = fBytes;
    317     return keyBytes;
    318 }
    319 
    320 U_NAMESPACE_END
    321 
    322 #endif /* #if !UCONFIG_NO_COLLATION */
    323 
    324 #endif
    325