Home | History | Annotate | Download | only in unicode
      1 /*
      2  *****************************************************************************
      3  * Copyright (C) 1996-2013, International Business Machines Corporation and others.
      4  * All Rights Reserved.
      5  *****************************************************************************
      6  *
      7  * File sortkey.h
      8  *
      9  * Created by: Helena Shih
     10  *
     11  * Modification History:
     12  *
     13  *  Date         Name          Description
     14  *
     15  *  6/20/97     helena      Java class name change.
     16  *  8/18/97     helena      Added internal API documentation.
     17  *  6/26/98     erm         Changed to use byte arrays and memcmp.
     18  *****************************************************************************
     19  */
     20 
     21 #ifndef SORTKEY_H
     22 #define SORTKEY_H
     23 
     24 #include "unicode/utypes.h"
     25 
     26 /**
     27  * \file
     28  * \brief C++ API: Keys for comparing strings multiple times.
     29  */
     30 
     31 #if !UCONFIG_NO_COLLATION
     32 
     33 #include "unicode/uobject.h"
     34 #include "unicode/unistr.h"
     35 #include "unicode/coll.h"
     36 
     37 U_NAMESPACE_BEGIN
     38 
     39 /* forward declaration */
     40 class RuleBasedCollator;
     41 
     42 /**
     43  *
     44  * Collation keys are generated by the Collator class.  Use the CollationKey objects
     45  * instead of Collator to compare strings multiple times.  A CollationKey
     46  * preprocesses the comparison information from the Collator object to
     47  * make the comparison faster.  If you are not going to compare strings
     48  * multiple times, then using the Collator object is generally faster,
     49  * since it only processes as much of the string as needed to make a
     50  * comparison.
     51  * <p> For example (with strength == tertiary)
     52  * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
     53  * only needs to process a couple of characters, while a comparison
     54  * with CollationKeys will process all of the characters.  On the other hand,
     55  * if you are doing a sort of a number of fields, it is much faster to use
     56  * CollationKeys, since you will be comparing strings multiple times.
     57  * <p>A typical use of CollationKeys is in databases, where you store a CollationKey
     58  * in a hidden field, and use it for sorting or indexing.
     59  *
     60  * <p>Example of use:
     61  * <pre>
     62  * \code
     63  *     UErrorCode success = U_ZERO_ERROR;
     64  *     Collator* myCollator = Collator::createInstance(success);
     65  *     CollationKey* keys = new CollationKey [3];
     66  *     myCollator->getCollationKey("Tom", keys[0], success );
     67  *     myCollator->getCollationKey("Dick", keys[1], success );
     68  *     myCollator->getCollationKey("Harry", keys[2], success );
     69  *
     70  *     // Inside body of sort routine, compare keys this way:
     71  *     CollationKey tmp;
     72  *     if(keys[0].compareTo( keys[1], success ) == UCOL_GREATER ) {
     73  *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
     74  *     }
     75  *     //...
     76  * \endcode
     77  * </pre>
     78  * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
     79  * long lists of words by retrieving collation keys with Collator::getCollationKey().
     80  * You can then cache the collation keys and compare them using CollationKey::compareTo().
     81  * <p>
     82  * <strong>Note:</strong> <code>Collator</code>s with different Locale,
     83  * CollationStrength and DecompositionMode settings will return different
     84  * CollationKeys for the same set of strings. Locales have specific
     85  * collation rules, and the way in which secondary and tertiary differences
     86  * are taken into account, for example, will result in different CollationKeys
     87  * for the same strings.
     88  * <p>
     89  *
     90  * @see          Collator
     91  * @see          RuleBasedCollator
     92  * @version      1.3 12/18/96
     93  * @author       Helena Shih
     94  * @stable ICU 2.0
     95  */
     96 class U_I18N_API CollationKey : public UObject {
     97 public:
     98     /**
     99     * This creates an empty collation key based on the null string.  An empty
    100     * collation key contains no sorting information.  When comparing two empty
    101     * collation keys, the result is Collator::EQUAL.  Comparing an empty collation key
    102     * with a non-empty collation key is always Collator::LESS.
    103     * @stable ICU 2.0
    104     */
    105     CollationKey();
    106 
    107 
    108     /**
    109     * Creates a collation key based on the collation key values.
    110     * @param values the collation key values
    111     * @param count number of collation key values, including trailing nulls.
    112     * @stable ICU 2.0
    113     */
    114     CollationKey(const  uint8_t*    values,
    115                 int32_t     count);
    116 
    117     /**
    118     * Copy constructor.
    119     * @param other    the object to be copied.
    120     * @stable ICU 2.0
    121     */
    122     CollationKey(const CollationKey& other);
    123 
    124     /**
    125     * Sort key destructor.
    126     * @stable ICU 2.0
    127     */
    128     virtual ~CollationKey();
    129 
    130     /**
    131     * Assignment operator.
    132     * @param other    the object to be copied.
    133     * @stable ICU 2.0
    134     */
    135     const   CollationKey&   operator=(const CollationKey& other);
    136 
    137     /**
    138     * Compare if two collation keys are the same.
    139     * @param source the collation key to compare to.
    140     * @return Returns TRUE if two collation keys are equal, FALSE otherwise.
    141     * @stable ICU 2.0
    142     */
    143     UBool                   operator==(const CollationKey& source) const;
    144 
    145     /**
    146     * Compare if two collation keys are not the same.
    147     * @param source the collation key to compare to.
    148     * @return Returns TRUE if two collation keys are different, FALSE otherwise.
    149     * @stable ICU 2.0
    150     */
    151     UBool                   operator!=(const CollationKey& source) const;
    152 
    153 
    154     /**
    155     * Test to see if the key is in an invalid state. The key will be in an
    156     * invalid state if it couldn't allocate memory for some operation.
    157     * @return Returns TRUE if the key is in an invalid state, FALSE otherwise.
    158     * @stable ICU 2.0
    159     */
    160     UBool                   isBogus(void) const;
    161 
    162     /**
    163     * Returns a pointer to the collation key values. The storage is owned
    164     * by the collation key and the pointer will become invalid if the key
    165     * is deleted.
    166     * @param count the output parameter of number of collation key values,
    167     * including any trailing nulls.
    168     * @return a pointer to the collation key values.
    169     * @stable ICU 2.0
    170     */
    171     const    uint8_t*       getByteArray(int32_t& count) const;
    172 
    173 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    174     /**
    175     * Extracts the collation key values into a new array. The caller owns
    176     * this storage and should free it.
    177     * @param count the output parameter of number of collation key values,
    178     * including any trailing nulls.
    179     * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
    180     */
    181     uint8_t*                toByteArray(int32_t& count) const;
    182 #endif
    183 
    184 #ifndef U_HIDE_DEPRECATED_API
    185     /**
    186     * Convenience method which does a string (bit-wise) comparison of the
    187     * two collation keys.
    188     * @param target target collation key to be compared with
    189     * @return Returns Collator::LESS if sourceKey &lt; targetKey,
    190     * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
    191     * otherwise.
    192     * @deprecated ICU 2.6 use the overload with error code
    193     */
    194     Collator::EComparisonResult compareTo(const CollationKey& target) const;
    195 #endif  /* U_HIDE_DEPRECATED_API */
    196 
    197     /**
    198     * Convenience method which does a string (bit-wise) comparison of the
    199     * two collation keys.
    200     * @param target target collation key to be compared with
    201     * @param status error code
    202     * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
    203     * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
    204     * otherwise.
    205     * @stable ICU 2.6
    206     */
    207     UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
    208 
    209     /**
    210     * Creates an integer that is unique to the collation key.  NOTE: this
    211     * is not the same as String.hashCode.
    212     * <p>Example of use:
    213     * <pre>
    214     * .    UErrorCode status = U_ZERO_ERROR;
    215     * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
    216     * .    if (U_FAILURE(status)) return;
    217     * .    CollationKey key1, key2;
    218     * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
    219     * .    myCollation->getCollationKey("abc", key1, status1);
    220     * .    if (U_FAILURE(status1)) { delete myCollation; return; }
    221     * .    myCollation->getCollationKey("ABC", key2, status2);
    222     * .    if (U_FAILURE(status2)) { delete myCollation; return; }
    223     * .    // key1.hashCode() != key2.hashCode()
    224     * </pre>
    225     * @return the hash value based on the string's collation order.
    226     * @see UnicodeString#hashCode
    227     * @stable ICU 2.0
    228     */
    229     int32_t                 hashCode(void) const;
    230 
    231     /**
    232      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    233      * @stable ICU 2.2
    234      */
    235     virtual UClassID getDynamicClassID() const;
    236 
    237     /**
    238      * ICU "poor man's RTTI", returns a UClassID for this class.
    239      * @stable ICU 2.2
    240      */
    241     static UClassID U_EXPORT2 getStaticClassID();
    242 
    243 private:
    244     /**
    245      * Replaces the current bytes buffer with a new one of newCapacity
    246      * and copies length bytes from the old buffer to the new one.
    247      * @return the new buffer, or NULL if the allocation failed
    248      */
    249     uint8_t *reallocate(int32_t newCapacity, int32_t length);
    250     /**
    251      * Set a new length for a new sort key in the existing fBytes.
    252      */
    253     void setLength(int32_t newLength);
    254 
    255     uint8_t *getBytes() {  // stack buffer unless bit 31 (heap flag) of fFlagAndLength is set
    256         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
    257     }
    258     const uint8_t *getBytes() const {  // const overload, same buffer selection
    259         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
    260     }
    261     int32_t getCapacity() const {  // sizeof(fUnion) for the stack buffer, else fCapacity
    262         return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity;
    263     }
    264     int32_t getLength() const { return fFlagAndLength & 0x7fffffff; }  // masks off bit 31 (heap flag)
    265 
    266     /**
    267     * Set the CollationKey to a "bogus" or invalid state
    268     * @return this CollationKey
    269     */
    270     CollationKey&           setToBogus(void);
    271     /**
    272     * Resets this CollationKey to an empty state
    273     * @return this CollationKey
    274     */
    275     CollationKey&           reset(void);
    276 
    277     /**
    278     * Grant private access to RuleBasedCollator and CollationKeyByteSink
    279     */
    280     friend  class           RuleBasedCollator;
    281     friend  class           CollationKeyByteSink;
    282 
    283     // Class fields. sizeof(CollationKey) is intended to be 48 bytes
    284     // on a machine with 64-bit pointers.
    285     // We use a union to maximize the size of the internal buffer,
    286     // similar to UnicodeString but not as tight and complex.
    287 
    288     // (implicit) *vtable;
    289     /**
    290      * Sort key length and flag.
    291      * Bit 31 is set if the buffer is heap-allocated.
    292      * Bits 30..0 contain the sort key length.
    293      */
    294     int32_t fFlagAndLength;
    295     /**
    296     * Unique hash value of this CollationKey.
    297     * Special value 2 if the key is bogus.
    298     */
    299     mutable int32_t fHashCode;
    300     /**
    301      * fUnion provides 32 bytes for the internal buffer or for
    302      * pointer+capacity.
    303      */
    304     union StackBufferOrFields {
    305         /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
    306         uint8_t fStackBuffer[32];
    307         struct {
    308             uint8_t *fBytes;  // key bytes when bit 31 of fFlagAndLength is set (heap-allocated buffer)
    309             int32_t fCapacity;  // value reported by getCapacity() in that case
    310         } fFields;
    311     } fUnion;
    312 };
    313 
    314 /** Inequality: the logical negation of operator== for the same operand. */
    315 inline UBool CollationKey::operator!=(const CollationKey& other) const {
    316     const UBool same = (*this == other);
    317     return !same;
    318 }
    319 
    320 /** A key is bogus when fHashCode holds the special value 2 (kBogusHashCode). */
    321 inline UBool CollationKey::isBogus() const {
    322     const int32_t bogusMarker = 2;  // kBogusHashCode
    323     return fHashCode == bogusMarker;
    324 }
    325 
    326 /** Stores the key length in count and returns a pointer to the key bytes. */
    327 inline const uint8_t* CollationKey::getByteArray(int32_t &count) const {
    328     count = getLength();
    329     const uint8_t *keyBytes = getBytes();
    330     return keyBytes;
    331 }
    332 
    333 U_NAMESPACE_END
    334 
    335 #endif /* #if !UCONFIG_NO_COLLATION */
    336 
    337 #endif
    338