Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2012, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 //===============================================================================
      8 //
      9 // File sortkey.cpp
     10 //
     11 //
     12 //
     13 // Created by: Helena Shih
     14 //
     15 // Modification History:
     16 //
     17 //  Date         Name          Description
     18 //
     19 //  6/20/97      helena        Java class name change.
     20 //  6/23/97      helena        Added comments to make code more readable.
     21 //  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
     22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
     23 //                             Cleaned up operator=
     24 // 07/12/99      helena        HPUX 11 CC port.
     25 // 03/06/01      synwee        Modified compareTo, to handle the result of
     26 //                             2 string similar in contents, but one is longer
     27 //                             than the other
     28 //===============================================================================
     29 
     30 #include "unicode/utypes.h"
     31 
     32 #if !UCONFIG_NO_COLLATION
     33 
     34 #include "unicode/sortkey.h"
     35 #include "cmemory.h"
     36 #include "uelement.h"
     37 #include "ustr_imp.h"
     38 
     39 U_NAMESPACE_BEGIN
     40 
     41 // A hash code of kInvalidHashCode indicates that the hash code needs
     42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
     43 // and for any key whose computed hash code is kInvalidHashCode.
     44 static const int32_t kInvalidHashCode = 0;
     45 static const int32_t kEmptyHashCode = 1;
     46 // The "bogus hash code" replaces a separate fBogus flag.
     47 static const int32_t kBogusHashCode = 2;
     48 
// Emits the ICU RTTI boilerplate for CollationKey.
// NOTE(review): presumably this defines getStaticClassID()/getDynamicClassID();
// confirm against the macro definition in the UObject header.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
     50 
     51 CollationKey::CollationKey()
     52     : UObject(), fFlagAndLength(0),
     53       fHashCode(kEmptyHashCode)
     54 {
     55 }
     56 
     57 // Create a collation key from a bit array.
     58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
     59     : UObject(), fFlagAndLength(count),
     60       fHashCode(kInvalidHashCode)
     61 {
     62     if (count < 0 || (newValues == NULL && count != 0) ||
     63             (count > getCapacity() && reallocate(count, 0) == NULL)) {
     64         setToBogus();
     65         return;
     66     }
     67 
     68     if (count > 0) {
     69         uprv_memcpy(getBytes(), newValues, count);
     70     }
     71 }
     72 
     73 CollationKey::CollationKey(const CollationKey& other)
     74     : UObject(other), fFlagAndLength(other.getLength()),
     75       fHashCode(other.fHashCode)
     76 {
     77     if (other.isBogus())
     78     {
     79         setToBogus();
     80         return;
     81     }
     82 
     83     int32_t length = fFlagAndLength;
     84     if (length > getCapacity() && reallocate(length, 0) == NULL) {
     85         setToBogus();
     86         return;
     87     }
     88 
     89     if (length > 0) {
     90         uprv_memcpy(getBytes(), other.getBytes(), length);
     91     }
     92 }
     93 
     94 CollationKey::~CollationKey()
     95 {
     96     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
     97 }
     98 
     99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
    100     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
    101     if(newBytes == NULL) { return NULL; }
    102     if(length > 0) {
    103         uprv_memcpy(newBytes, getBytes(), length);
    104     }
    105     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
    106     fUnion.fFields.fBytes = newBytes;
    107     fUnion.fFields.fCapacity = newCapacity;
    108     fFlagAndLength |= 0x80000000;
    109     return newBytes;
    110 }
    111 
    112 void CollationKey::setLength(int32_t newLength) {
    113     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
    114     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
    115     fHashCode = kInvalidHashCode;
    116 }
    117 
    118 // set the key to an empty state
    119 CollationKey&
    120 CollationKey::reset()
    121 {
    122     fFlagAndLength &= 0x80000000;
    123     fHashCode = kEmptyHashCode;
    124 
    125     return *this;
    126 }
    127 
    128 // set the key to a "bogus" or invalid state
    129 CollationKey&
    130 CollationKey::setToBogus()
    131 {
    132     fFlagAndLength &= 0x80000000;
    133     fHashCode = kBogusHashCode;
    134 
    135     return *this;
    136 }
    137 
    138 UBool
    139 CollationKey::operator==(const CollationKey& source) const
    140 {
    141     return getLength() == source.getLength() &&
    142             (this == &source ||
    143              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
    144 }
    145 
    146 const CollationKey&
    147 CollationKey::operator=(const CollationKey& other)
    148 {
    149     if (this != &other)
    150     {
    151         if (other.isBogus())
    152         {
    153             return setToBogus();
    154         }
    155 
    156         int32_t length = other.getLength();
    157         if (length > getCapacity() && reallocate(length, 0) == NULL) {
    158             return setToBogus();
    159         }
    160         if (length > 0) {
    161             uprv_memcpy(getBytes(), other.getBytes(), length);
    162         }
    163         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
    164         fHashCode = other.fHashCode;
    165     }
    166 
    167     return *this;
    168 }
    169 
    170 // Bitwise comparison for the collation keys.
    171 Collator::EComparisonResult
    172 CollationKey::compareTo(const CollationKey& target) const
    173 {
    174     UErrorCode errorCode = U_ZERO_ERROR;
    175     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
    176 }
    177 
    178 // Bitwise comparison for the collation keys.
    179 UCollationResult
    180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
    181 {
    182   if(U_SUCCESS(status)) {
    183     const uint8_t *src = getBytes();
    184     const uint8_t *tgt = target.getBytes();
    185 
    186     // are we comparing the same string
    187     if (src == tgt)
    188         return  UCOL_EQUAL;
    189 
    190     UCollationResult result;
    191 
    192     // are we comparing different lengths?
    193     int32_t minLength = getLength();
    194     int32_t targetLength = target.getLength();
    195     if (minLength < targetLength) {
    196         result = UCOL_LESS;
    197     } else if (minLength == targetLength) {
    198         result = UCOL_EQUAL;
    199     } else {
    200         minLength = targetLength;
    201         result = UCOL_GREATER;
    202     }
    203 
    204     if (minLength > 0) {
    205         int diff = uprv_memcmp(src, tgt, minLength);
    206         if (diff > 0) {
    207             return UCOL_GREATER;
    208         }
    209         else
    210             if (diff < 0) {
    211                 return UCOL_LESS;
    212             }
    213     }
    214 
    215     return result;
    216   } else {
    217     return UCOL_EQUAL;
    218   }
    219 }
    220 
    221 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    222 // Create a copy of the byte array.
    223 uint8_t*
    224 CollationKey::toByteArray(int32_t& count) const
    225 {
    226     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
    227 
    228     if (result == NULL)
    229     {
    230         count = 0;
    231     }
    232     else
    233     {
    234         count = fCount;
    235         if (count > 0) {
    236             uprv_memcpy(result, fBytes, fCount);
    237         }
    238     }
    239 
    240     return result;
    241 }
    242 #endif
    243 
    244 static int32_t
    245 computeHashCode(const uint8_t *key, int32_t  length) {
    246     const char *s = reinterpret_cast<const char *>(key);
    247     int32_t hash;
    248     if (s == NULL || length == 0) {
    249         hash = kEmptyHashCode;
    250     } else {
    251         hash = ustr_hashCharsN(s, length);
    252         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
    253             hash = kEmptyHashCode;
    254         }
    255     }
    256     return hash;
    257 }
    258 
    259 int32_t
    260 CollationKey::hashCode() const
    261 {
    262     // (Cribbed from UnicodeString)
    263     // We cache the hashCode; when it becomes invalid, due to any change to the
    264     // string, we note this by setting it to kInvalidHashCode. [LIU]
    265 
    266     // Note: This method is semantically const, but physically non-const.
    267 
    268     if (fHashCode == kInvalidHashCode)
    269     {
    270         fHashCode = computeHashCode(getBytes(), getLength());
    271     }
    272 
    273     return fHashCode;
    274 }
    275 
    276 U_NAMESPACE_END
    277 
    278 U_CAPI int32_t U_EXPORT2
    279 ucol_keyHashCode(const uint8_t *key,
    280                        int32_t  length)
    281 {
    282     return icu::computeHashCode(key, length);
    283 }
    284 
    285 #endif /* #if !UCONFIG_NO_COLLATION */
    286