Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1996-2012, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 //===============================================================================
     10 //
     11 // File sortkey.cpp
     12 //
     13 //
     14 //
     15 // Created by: Helena Shih
     16 //
     17 // Modification History:
     18 //
     19 //  Date         Name          Description
     20 //
     21 //  6/20/97      helena        Java class name change.
     22 //  6/23/97      helena        Added comments to make code more readable.
     23 //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
     24 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
     25 //                             Cleaned up operator=
     26 // 07/12/99      helena        HPUX 11 CC port.
     27 // 03/06/01      synwee        Modified compareTo, to handle the result of
     28 //                             2 string similar in contents, but one is longer
     29 //                             than the other
     30 //===============================================================================
     31 
     32 #include "unicode/utypes.h"
     33 
     34 #if !UCONFIG_NO_COLLATION
     35 
     36 #include "unicode/sortkey.h"
     37 #include "cmemory.h"
     38 #include "uelement.h"
     39 #include "ustr_imp.h"
     40 
     41 U_NAMESPACE_BEGIN
     42 
     43 // A hash code of kInvalidHashCode indicates that the hash code needs
     44 // to be computed. A hash code of kEmptyHashCode is used for empty keys
     45 // and for any key whose computed hash code is kInvalidHashCode.
     46 static const int32_t kInvalidHashCode = 0;
     47 static const int32_t kEmptyHashCode = 1;
     48 // The "bogus hash code" replaces a separate fBogus flag.
     49 static const int32_t kBogusHashCode = 2;
     50 
     51 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
     52 
     53 CollationKey::CollationKey()
     54     : UObject(), fFlagAndLength(0),
     55       fHashCode(kEmptyHashCode)
     56 {
     57 }
     58 
     59 // Create a collation key from a bit array.
     60 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
     61     : UObject(), fFlagAndLength(count),
     62       fHashCode(kInvalidHashCode)
     63 {
     64     if (count < 0 || (newValues == NULL && count != 0) ||
     65             (count > getCapacity() && reallocate(count, 0) == NULL)) {
     66         setToBogus();
     67         return;
     68     }
     69 
     70     if (count > 0) {
     71         uprv_memcpy(getBytes(), newValues, count);
     72     }
     73 }
     74 
     75 CollationKey::CollationKey(const CollationKey& other)
     76     : UObject(other), fFlagAndLength(other.getLength()),
     77       fHashCode(other.fHashCode)
     78 {
     79     if (other.isBogus())
     80     {
     81         setToBogus();
     82         return;
     83     }
     84 
     85     int32_t length = fFlagAndLength;
     86     if (length > getCapacity() && reallocate(length, 0) == NULL) {
     87         setToBogus();
     88         return;
     89     }
     90 
     91     if (length > 0) {
     92         uprv_memcpy(getBytes(), other.getBytes(), length);
     93     }
     94 }
     95 
     96 CollationKey::~CollationKey()
     97 {
     98     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
     99 }
    100 
    101 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
    102     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
    103     if(newBytes == NULL) { return NULL; }
    104     if(length > 0) {
    105         uprv_memcpy(newBytes, getBytes(), length);
    106     }
    107     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
    108     fUnion.fFields.fBytes = newBytes;
    109     fUnion.fFields.fCapacity = newCapacity;
    110     fFlagAndLength |= 0x80000000;
    111     return newBytes;
    112 }
    113 
    114 void CollationKey::setLength(int32_t newLength) {
    115     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
    116     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
    117     fHashCode = kInvalidHashCode;
    118 }
    119 
    120 // set the key to an empty state
    121 CollationKey&
    122 CollationKey::reset()
    123 {
    124     fFlagAndLength &= 0x80000000;
    125     fHashCode = kEmptyHashCode;
    126 
    127     return *this;
    128 }
    129 
    130 // set the key to a "bogus" or invalid state
    131 CollationKey&
    132 CollationKey::setToBogus()
    133 {
    134     fFlagAndLength &= 0x80000000;
    135     fHashCode = kBogusHashCode;
    136 
    137     return *this;
    138 }
    139 
    140 UBool
    141 CollationKey::operator==(const CollationKey& source) const
    142 {
    143     return getLength() == source.getLength() &&
    144             (this == &source ||
    145              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
    146 }
    147 
    148 const CollationKey&
    149 CollationKey::operator=(const CollationKey& other)
    150 {
    151     if (this != &other)
    152     {
    153         if (other.isBogus())
    154         {
    155             return setToBogus();
    156         }
    157 
    158         int32_t length = other.getLength();
    159         if (length > getCapacity() && reallocate(length, 0) == NULL) {
    160             return setToBogus();
    161         }
    162         if (length > 0) {
    163             uprv_memcpy(getBytes(), other.getBytes(), length);
    164         }
    165         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
    166         fHashCode = other.fHashCode;
    167     }
    168 
    169     return *this;
    170 }
    171 
    172 // Bitwise comparison for the collation keys.
    173 Collator::EComparisonResult
    174 CollationKey::compareTo(const CollationKey& target) const
    175 {
    176     UErrorCode errorCode = U_ZERO_ERROR;
    177     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
    178 }
    179 
    180 // Bitwise comparison for the collation keys.
    181 UCollationResult
    182 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
    183 {
    184   if(U_SUCCESS(status)) {
    185     const uint8_t *src = getBytes();
    186     const uint8_t *tgt = target.getBytes();
    187 
    188     // are we comparing the same string
    189     if (src == tgt)
    190         return  UCOL_EQUAL;
    191 
    192     UCollationResult result;
    193 
    194     // are we comparing different lengths?
    195     int32_t minLength = getLength();
    196     int32_t targetLength = target.getLength();
    197     if (minLength < targetLength) {
    198         result = UCOL_LESS;
    199     } else if (minLength == targetLength) {
    200         result = UCOL_EQUAL;
    201     } else {
    202         minLength = targetLength;
    203         result = UCOL_GREATER;
    204     }
    205 
    206     if (minLength > 0) {
    207         int diff = uprv_memcmp(src, tgt, minLength);
    208         if (diff > 0) {
    209             return UCOL_GREATER;
    210         }
    211         else
    212             if (diff < 0) {
    213                 return UCOL_LESS;
    214             }
    215     }
    216 
    217     return result;
    218   } else {
    219     return UCOL_EQUAL;
    220   }
    221 }
    222 
    223 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    224 // Create a copy of the byte array.
    225 uint8_t*
    226 CollationKey::toByteArray(int32_t& count) const
    227 {
    228     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
    229 
    230     if (result == NULL)
    231     {
    232         count = 0;
    233     }
    234     else
    235     {
    236         count = fCount;
    237         if (count > 0) {
    238             uprv_memcpy(result, fBytes, fCount);
    239         }
    240     }
    241 
    242     return result;
    243 }
    244 #endif
    245 
    246 static int32_t
    247 computeHashCode(const uint8_t *key, int32_t  length) {
    248     const char *s = reinterpret_cast<const char *>(key);
    249     int32_t hash;
    250     if (s == NULL || length == 0) {
    251         hash = kEmptyHashCode;
    252     } else {
    253         hash = ustr_hashCharsN(s, length);
    254         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
    255             hash = kEmptyHashCode;
    256         }
    257     }
    258     return hash;
    259 }
    260 
    261 int32_t
    262 CollationKey::hashCode() const
    263 {
    264     // (Cribbed from UnicodeString)
    265     // We cache the hashCode; when it becomes invalid, due to any change to the
    266     // string, we note this by setting it to kInvalidHashCode. [LIU]
    267 
    268     // Note: This method is semantically const, but physically non-const.
    269 
    270     if (fHashCode == kInvalidHashCode)
    271     {
    272         fHashCode = computeHashCode(getBytes(), getLength());
    273     }
    274 
    275     return fHashCode;
    276 }
    277 
    278 U_NAMESPACE_END
    279 
    280 U_CAPI int32_t U_EXPORT2
    281 ucol_keyHashCode(const uint8_t *key,
    282                        int32_t  length)
    283 {
    284     return icu::computeHashCode(key, length);
    285 }
    286 
    287 #endif /* #if !UCONFIG_NO_COLLATION */
    288