Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 //===============================================================================
      8 //
      9 // File sortkey.cpp
     10 //
     11 //
     12 //
     13 // Created by: Helena Shih
     14 //
     15 // Modification History:
     16 //
     17 //  Date         Name          Description
     18 //
     19 //  6/20/97      helena        Java class name change.
     20 //  6/23/97      helena        Added comments to make code more readable.
     21 //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
     22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
     23 //                             Cleaned up operator=
     24 // 07/12/99      helena        HPUX 11 CC port.
     25 // 03/06/01      synwee        Modified compareTo, to handle the result of
     26 //                             2 string similar in contents, but one is longer
     27 //                             than the other
     28 //===============================================================================
     29 
     30 #include "unicode/utypes.h"
     31 
     32 #if !UCONFIG_NO_COLLATION
     33 
     34 #include "unicode/sortkey.h"
     35 #include "cmemory.h"
     36 #include "uhash.h"
     37 
     38 U_NAMESPACE_BEGIN
     39 
     40 // A hash code of kInvalidHashCode indicates that the has code needs
     41 // to be computed. A hash code of kEmptyHashCode is used for empty keys
     42 // and for any key whose computed hash code is kInvalidHashCode.
     43 #define kInvalidHashCode ((int32_t)0)
     44 #define kEmptyHashCode ((int32_t)1)
     45 
     46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
     47 
     48 CollationKey::CollationKey()
     49     : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
     50       fHashCode(kEmptyHashCode), fBytes(NULL)
     51 {
     52 }
     53 
     54 // Create a collation key from a bit array.
     55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
     56     : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
     57       fHashCode(kInvalidHashCode)
     58 {
     59     fBytes = (uint8_t *)uprv_malloc(count);
     60 
     61     if (fBytes == NULL)
     62     {
     63         setToBogus();
     64         return;
     65     }
     66 
     67     uprv_memcpy(fBytes, newValues, fCount);
     68 }
     69 
     70 CollationKey::CollationKey(const CollationKey& other)
     71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
     72   fHashCode(other.fHashCode), fBytes(NULL)
     73 {
     74     if (other.fBogus)
     75     {
     76         setToBogus();
     77         return;
     78     }
     79 
     80     fBytes = (uint8_t *)uprv_malloc(fCapacity);
     81 
     82     if (fBytes == NULL)
     83     {
     84         setToBogus();
     85         return;
     86     }
     87 
     88     uprv_memcpy(fBytes, other.fBytes, other.fCount);
     89     if(fCapacity>fCount) {
     90         uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
     91     }
     92 }
     93 
     94 CollationKey::~CollationKey()
     95 {
     96         uprv_free(fBytes);
     97 }
     98 
     99 void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
    100     if(fBytes != NULL) {
    101         uprv_free(fBytes);
    102     }
    103     fBytes = values;
    104     fCapacity = capacity;
    105     setLength(count);
    106 }
    107 
    108 void CollationKey::setLength(int32_t newLength) {
    109     fBogus = FALSE;
    110     fCount = newLength;
    111     fHashCode = kInvalidHashCode;
    112 }
    113 
    114 // set the key to an empty state
    115 CollationKey&
    116 CollationKey::reset()
    117 {
    118     fCount = 0;
    119     fBogus = FALSE;
    120     fHashCode = kEmptyHashCode;
    121 
    122     return *this;
    123 }
    124 
    125 // set the key to a "bogus" or invalid state
    126 CollationKey&
    127 CollationKey::setToBogus()
    128 {
    129     uprv_free(fBytes);
    130     fBytes = NULL;
    131 
    132     fCapacity = 0;
    133     fCount = 0;
    134     fHashCode = kInvalidHashCode;
    135 
    136     return *this;
    137 }
    138 
    139 UBool
    140 CollationKey::operator==(const CollationKey& source) const
    141 {
    142     return (this->fCount == source.fCount &&
    143             (this->fBytes == source.fBytes ||
    144              uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
    145 }
    146 
    147 const CollationKey&
    148 CollationKey::operator=(const CollationKey& other)
    149 {
    150     if (this != &other)
    151     {
    152         if (other.isBogus())
    153         {
    154             return setToBogus();
    155         }
    156 
    157         if (other.fBytes != NULL)
    158         {
    159             ensureCapacity(other.fCount);
    160 
    161             if (isBogus())
    162             {
    163                 return *this;
    164             }
    165 
    166             fHashCode = other.fHashCode;
    167             uprv_memcpy(fBytes, other.fBytes, fCount);
    168         }
    169         else
    170         {
    171             fCount = 0;
    172             fBogus = FALSE;
    173             fHashCode = kEmptyHashCode;
    174         }
    175     }
    176 
    177     return *this;
    178 }
    179 
    180 // Bitwise comparison for the collation keys.
    181 // NOTE: this is somewhat messy 'cause we can't count
    182 // on memcmp returning the exact values which match
    183 // Collator::EComparisonResult
    184 Collator::EComparisonResult
    185 CollationKey::compareTo(const CollationKey& target) const
    186 {
    187     uint8_t *src = this->fBytes;
    188     uint8_t *tgt = target.fBytes;
    189 
    190     // are we comparing the same string
    191     if (src == tgt)
    192         return  Collator::EQUAL;
    193 
    194         /*
    195         int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
    196         if (count == 0)
    197         {
    198         // If count is 0, at least one of the keys is empty.
    199         // An empty key is always LESS than a non-empty one
    200         // and EQUAL to another empty
    201         if (this->fCount < target.fCount)
    202         {
    203         return Collator::LESS;
    204         }
    205 
    206           if (this->fCount > target.fCount)
    207           {
    208           return Collator::GREATER;
    209           }
    210           return Collator::EQUAL;
    211           }
    212     */
    213 
    214     int                         minLength;
    215     Collator::EComparisonResult result;
    216 
    217     // are we comparing different lengths?
    218     if (this->fCount != target.fCount) {
    219         if (this->fCount < target.fCount) {
    220             minLength = this->fCount;
    221             result    =  Collator::LESS;
    222         }
    223         else {
    224             minLength = target.fCount;
    225             result    =  Collator::GREATER;
    226         }
    227     }
    228     else {
    229         minLength = target.fCount;
    230         result    =  Collator::EQUAL;
    231     }
    232 
    233     if (minLength > 0) {
    234         int diff = uprv_memcmp(src, tgt, minLength);
    235         if (diff > 0) {
    236             return Collator::GREATER;
    237         }
    238         else
    239             if (diff < 0) {
    240                 return Collator::LESS;
    241             }
    242     }
    243 
    244     return result;
    245     /*
    246     if (result < 0)
    247     {
    248     return Collator::LESS;
    249     }
    250 
    251       if (result > 0)
    252       {
    253       return Collator::GREATER;
    254       }
    255       return Collator::EQUAL;
    256     */
    257 }
    258 
    259 // Bitwise comparison for the collation keys.
    260 UCollationResult
    261 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
    262 {
    263   if(U_SUCCESS(status)) {
    264     uint8_t *src = this->fBytes;
    265     uint8_t *tgt = target.fBytes;
    266 
    267     // are we comparing the same string
    268     if (src == tgt)
    269         return  UCOL_EQUAL;
    270 
    271     int                         minLength;
    272     UCollationResult result;
    273 
    274     // are we comparing different lengths?
    275     if (this->fCount != target.fCount) {
    276         if (this->fCount < target.fCount) {
    277             minLength = this->fCount;
    278             result    =  UCOL_LESS;
    279         }
    280         else {
    281             minLength = target.fCount;
    282             result    =  UCOL_GREATER;
    283         }
    284     }
    285     else {
    286         minLength = target.fCount;
    287         result    =  UCOL_EQUAL;
    288     }
    289 
    290     if (minLength > 0) {
    291         int diff = uprv_memcmp(src, tgt, minLength);
    292         if (diff > 0) {
    293             return UCOL_GREATER;
    294         }
    295         else
    296             if (diff < 0) {
    297                 return UCOL_LESS;
    298             }
    299     }
    300 
    301     return result;
    302   } else {
    303     return UCOL_EQUAL;
    304   }
    305 }
    306 
    307 CollationKey&
    308 CollationKey::ensureCapacity(int32_t newSize)
    309 {
    310     if (fCapacity < newSize)
    311     {
    312         uprv_free(fBytes);
    313 
    314         fBytes = (uint8_t *)uprv_malloc(newSize);
    315 
    316         if (fBytes == NULL)
    317         {
    318             return setToBogus();
    319         }
    320 
    321         uprv_memset(fBytes, 0, fCapacity);
    322         fCapacity = newSize;
    323     }
    324 
    325     fBogus = FALSE;
    326     fCount = newSize;
    327     fHashCode = kInvalidHashCode;
    328 
    329     return *this;
    330 }
    331 
    332 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    333 // Create a copy of the byte array.
    334 uint8_t*
    335 CollationKey::toByteArray(int32_t& count) const
    336 {
    337     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
    338 
    339     if (result == NULL)
    340     {
    341         count = 0;
    342     }
    343     else
    344     {
    345         count = fCount;
    346         uprv_memcpy(result, fBytes, fCount);
    347     }
    348 
    349     return result;
    350 }
    351 #endif
    352 
    353 int32_t
    354 CollationKey::hashCode() const
    355 {
    356     // (Cribbed from UnicodeString)
    357     // We cache the hashCode; when it becomes invalid, due to any change to the
    358     // string, we note this by setting it to kInvalidHashCode. [LIU]
    359 
    360     // Note: This method is semantically const, but physically non-const.
    361 
    362     if (fHashCode == kInvalidHashCode)
    363     {
    364         UHashTok key;
    365         key.pointer = fBytes;
    366         ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
    367 #if 0
    368         // We compute the hash by iterating sparsely over 64 (at most) characters
    369         // spaced evenly through the string.  For each character, we multiply the
    370         // previous hash value by a prime number and add the new character in,
    371         // in the manner of a additive linear congruential random number generator,
    372         // thus producing a pseudorandom deterministic value which should be well
    373         // distributed over the output range. [LIU]
    374         const uint8_t   *p = fBytes, *limit = fBytes + fCount;
    375         int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
    376         int32_t         hash = 0;
    377 
    378         while (p < limit)
    379         {
    380             hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
    381             p += inc;
    382         }
    383 
    384         // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
    385         if (hash == kInvalidHashCode)
    386         {
    387             hash = kEmptyHashCode;
    388         }
    389 
    390         ((CollationKey *)this)->fHashCode = hash; // cast away const
    391 #endif
    392     }
    393 
    394     return fHashCode;
    395 }
    396 
    397 U_NAMESPACE_END
    398 
    399 U_CAPI int32_t U_EXPORT2
    400 ucol_keyHashCode(const uint8_t *key,
    401                        int32_t  length)
    402 {
    403     U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
    404     return newKey.hashCode();
    405 }
    406 
    407 #endif /* #if !UCONFIG_NO_COLLATION */
    408