1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2012, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 //=============================================================================== 10 // 11 // File sortkey.cpp 12 // 13 // 14 // 15 // Created by: Helena Shih 16 // 17 // Modification History: 18 // 19 // Date Name Description 20 // 21 // 6/20/97 helena Java class name change. 22 // 6/23/97 helena Added comments to make code more readable. 23 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString 24 // 7/31/98 erm hashCode: minimum inc should be 2 not 1, 25 // Cleaned up operator= 26 // 07/12/99 helena HPUX 11 CC port. 27 // 03/06/01 synwee Modified compareTo, to handle the result of 28 // 2 string similar in contents, but one is longer 29 // than the other 30 //=============================================================================== 31 32 #include "unicode/utypes.h" 33 34 #if !UCONFIG_NO_COLLATION 35 36 #include "unicode/sortkey.h" 37 #include "cmemory.h" 38 #include "uelement.h" 39 #include "ustr_imp.h" 40 41 U_NAMESPACE_BEGIN 42 43 // A hash code of kInvalidHashCode indicates that the hash code needs 44 // to be computed. A hash code of kEmptyHashCode is used for empty keys 45 // and for any key whose computed hash code is kInvalidHashCode. 46 static const int32_t kInvalidHashCode = 0; 47 static const int32_t kEmptyHashCode = 1; 48 // The "bogus hash code" replaces a separate fBogus flag. 49 static const int32_t kBogusHashCode = 2; 50 51 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 52 53 CollationKey::CollationKey() 54 : UObject(), fFlagAndLength(0), 55 fHashCode(kEmptyHashCode) 56 { 57 } 58 59 // Create a collation key from a bit array. 60 CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 61 : UObject(), fFlagAndLength(count), 62 fHashCode(kInvalidHashCode) 63 { 64 if (count < 0 || (newValues == NULL && count != 0) || 65 (count > getCapacity() && reallocate(count, 0) == NULL)) { 66 setToBogus(); 67 return; 68 } 69 70 if (count > 0) { 71 uprv_memcpy(getBytes(), newValues, count); 72 } 73 } 74 75 CollationKey::CollationKey(const CollationKey& other) 76 : UObject(other), fFlagAndLength(other.getLength()), 77 fHashCode(other.fHashCode) 78 { 79 if (other.isBogus()) 80 { 81 setToBogus(); 82 return; 83 } 84 85 int32_t length = fFlagAndLength; 86 if (length > getCapacity() && reallocate(length, 0) == NULL) { 87 setToBogus(); 88 return; 89 } 90 91 if (length > 0) { 92 uprv_memcpy(getBytes(), other.getBytes(), length); 93 } 94 } 95 96 CollationKey::~CollationKey() 97 { 98 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 99 } 100 101 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { 102 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); 103 if(newBytes == NULL) { return NULL; } 104 if(length > 0) { 105 uprv_memcpy(newBytes, getBytes(), length); 106 } 107 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 108 fUnion.fFields.fBytes = newBytes; 109 fUnion.fFields.fCapacity = newCapacity; 110 fFlagAndLength |= 0x80000000; 111 return newBytes; 112 } 113 114 void CollationKey::setLength(int32_t newLength) { 115 // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); 116 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; 117 fHashCode = kInvalidHashCode; 118 } 119 120 // set the key to an empty state 121 CollationKey& 122 CollationKey::reset() 123 { 124 fFlagAndLength &= 0x80000000; 125 fHashCode = kEmptyHashCode; 126 127 return *this; 128 } 129 130 // set the key to a "bogus" or invalid state 131 CollationKey& 132 CollationKey::setToBogus() 133 { 134 fFlagAndLength &= 0x80000000; 135 fHashCode = kBogusHashCode; 136 137 return *this; 138 } 139 140 UBool 141 CollationKey::operator==(const CollationKey& source) const 142 { 143 return getLength() == source.getLength() && 144 (this == &source || 145 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); 146 } 147 148 const CollationKey& 149 CollationKey::operator=(const CollationKey& other) 150 { 151 if (this != &other) 152 { 153 if (other.isBogus()) 154 { 155 return setToBogus(); 156 } 157 158 int32_t length = other.getLength(); 159 if (length > getCapacity() && reallocate(length, 0) == NULL) { 160 return setToBogus(); 161 } 162 if (length > 0) { 163 uprv_memcpy(getBytes(), other.getBytes(), length); 164 } 165 fFlagAndLength = (fFlagAndLength & 0x80000000) | length; 166 fHashCode = other.fHashCode; 167 } 168 169 return *this; 170 } 171 172 // Bitwise comparison for the collation keys. 173 Collator::EComparisonResult 174 CollationKey::compareTo(const CollationKey& target) const 175 { 176 UErrorCode errorCode = U_ZERO_ERROR; 177 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); 178 } 179 180 // Bitwise comparison for the collation keys. 181 UCollationResult 182 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 183 { 184 if(U_SUCCESS(status)) { 185 const uint8_t *src = getBytes(); 186 const uint8_t *tgt = target.getBytes(); 187 188 // are we comparing the same string 189 if (src == tgt) 190 return UCOL_EQUAL; 191 192 UCollationResult result; 193 194 // are we comparing different lengths? 195 int32_t minLength = getLength(); 196 int32_t targetLength = target.getLength(); 197 if (minLength < targetLength) { 198 result = UCOL_LESS; 199 } else if (minLength == targetLength) { 200 result = UCOL_EQUAL; 201 } else { 202 minLength = targetLength; 203 result = UCOL_GREATER; 204 } 205 206 if (minLength > 0) { 207 int diff = uprv_memcmp(src, tgt, minLength); 208 if (diff > 0) { 209 return UCOL_GREATER; 210 } 211 else 212 if (diff < 0) { 213 return UCOL_LESS; 214 } 215 } 216 217 return result; 218 } else { 219 return UCOL_EQUAL; 220 } 221 } 222 223 #ifdef U_USE_COLLATION_KEY_DEPRECATES 224 // Create a copy of the byte array. 225 uint8_t* 226 CollationKey::toByteArray(int32_t& count) const 227 { 228 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 229 230 if (result == NULL) 231 { 232 count = 0; 233 } 234 else 235 { 236 count = fCount; 237 if (count > 0) { 238 uprv_memcpy(result, fBytes, fCount); 239 } 240 } 241 242 return result; 243 } 244 #endif 245 246 static int32_t 247 computeHashCode(const uint8_t *key, int32_t length) { 248 const char *s = reinterpret_cast<const char *>(key); 249 int32_t hash; 250 if (s == NULL || length == 0) { 251 hash = kEmptyHashCode; 252 } else { 253 hash = ustr_hashCharsN(s, length); 254 if (hash == kInvalidHashCode || hash == kBogusHashCode) { 255 hash = kEmptyHashCode; 256 } 257 } 258 return hash; 259 } 260 261 int32_t 262 CollationKey::hashCode() const 263 { 264 // (Cribbed from UnicodeString) 265 // We cache the hashCode; when it becomes invalid, due to any change to the 266 // string, we note this by setting it to kInvalidHashCode. [LIU] 267 268 // Note: This method is semantically const, but physically non-const. 269 270 if (fHashCode == kInvalidHashCode) 271 { 272 fHashCode = computeHashCode(getBytes(), getLength()); 273 } 274 275 return fHashCode; 276 } 277 278 U_NAMESPACE_END 279 280 U_CAPI int32_t U_EXPORT2 281 ucol_keyHashCode(const uint8_t *key, 282 int32_t length) 283 { 284 return icu::computeHashCode(key, length); 285 } 286 287 #endif /* #if !UCONFIG_NO_COLLATION */ 288