1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 //=============================================================================== 8 // 9 // File sortkey.cpp 10 // 11 // 12 // 13 // Created by: Helena Shih 14 // 15 // Modification History: 16 // 17 // Date Name Description 18 // 19 // 6/20/97 helena Java class name change. 20 // 6/23/97 helena Added comments to make code more readable. 21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString 22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1, 23 // Cleaned up operator= 24 // 07/12/99 helena HPUX 11 CC port. 25 // 03/06/01 synwee Modified compareTo, to handle the result of 26 // 2 string similar in contents, but one is longer 27 // than the other 28 //=============================================================================== 29 30 #include "unicode/utypes.h" 31 32 #if !UCONFIG_NO_COLLATION 33 34 #include "unicode/sortkey.h" 35 #include "cmemory.h" 36 #include "uhash.h" 37 38 U_NAMESPACE_BEGIN 39 40 // A hash code of kInvalidHashCode indicates that the has code needs 41 // to be computed. A hash code of kEmptyHashCode is used for empty keys 42 // and for any key whose computed hash code is kInvalidHashCode. 43 #define kInvalidHashCode ((int32_t)0) 44 #define kEmptyHashCode ((int32_t)1) 45 46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 47 48 CollationKey::CollationKey() 49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), 50 fHashCode(kEmptyHashCode), fBytes(NULL) 51 { 52 } 53 54 // Create a collation key from a bit array. 55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), 57 fHashCode(kInvalidHashCode) 58 { 59 fBytes = (uint8_t *)uprv_malloc(count); 60 61 if (fBytes == NULL) 62 { 63 setToBogus(); 64 return; 65 } 66 67 uprv_memcpy(fBytes, newValues, fCount); 68 } 69 70 CollationKey::CollationKey(const CollationKey& other) 71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), 72 fHashCode(other.fHashCode), fBytes(NULL) 73 { 74 if (other.fBogus) 75 { 76 setToBogus(); 77 return; 78 } 79 80 fBytes = (uint8_t *)uprv_malloc(fCapacity); 81 82 if (fBytes == NULL) 83 { 84 setToBogus(); 85 return; 86 } 87 88 uprv_memcpy(fBytes, other.fBytes, other.fCount); 89 if(fCapacity>fCount) { 90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount); 91 } 92 } 93 94 CollationKey::~CollationKey() 95 { 96 uprv_free(fBytes); 97 } 98 99 void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) { 100 if(fBytes != NULL) { 101 uprv_free(fBytes); 102 } 103 fBytes = values; 104 fCapacity = capacity; 105 setLength(count); 106 } 107 108 void CollationKey::setLength(int32_t newLength) { 109 fBogus = FALSE; 110 fCount = newLength; 111 fHashCode = kInvalidHashCode; 112 } 113 114 // set the key to an empty state 115 CollationKey& 116 CollationKey::reset() 117 { 118 fCount = 0; 119 fBogus = FALSE; 120 fHashCode = kEmptyHashCode; 121 122 return *this; 123 } 124 125 // set the key to a "bogus" or invalid state 126 CollationKey& 127 CollationKey::setToBogus() 128 { 129 uprv_free(fBytes); 130 fBytes = NULL; 131 132 fCapacity = 0; 133 fCount = 0; 134 fHashCode = kInvalidHashCode; 135 136 return *this; 137 } 138 139 UBool 140 CollationKey::operator==(const CollationKey& source) const 141 { 142 return (this->fCount == source.fCount && 143 (this->fBytes == source.fBytes || 144 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); 145 } 146 147 const CollationKey& 148 CollationKey::operator=(const CollationKey& other) 149 { 150 if (this != &other) 151 { 152 if (other.isBogus()) 153 { 154 return setToBogus(); 155 } 156 157 if (other.fBytes != NULL) 158 { 159 ensureCapacity(other.fCount); 160 161 if (isBogus()) 162 { 163 return *this; 164 } 165 166 fHashCode = other.fHashCode; 167 uprv_memcpy(fBytes, other.fBytes, fCount); 168 } 169 else 170 { 171 fCount = 0; 172 fBogus = FALSE; 173 fHashCode = kEmptyHashCode; 174 } 175 } 176 177 return *this; 178 } 179 180 // Bitwise comparison for the collation keys. 181 // NOTE: this is somewhat messy 'cause we can't count 182 // on memcmp returning the exact values which match 183 // Collator::EComparisonResult 184 Collator::EComparisonResult 185 CollationKey::compareTo(const CollationKey& target) const 186 { 187 uint8_t *src = this->fBytes; 188 uint8_t *tgt = target.fBytes; 189 190 // are we comparing the same string 191 if (src == tgt) 192 return Collator::EQUAL; 193 194 /* 195 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; 196 if (count == 0) 197 { 198 // If count is 0, at least one of the keys is empty. 199 // An empty key is always LESS than a non-empty one 200 // and EQUAL to another empty 201 if (this->fCount < target.fCount) 202 { 203 return Collator::LESS; 204 } 205 206 if (this->fCount > target.fCount) 207 { 208 return Collator::GREATER; 209 } 210 return Collator::EQUAL; 211 } 212 */ 213 214 int minLength; 215 Collator::EComparisonResult result; 216 217 // are we comparing different lengths? 218 if (this->fCount != target.fCount) { 219 if (this->fCount < target.fCount) { 220 minLength = this->fCount; 221 result = Collator::LESS; 222 } 223 else { 224 minLength = target.fCount; 225 result = Collator::GREATER; 226 } 227 } 228 else { 229 minLength = target.fCount; 230 result = Collator::EQUAL; 231 } 232 233 if (minLength > 0) { 234 int diff = uprv_memcmp(src, tgt, minLength); 235 if (diff > 0) { 236 return Collator::GREATER; 237 } 238 else 239 if (diff < 0) { 240 return Collator::LESS; 241 } 242 } 243 244 return result; 245 /* 246 if (result < 0) 247 { 248 return Collator::LESS; 249 } 250 251 if (result > 0) 252 { 253 return Collator::GREATER; 254 } 255 return Collator::EQUAL; 256 */ 257 } 258 259 // Bitwise comparison for the collation keys. 260 UCollationResult 261 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 262 { 263 if(U_SUCCESS(status)) { 264 uint8_t *src = this->fBytes; 265 uint8_t *tgt = target.fBytes; 266 267 // are we comparing the same string 268 if (src == tgt) 269 return UCOL_EQUAL; 270 271 int minLength; 272 UCollationResult result; 273 274 // are we comparing different lengths? 275 if (this->fCount != target.fCount) { 276 if (this->fCount < target.fCount) { 277 minLength = this->fCount; 278 result = UCOL_LESS; 279 } 280 else { 281 minLength = target.fCount; 282 result = UCOL_GREATER; 283 } 284 } 285 else { 286 minLength = target.fCount; 287 result = UCOL_EQUAL; 288 } 289 290 if (minLength > 0) { 291 int diff = uprv_memcmp(src, tgt, minLength); 292 if (diff > 0) { 293 return UCOL_GREATER; 294 } 295 else 296 if (diff < 0) { 297 return UCOL_LESS; 298 } 299 } 300 301 return result; 302 } else { 303 return UCOL_EQUAL; 304 } 305 } 306 307 CollationKey& 308 CollationKey::ensureCapacity(int32_t newSize) 309 { 310 if (fCapacity < newSize) 311 { 312 uprv_free(fBytes); 313 314 fBytes = (uint8_t *)uprv_malloc(newSize); 315 316 if (fBytes == NULL) 317 { 318 return setToBogus(); 319 } 320 321 uprv_memset(fBytes, 0, fCapacity); 322 fCapacity = newSize; 323 } 324 325 fBogus = FALSE; 326 fCount = newSize; 327 fHashCode = kInvalidHashCode; 328 329 return *this; 330 } 331 332 #ifdef U_USE_COLLATION_KEY_DEPRECATES 333 // Create a copy of the byte array. 334 uint8_t* 335 CollationKey::toByteArray(int32_t& count) const 336 { 337 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 338 339 if (result == NULL) 340 { 341 count = 0; 342 } 343 else 344 { 345 count = fCount; 346 uprv_memcpy(result, fBytes, fCount); 347 } 348 349 return result; 350 } 351 #endif 352 353 int32_t 354 CollationKey::hashCode() const 355 { 356 // (Cribbed from UnicodeString) 357 // We cache the hashCode; when it becomes invalid, due to any change to the 358 // string, we note this by setting it to kInvalidHashCode. [LIU] 359 360 // Note: This method is semantically const, but physically non-const. 361 362 if (fHashCode == kInvalidHashCode) 363 { 364 UHashTok key; 365 key.pointer = fBytes; 366 ((CollationKey *)this)->fHashCode = uhash_hashChars(key); 367 #if 0 368 // We compute the hash by iterating sparsely over 64 (at most) characters 369 // spaced evenly through the string. For each character, we multiply the 370 // previous hash value by a prime number and add the new character in, 371 // in the manner of a additive linear congruential random number generator, 372 // thus producing a pseudorandom deterministic value which should be well 373 // distributed over the output range. [LIU] 374 const uint8_t *p = fBytes, *limit = fBytes + fCount; 375 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); 376 int32_t hash = 0; 377 378 while (p < limit) 379 { 380 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); 381 p += inc; 382 } 383 384 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode 385 if (hash == kInvalidHashCode) 386 { 387 hash = kEmptyHashCode; 388 } 389 390 ((CollationKey *)this)->fHashCode = hash; // cast away const 391 #endif 392 } 393 394 return fHashCode; 395 } 396 397 U_NAMESPACE_END 398 399 U_CAPI int32_t U_EXPORT2 400 ucol_keyHashCode(const uint8_t *key, 401 int32_t length) 402 { 403 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); 404 return newKey.hashCode(); 405 } 406 407 #endif /* #if !UCONFIG_NO_COLLATION */ 408