1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 //=============================================================================== 8 // 9 // File sortkey.cpp 10 // 11 // 12 // 13 // Created by: Helena Shih 14 // 15 // Modification History: 16 // 17 // Date Name Description 18 // 19 // 6/20/97 helena Java class name change. 20 // 6/23/97 helena Added comments to make code more readable. 21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeString 22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1, 23 // Cleaned up operator= 24 // 07/12/99 helena HPUX 11 CC port. 25 // 03/06/01 synwee Modified compareTo, to handle the result of 26 // 2 string similar in contents, but one is longer 27 // than the other 28 //=============================================================================== 29 30 #include "unicode/utypes.h" 31 32 #if !UCONFIG_NO_COLLATION 33 34 #include "unicode/sortkey.h" 35 #include "cmemory.h" 36 #include "uelement.h" 37 #include "ustr_imp.h" 38 39 U_NAMESPACE_BEGIN 40 41 // A hash code of kInvalidHashCode indicates that the hash code needs 42 // to be computed. A hash code of kEmptyHashCode is used for empty keys 43 // and for any key whose computed hash code is kInvalidHashCode. 44 static const int32_t kInvalidHashCode = 0; 45 static const int32_t kEmptyHashCode = 1; 46 // The "bogus hash code" replaces a separate fBogus flag. 47 static const int32_t kBogusHashCode = 2; 48 49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 50 51 CollationKey::CollationKey() 52 : UObject(), fFlagAndLength(0), 53 fHashCode(kEmptyHashCode) 54 { 55 } 56 57 // Create a collation key from a bit array. 58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 59 : UObject(), fFlagAndLength(count), 60 fHashCode(kInvalidHashCode) 61 { 62 if (count < 0 || (newValues == NULL && count != 0) || 63 (count > getCapacity() && reallocate(count, 0) == NULL)) { 64 setToBogus(); 65 return; 66 } 67 68 if (count > 0) { 69 uprv_memcpy(getBytes(), newValues, count); 70 } 71 } 72 73 CollationKey::CollationKey(const CollationKey& other) 74 : UObject(other), fFlagAndLength(other.getLength()), 75 fHashCode(other.fHashCode) 76 { 77 if (other.isBogus()) 78 { 79 setToBogus(); 80 return; 81 } 82 83 int32_t length = fFlagAndLength; 84 if (length > getCapacity() && reallocate(length, 0) == NULL) { 85 setToBogus(); 86 return; 87 } 88 89 if (length > 0) { 90 uprv_memcpy(getBytes(), other.getBytes(), length); 91 } 92 } 93 94 CollationKey::~CollationKey() 95 { 96 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 97 } 98 99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { 100 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); 101 if(newBytes == NULL) { return NULL; } 102 if(length > 0) { 103 uprv_memcpy(newBytes, getBytes(), length); 104 } 105 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 106 fUnion.fFields.fBytes = newBytes; 107 fUnion.fFields.fCapacity = newCapacity; 108 fFlagAndLength |= 0x80000000; 109 return newBytes; 110 } 111 112 void CollationKey::setLength(int32_t newLength) { 113 // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); 114 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; 115 fHashCode = kInvalidHashCode; 116 } 117 118 // set the key to an empty state 119 CollationKey& 120 CollationKey::reset() 121 { 122 fFlagAndLength &= 0x80000000; 123 fHashCode = kEmptyHashCode; 124 125 return *this; 126 } 127 128 // set the key to a "bogus" or invalid state 129 CollationKey& 130 CollationKey::setToBogus() 131 { 132 fFlagAndLength &= 0x80000000; 133 fHashCode = kBogusHashCode; 134 135 return *this; 136 } 137 138 UBool 139 CollationKey::operator==(const CollationKey& source) const 140 { 141 return getLength() == source.getLength() && 142 (this == &source || 143 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); 144 } 145 146 const CollationKey& 147 CollationKey::operator=(const CollationKey& other) 148 { 149 if (this != &other) 150 { 151 if (other.isBogus()) 152 { 153 return setToBogus(); 154 } 155 156 int32_t length = other.getLength(); 157 if (length > getCapacity() && reallocate(length, 0) == NULL) { 158 return setToBogus(); 159 } 160 if (length > 0) { 161 uprv_memcpy(getBytes(), other.getBytes(), length); 162 } 163 fFlagAndLength = (fFlagAndLength & 0x80000000) | length; 164 fHashCode = other.fHashCode; 165 } 166 167 return *this; 168 } 169 170 // Bitwise comparison for the collation keys. 171 Collator::EComparisonResult 172 CollationKey::compareTo(const CollationKey& target) const 173 { 174 UErrorCode errorCode = U_ZERO_ERROR; 175 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); 176 } 177 178 // Bitwise comparison for the collation keys. 179 UCollationResult 180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 181 { 182 if(U_SUCCESS(status)) { 183 const uint8_t *src = getBytes(); 184 const uint8_t *tgt = target.getBytes(); 185 186 // are we comparing the same string 187 if (src == tgt) 188 return UCOL_EQUAL; 189 190 UCollationResult result; 191 192 // are we comparing different lengths? 193 int32_t minLength = getLength(); 194 int32_t targetLength = target.getLength(); 195 if (minLength < targetLength) { 196 result = UCOL_LESS; 197 } else if (minLength == targetLength) { 198 result = UCOL_EQUAL; 199 } else { 200 minLength = targetLength; 201 result = UCOL_GREATER; 202 } 203 204 if (minLength > 0) { 205 int diff = uprv_memcmp(src, tgt, minLength); 206 if (diff > 0) { 207 return UCOL_GREATER; 208 } 209 else 210 if (diff < 0) { 211 return UCOL_LESS; 212 } 213 } 214 215 return result; 216 } else { 217 return UCOL_EQUAL; 218 } 219 } 220 221 #ifdef U_USE_COLLATION_KEY_DEPRECATES 222 // Create a copy of the byte array. 223 uint8_t* 224 CollationKey::toByteArray(int32_t& count) const 225 { 226 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 227 228 if (result == NULL) 229 { 230 count = 0; 231 } 232 else 233 { 234 count = fCount; 235 if (count > 0) { 236 uprv_memcpy(result, fBytes, fCount); 237 } 238 } 239 240 return result; 241 } 242 #endif 243 244 static int32_t 245 computeHashCode(const uint8_t *key, int32_t length) { 246 const char *s = reinterpret_cast<const char *>(key); 247 int32_t hash; 248 if (s == NULL || length == 0) { 249 hash = kEmptyHashCode; 250 } else { 251 hash = ustr_hashCharsN(s, length); 252 if (hash == kInvalidHashCode || hash == kBogusHashCode) { 253 hash = kEmptyHashCode; 254 } 255 } 256 return hash; 257 } 258 259 int32_t 260 CollationKey::hashCode() const 261 { 262 // (Cribbed from UnicodeString) 263 // We cache the hashCode; when it becomes invalid, due to any change to the 264 // string, we note this by setting it to kInvalidHashCode. [LIU] 265 266 // Note: This method is semantically const, but physically non-const. 267 268 if (fHashCode == kInvalidHashCode) 269 { 270 fHashCode = computeHashCode(getBytes(), getLength()); 271 } 272 273 return fHashCode; 274 } 275 276 U_NAMESPACE_END 277 278 U_CAPI int32_t U_EXPORT2 279 ucol_keyHashCode(const uint8_t *key, 280 int32_t length) 281 { 282 return icu::computeHashCode(key, length); 283 } 284 285 #endif /* #if !UCONFIG_NO_COLLATION */ 286