1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_BASIC_HASHTABLE_H 18 #define ANDROID_BASIC_HASHTABLE_H 19 20 #include <stdint.h> 21 #include <sys/types.h> 22 #include <utils/SharedBuffer.h> 23 #include <utils/TypeHelpers.h> 24 25 namespace android { 26 27 /* Implementation type. Nothing to see here. */ 28 class BasicHashtableImpl { 29 protected: 30 struct Bucket { 31 // The collision flag indicates that the bucket is part of a collision chain 32 // such that at least two entries both hash to this bucket. When true, we 33 // may need to seek further along the chain to find the entry. 34 static const uint32_t COLLISION = 0x80000000UL; 35 36 // The present flag indicates that the bucket contains an initialized entry value. 37 static const uint32_t PRESENT = 0x40000000UL; 38 39 // Mask for 30 bits worth of the hash code that are stored within the bucket to 40 // speed up lookups and rehashing by eliminating the need to recalculate the 41 // hash code of the entry's key. 42 static const uint32_t HASH_MASK = 0x3fffffffUL; 43 44 // Combined value that stores the collision and present flags as well as 45 // a 30 bit hash code. 46 uint32_t cookie; 47 48 // Storage for the entry begins here. 49 char entry[0]; 50 }; 51 52 BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor, 53 size_t minimumInitialCapacity, float loadFactor); 54 BasicHashtableImpl(const BasicHashtableImpl& other); 55 virtual ~BasicHashtableImpl(); 56 57 void dispose(); 58 59 inline void edit() { 60 if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) { 61 clone(); 62 } 63 } 64 65 void setTo(const BasicHashtableImpl& other); 66 void clear(); 67 68 ssize_t next(ssize_t index) const; 69 ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const; 70 size_t add(hash_t hash, const void* __restrict__ entry); 71 void removeAt(size_t index); 72 void rehash(size_t minimumCapacity, float loadFactor); 73 74 const size_t mBucketSize; // number of bytes per bucket including the entry 75 const bool mHasTrivialDestructor; // true if the entry type does not require destruction 76 size_t mCapacity; // number of buckets that can be filled before exceeding load factor 77 float mLoadFactor; // load factor 78 size_t mSize; // number of elements actually in the table 79 size_t mFilledBuckets; // number of buckets for which collision or present is true 80 size_t mBucketCount; // number of slots in the mBuckets array 81 void* mBuckets; // array of buckets, as a SharedBuffer 82 83 inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const { 84 return *reinterpret_cast<const Bucket*>( 85 static_cast<const uint8_t*>(buckets) + index * mBucketSize); 86 } 87 88 inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const { 89 return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize); 90 } 91 92 virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0; 93 virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0; 94 virtual void destroyBucketEntry(Bucket& bucket) const = 0; 95 96 private: 97 void clone(); 98 99 // Allocates a bucket array as a SharedBuffer. 100 void* allocateBuckets(size_t count) const; 101 102 // Releases a bucket array's associated SharedBuffer. 103 void releaseBuckets(void* __restrict__ buckets, size_t count) const; 104 105 // Destroys the contents of buckets (invokes destroyBucketEntry for each 106 // populated bucket if needed). 107 void destroyBuckets(void* __restrict__ buckets, size_t count) const; 108 109 // Copies the content of buckets (copies the cookie and invokes copyBucketEntry 110 // for each populated bucket if needed). 111 void copyBuckets(const void* __restrict__ fromBuckets, 112 void* __restrict__ toBuckets, size_t count) const; 113 114 // Determines the appropriate size of a bucket array to store a certain minimum 115 // number of entries and returns its effective capacity. 116 static void determineCapacity(size_t minimumCapacity, float loadFactor, 117 size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity); 118 119 // Trim a hash code to 30 bits to match what we store in the bucket's cookie. 120 inline static hash_t trimHash(hash_t hash) { 121 return (hash & Bucket::HASH_MASK) ^ (hash >> 30); 122 } 123 124 // Returns the index of the first bucket that is in the collision chain 125 // for the specified hash code, given the total number of buckets. 126 // (Primary hash) 127 inline static size_t chainStart(hash_t hash, size_t count) { 128 return hash % count; 129 } 130 131 // Returns the increment to add to a bucket index to seek to the next bucket 132 // in the collision chain for the specified hash code, given the total number of buckets. 133 // (Secondary hash) 134 inline static size_t chainIncrement(hash_t hash, size_t count) { 135 return ((hash >> 7) | (hash << 25)) % (count - 1) + 1; 136 } 137 138 // Returns the index of the next bucket that is in the collision chain 139 // that is defined by the specified increment, given the total number of buckets. 140 inline static size_t chainSeek(size_t index, size_t increment, size_t count) { 141 return (index + increment) % count; 142 } 143 }; 144 145 /* 146 * A BasicHashtable stores entries that are indexed by hash code in place 147 * within an array. The basic operations are finding entries by key, 148 * adding new entries and removing existing entries. 149 * 150 * This class provides a very limited set of operations with simple semantics. 151 * It is intended to be used as a building block to construct more complex 152 * and interesting data structures such as HashMap. Think very hard before 153 * adding anything extra to BasicHashtable, it probably belongs at a 154 * higher level of abstraction. 155 * 156 * TKey: The key type. 157 * TEntry: The entry type which is what is actually stored in the array. 158 * 159 * TKey must support the following contract: 160 * bool operator==(const TKey& other) const; // return true if equal 161 * bool operator!=(const TKey& other) const; // return true if unequal 162 * 163 * TEntry must support the following contract: 164 * const TKey& getKey() const; // get the key from the entry 165 * 166 * This class supports storing entries with duplicate keys. Of course, it can't 167 * tell them apart during removal so only the first entry will be removed. 168 * We do this because it means that operations like add() can't fail. 169 */ 170 template <typename TKey, typename TEntry> 171 class BasicHashtable : private BasicHashtableImpl { 172 public: 173 /* Creates a hashtable with the specified minimum initial capacity. 174 * The underlying array will be created when the first entry is added. 175 * 176 * minimumInitialCapacity: The minimum initial capacity for the hashtable. 177 * Default is 0. 178 * loadFactor: The desired load factor for the hashtable, between 0 and 1. 179 * Default is 0.75. 180 */ 181 BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f); 182 183 /* Copies a hashtable. 184 * The underlying storage is shared copy-on-write. 185 */ 186 BasicHashtable(const BasicHashtable& other); 187 188 /* Clears and destroys the hashtable. 189 */ 190 virtual ~BasicHashtable(); 191 192 /* Making this hashtable a copy of the other hashtable. 193 * The underlying storage is shared copy-on-write. 194 * 195 * other: The hashtable to copy. 196 */ 197 inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) { 198 setTo(other); 199 return *this; 200 } 201 202 /* Returns the number of entries in the hashtable. 203 */ 204 inline size_t size() const { 205 return mSize; 206 } 207 208 /* Returns the capacity of the hashtable, which is the number of elements that can 209 * added to the hashtable without requiring it to be grown. 210 */ 211 inline size_t capacity() const { 212 return mCapacity; 213 } 214 215 /* Returns the number of buckets that the hashtable has, which is the size of its 216 * underlying array. 217 */ 218 inline size_t bucketCount() const { 219 return mBucketCount; 220 } 221 222 /* Returns the load factor of the hashtable. */ 223 inline float loadFactor() const { 224 return mLoadFactor; 225 }; 226 227 /* Returns a const reference to the entry at the specified index. 228 * 229 * index: The index of the entry to retrieve. Must be a valid index within 230 * the bounds of the hashtable. 231 */ 232 inline const TEntry& entryAt(size_t index) const { 233 return entryFor(bucketAt(mBuckets, index)); 234 } 235 236 /* Returns a non-const reference to the entry at the specified index. 237 * 238 * index: The index of the entry to edit. Must be a valid index within 239 * the bounds of the hashtable. 240 */ 241 inline TEntry& editEntryAt(size_t index) { 242 edit(); 243 return entryFor(bucketAt(mBuckets, index)); 244 } 245 246 /* Clears the hashtable. 247 * All entries in the hashtable are destroyed immediately. 248 * If you need to do something special with the entries in the hashtable then iterate 249 * over them and do what you need before clearing the hashtable. 250 */ 251 inline void clear() { 252 BasicHashtableImpl::clear(); 253 } 254 255 /* Returns the index of the next entry in the hashtable given the index of a previous entry. 256 * If the given index is -1, then returns the index of the first entry in the hashtable, 257 * if there is one, or -1 otherwise. 258 * If the given index is not -1, then returns the index of the next entry in the hashtable, 259 * in strictly increasing order, or -1 if there are none left. 260 * 261 * index: The index of the previous entry that was iterated, or -1 to begin 262 * iteration at the beginning of the hashtable. 263 */ 264 inline ssize_t next(ssize_t index) const { 265 return BasicHashtableImpl::next(index); 266 } 267 268 /* Finds the index of an entry with the specified key. 269 * If the given index is -1, then returns the index of the first matching entry, 270 * otherwise returns the index of the next matching entry. 271 * If the hashtable contains multiple entries with keys that match the requested 272 * key, then the sequence of entries returned is arbitrary. 273 * Returns -1 if no entry was found. 274 * 275 * index: The index of the previous entry with the specified key, or -1 to 276 * find the first matching entry. 277 * hash: The hashcode of the key. 278 * key: The key. 279 */ 280 inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const { 281 return BasicHashtableImpl::find(index, hash, &key); 282 } 283 284 /* Adds the entry to the hashtable. 285 * Returns the index of the newly added entry. 286 * If an entry with the same key already exists, then a duplicate entry is added. 287 * If the entry will not fit, then the hashtable's capacity is increased and 288 * its contents are rehashed. See rehash(). 289 * 290 * hash: The hashcode of the key. 291 * entry: The entry to add. 292 */ 293 inline size_t add(hash_t hash, const TEntry& entry) { 294 return BasicHashtableImpl::add(hash, &entry); 295 } 296 297 /* Removes the entry with the specified index from the hashtable. 298 * The entry is destroyed immediately. 299 * The index must be valid. 300 * 301 * The hashtable is not compacted after an item is removed, so it is legal 302 * to continue iterating over the hashtable using next() or find(). 303 * 304 * index: The index of the entry to remove. Must be a valid index within the 305 * bounds of the hashtable, and it must refer to an existing entry. 306 */ 307 inline void removeAt(size_t index) { 308 BasicHashtableImpl::removeAt(index); 309 } 310 311 /* Rehashes the contents of the hashtable. 312 * Grows the hashtable to at least the specified minimum capacity or the 313 * current number of elements, whichever is larger. 314 * 315 * Rehashing causes all entries to be copied and the entry indices may change. 316 * Although the hash codes are cached by the hashtable, rehashing can be an 317 * expensive operation and should be avoided unless the hashtable's size 318 * needs to be changed. 319 * 320 * Rehashing is the only way to change the capacity or load factor of the 321 * hashtable once it has been created. It can be used to compact the 322 * hashtable by choosing a minimum capacity that is smaller than the current 323 * capacity (such as 0). 324 * 325 * minimumCapacity: The desired minimum capacity after rehashing. 326 * loadFactor: The desired load factor after rehashing. 327 */ 328 inline void rehash(size_t minimumCapacity, float loadFactor) { 329 BasicHashtableImpl::rehash(minimumCapacity, loadFactor); 330 } 331 332 /* Determines whether there is room to add another entry without rehashing. 333 * When this returns true, a subsequent add() operation is guaranteed to 334 * complete without performing a rehash. 335 */ 336 inline bool hasMoreRoom() const { 337 return mCapacity > mFilledBuckets; 338 } 339 340 protected: 341 static inline const TEntry& entryFor(const Bucket& bucket) { 342 return reinterpret_cast<const TEntry&>(bucket.entry); 343 } 344 345 static inline TEntry& entryFor(Bucket& bucket) { 346 return reinterpret_cast<TEntry&>(bucket.entry); 347 } 348 349 virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const; 350 virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const; 351 virtual void destroyBucketEntry(Bucket& bucket) const; 352 353 private: 354 // For dumping the raw contents of a hashtable during testing. 355 friend class BasicHashtableTest; 356 inline uint32_t cookieAt(size_t index) const { 357 return bucketAt(mBuckets, index).cookie; 358 } 359 }; 360 361 template <typename TKey, typename TEntry> 362 BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) : 363 BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor, 364 minimumInitialCapacity, loadFactor) { 365 } 366 367 template <typename TKey, typename TEntry> 368 BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) : 369 BasicHashtableImpl(other) { 370 } 371 372 template <typename TKey, typename TEntry> 373 BasicHashtable<TKey, TEntry>::~BasicHashtable() { 374 dispose(); 375 } 376 377 template <typename TKey, typename TEntry> 378 bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket, 379 const void* __restrict__ key) const { 380 return entryFor(bucket).getKey() == *static_cast<const TKey*>(key); 381 } 382 383 template <typename TKey, typename TEntry> 384 void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket, 385 const void* __restrict__ entry) const { 386 if (!traits<TEntry>::has_trivial_copy) { 387 new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry))); 388 } else { 389 memcpy(&entryFor(bucket), entry, sizeof(TEntry)); 390 } 391 } 392 393 template <typename TKey, typename TEntry> 394 void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const { 395 if (!traits<TEntry>::has_trivial_dtor) { 396 entryFor(bucket).~TEntry(); 397 } 398 } 399 400 }; // namespace android 401 402 #endif // ANDROID_BASIC_HASHTABLE_H 403