Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_BASIC_HASHTABLE_H
     18 #define ANDROID_BASIC_HASHTABLE_H
     19 
     20 #include <stdint.h>
     21 #include <sys/types.h>
     22 #include <utils/SharedBuffer.h>
     23 #include <utils/TypeHelpers.h>
     24 
     25 namespace android {
     26 
     27 /* Implementation type.  Nothing to see here. */
     28 class BasicHashtableImpl {
     29 protected:
     30     struct Bucket {
     31         // The collision flag indicates that the bucket is part of a collision chain
     32         // such that at least two entries both hash to this bucket.  When true, we
     33         // may need to seek further along the chain to find the entry.
     34         static const uint32_t COLLISION = 0x80000000UL;
     35 
     36         // The present flag indicates that the bucket contains an initialized entry value.
     37         static const uint32_t PRESENT   = 0x40000000UL;
     38 
     39         // Mask for 30 bits worth of the hash code that are stored within the bucket to
     40         // speed up lookups and rehashing by eliminating the need to recalculate the
     41         // hash code of the entry's key.
     42         static const uint32_t HASH_MASK = 0x3fffffffUL;
     43 
     44         // Combined value that stores the collision and present flags as well as
     45         // a 30 bit hash code.
     46         uint32_t cookie;
     47 
     48         // Storage for the entry begins here.
     49         char entry[0];
     50     };
     51 
     52     BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
     53             size_t minimumInitialCapacity, float loadFactor);
     54     BasicHashtableImpl(const BasicHashtableImpl& other);
     55     virtual ~BasicHashtableImpl();
     56 
     57     void dispose();
     58 
     59     inline void edit() {
     60         if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
     61             clone();
     62         }
     63     }
     64 
     65     void setTo(const BasicHashtableImpl& other);
     66     void clear();
     67 
     68     ssize_t next(ssize_t index) const;
     69     ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
     70     size_t add(hash_t hash, const void* __restrict__ entry);
     71     void removeAt(size_t index);
     72     void rehash(size_t minimumCapacity, float loadFactor);
     73 
     74     const size_t mBucketSize; // number of bytes per bucket including the entry
     75     const bool mHasTrivialDestructor; // true if the entry type does not require destruction
     76     size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
     77     float mLoadFactor;        // load factor
     78     size_t mSize;             // number of elements actually in the table
     79     size_t mFilledBuckets;    // number of buckets for which collision or present is true
     80     size_t mBucketCount;      // number of slots in the mBuckets array
     81     void* mBuckets;           // array of buckets, as a SharedBuffer
     82 
     83     inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
     84         return *reinterpret_cast<const Bucket*>(
     85                 static_cast<const uint8_t*>(buckets) + index * mBucketSize);
     86     }
     87 
     88     inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
     89         return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
     90     }
     91 
     92     virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
     93     virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
     94     virtual void destroyBucketEntry(Bucket& bucket) const = 0;
     95 
     96 private:
     97     void clone();
     98 
     99     // Allocates a bucket array as a SharedBuffer.
    100     void* allocateBuckets(size_t count) const;
    101 
    102     // Releases a bucket array's associated SharedBuffer.
    103     void releaseBuckets(void* __restrict__ buckets, size_t count) const;
    104 
    105     // Destroys the contents of buckets (invokes destroyBucketEntry for each
    106     // populated bucket if needed).
    107     void destroyBuckets(void* __restrict__ buckets, size_t count) const;
    108 
    109     // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
    110     // for each populated bucket if needed).
    111     void copyBuckets(const void* __restrict__ fromBuckets,
    112             void* __restrict__ toBuckets, size_t count) const;
    113 
    114     // Determines the appropriate size of a bucket array to store a certain minimum
    115     // number of entries and returns its effective capacity.
    116     static void determineCapacity(size_t minimumCapacity, float loadFactor,
    117             size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
    118 
    119     // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
    120     inline static hash_t trimHash(hash_t hash) {
    121         return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
    122     }
    123 
    124     // Returns the index of the first bucket that is in the collision chain
    125     // for the specified hash code, given the total number of buckets.
    126     // (Primary hash)
    127     inline static size_t chainStart(hash_t hash, size_t count) {
    128         return hash % count;
    129     }
    130 
    131     // Returns the increment to add to a bucket index to seek to the next bucket
    132     // in the collision chain for the specified hash code, given the total number of buckets.
    133     // (Secondary hash)
    134     inline static size_t chainIncrement(hash_t hash, size_t count) {
    135         return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
    136     }
    137 
    138     // Returns the index of the next bucket that is in the collision chain
    139     // that is defined by the specified increment, given the total number of buckets.
    140     inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
    141         return (index + increment) % count;
    142     }
    143 };
    144 
    145 /*
    146  * A BasicHashtable stores entries that are indexed by hash code in place
    147  * within an array.  The basic operations are finding entries by key,
    148  * adding new entries and removing existing entries.
    149  *
    150  * This class provides a very limited set of operations with simple semantics.
    151  * It is intended to be used as a building block to construct more complex
    152  * and interesting data structures such as HashMap.  Think very hard before
    153  * adding anything extra to BasicHashtable, it probably belongs at a
    154  * higher level of abstraction.
    155  *
    156  * TKey: The key type.
    157  * TEntry: The entry type which is what is actually stored in the array.
    158  *
    159  * TKey must support the following contract:
    160  *     bool operator==(const TKey& other) const;  // return true if equal
    161  *     bool operator!=(const TKey& other) const;  // return true if unequal
    162  *
    163  * TEntry must support the following contract:
    164  *     const TKey& getKey() const;  // get the key from the entry
    165  *
    166  * This class supports storing entries with duplicate keys.  Of course, it can't
    167  * tell them apart during removal so only the first entry will be removed.
    168  * We do this because it means that operations like add() can't fail.
    169  */
    170 template <typename TKey, typename TEntry>
    171 class BasicHashtable : private BasicHashtableImpl {
    172 public:
    173     /* Creates a hashtable with the specified minimum initial capacity.
    174      * The underlying array will be created when the first entry is added.
    175      *
    176      * minimumInitialCapacity: The minimum initial capacity for the hashtable.
    177      *     Default is 0.
    178      * loadFactor: The desired load factor for the hashtable, between 0 and 1.
    179      *     Default is 0.75.
    180      */
    181     BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
    182 
    183     /* Copies a hashtable.
    184      * The underlying storage is shared copy-on-write.
    185      */
    186     BasicHashtable(const BasicHashtable& other);
    187 
    188     /* Clears and destroys the hashtable.
    189      */
    190     virtual ~BasicHashtable();
    191 
    192     /* Making this hashtable a copy of the other hashtable.
    193      * The underlying storage is shared copy-on-write.
    194      *
    195      * other: The hashtable to copy.
    196      */
    197     inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
    198         setTo(other);
    199         return *this;
    200     }
    201 
    202     /* Returns the number of entries in the hashtable.
    203      */
    204     inline size_t size() const {
    205         return mSize;
    206     }
    207 
    208     /* Returns the capacity of the hashtable, which is the number of elements that can
    209      * added to the hashtable without requiring it to be grown.
    210      */
    211     inline size_t capacity() const {
    212         return mCapacity;
    213     }
    214 
    215     /* Returns the number of buckets that the hashtable has, which is the size of its
    216      * underlying array.
    217      */
    218     inline size_t bucketCount() const {
    219         return mBucketCount;
    220     }
    221 
    222     /* Returns the load factor of the hashtable. */
    223     inline float loadFactor() const {
    224         return mLoadFactor;
    225     };
    226 
    227     /* Returns a const reference to the entry at the specified index.
    228      *
    229      * index:   The index of the entry to retrieve.  Must be a valid index within
    230      *          the bounds of the hashtable.
    231      */
    232     inline const TEntry& entryAt(size_t index) const {
    233         return entryFor(bucketAt(mBuckets, index));
    234     }
    235 
    236     /* Returns a non-const reference to the entry at the specified index.
    237      *
    238      * index: The index of the entry to edit.  Must be a valid index within
    239      *        the bounds of the hashtable.
    240      */
    241     inline TEntry& editEntryAt(size_t index) {
    242         edit();
    243         return entryFor(bucketAt(mBuckets, index));
    244     }
    245 
    246     /* Clears the hashtable.
    247      * All entries in the hashtable are destroyed immediately.
    248      * If you need to do something special with the entries in the hashtable then iterate
    249      * over them and do what you need before clearing the hashtable.
    250      */
    251     inline void clear() {
    252         BasicHashtableImpl::clear();
    253     }
    254 
    255     /* Returns the index of the next entry in the hashtable given the index of a previous entry.
    256      * If the given index is -1, then returns the index of the first entry in the hashtable,
    257      * if there is one, or -1 otherwise.
    258      * If the given index is not -1, then returns the index of the next entry in the hashtable,
    259      * in strictly increasing order, or -1 if there are none left.
    260      *
    261      * index:   The index of the previous entry that was iterated, or -1 to begin
    262      *          iteration at the beginning of the hashtable.
    263      */
    264     inline ssize_t next(ssize_t index) const {
    265         return BasicHashtableImpl::next(index);
    266     }
    267 
    268     /* Finds the index of an entry with the specified key.
    269      * If the given index is -1, then returns the index of the first matching entry,
    270      * otherwise returns the index of the next matching entry.
    271      * If the hashtable contains multiple entries with keys that match the requested
    272      * key, then the sequence of entries returned is arbitrary.
    273      * Returns -1 if no entry was found.
    274      *
    275      * index:   The index of the previous entry with the specified key, or -1 to
    276      *          find the first matching entry.
    277      * hash:    The hashcode of the key.
    278      * key:     The key.
    279      */
    280     inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
    281         return BasicHashtableImpl::find(index, hash, &key);
    282     }
    283 
    284     /* Adds the entry to the hashtable.
    285      * Returns the index of the newly added entry.
    286      * If an entry with the same key already exists, then a duplicate entry is added.
    287      * If the entry will not fit, then the hashtable's capacity is increased and
    288      * its contents are rehashed.  See rehash().
    289      *
    290      * hash:    The hashcode of the key.
    291      * entry:   The entry to add.
    292      */
    293     inline size_t add(hash_t hash, const TEntry& entry) {
    294         return BasicHashtableImpl::add(hash, &entry);
    295     }
    296 
    297     /* Removes the entry with the specified index from the hashtable.
    298      * The entry is destroyed immediately.
    299      * The index must be valid.
    300      *
    301      * The hashtable is not compacted after an item is removed, so it is legal
    302      * to continue iterating over the hashtable using next() or find().
    303      *
    304      * index:   The index of the entry to remove.  Must be a valid index within the
    305      *          bounds of the hashtable, and it must refer to an existing entry.
    306      */
    307     inline void removeAt(size_t index) {
    308         BasicHashtableImpl::removeAt(index);
    309     }
    310 
    311     /* Rehashes the contents of the hashtable.
    312      * Grows the hashtable to at least the specified minimum capacity or the
    313      * current number of elements, whichever is larger.
    314      *
    315      * Rehashing causes all entries to be copied and the entry indices may change.
    316      * Although the hash codes are cached by the hashtable, rehashing can be an
    317      * expensive operation and should be avoided unless the hashtable's size
    318      * needs to be changed.
    319      *
    320      * Rehashing is the only way to change the capacity or load factor of the
    321      * hashtable once it has been created.  It can be used to compact the
    322      * hashtable by choosing a minimum capacity that is smaller than the current
    323      * capacity (such as 0).
    324      *
    325      * minimumCapacity: The desired minimum capacity after rehashing.
    326      * loadFactor: The desired load factor after rehashing.
    327      */
    328     inline void rehash(size_t minimumCapacity, float loadFactor) {
    329         BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
    330     }
    331 
    332     /* Determines whether there is room to add another entry without rehashing.
    333      * When this returns true, a subsequent add() operation is guaranteed to
    334      * complete without performing a rehash.
    335      */
    336     inline bool hasMoreRoom() const {
    337         return mCapacity > mFilledBuckets;
    338     }
    339 
    340 protected:
    341     static inline const TEntry& entryFor(const Bucket& bucket) {
    342         return reinterpret_cast<const TEntry&>(bucket.entry);
    343     }
    344 
    345     static inline TEntry& entryFor(Bucket& bucket) {
    346         return reinterpret_cast<TEntry&>(bucket.entry);
    347     }
    348 
    349     virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
    350     virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
    351     virtual void destroyBucketEntry(Bucket& bucket) const;
    352 
    353 private:
    354     // For dumping the raw contents of a hashtable during testing.
    355     friend class BasicHashtableTest;
    356     inline uint32_t cookieAt(size_t index) const {
    357         return bucketAt(mBuckets, index).cookie;
    358     }
    359 };
    360 
    361 template <typename TKey, typename TEntry>
    362 BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
    363         BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
    364                 minimumInitialCapacity, loadFactor) {
    365 }
    366 
    367 template <typename TKey, typename TEntry>
    368 BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
    369         BasicHashtableImpl(other) {
    370 }
    371 
    372 template <typename TKey, typename TEntry>
    373 BasicHashtable<TKey, TEntry>::~BasicHashtable() {
    374     dispose();
    375 }
    376 
    377 template <typename TKey, typename TEntry>
    378 bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
    379         const void* __restrict__ key) const {
    380     return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
    381 }
    382 
    383 template <typename TKey, typename TEntry>
    384 void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
    385         const void* __restrict__ entry) const {
    386     if (!traits<TEntry>::has_trivial_copy) {
    387         new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
    388     } else {
    389         memcpy(&entryFor(bucket), entry, sizeof(TEntry));
    390     }
    391 }
    392 
    393 template <typename TKey, typename TEntry>
    394 void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
    395     if (!traits<TEntry>::has_trivial_dtor) {
    396         entryFor(bucket).~TEntry();
    397     }
    398 }
    399 
    400 }; // namespace android
    401 
    402 #endif // ANDROID_BASIC_HASHTABLE_H
    403