Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2005, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // ---
     31 // Author: Sanjay Ghemawat
     32 //
     33 // A fast map from addresses to values.  Assumes that addresses are
     34 // clustered.  The main use is intended to be for heap-profiling.
     35 // May be too memory-hungry for other uses.
     36 //
     37 // We use a user-defined allocator/de-allocator so that we can use
     38 // this data structure during heap-profiling.
     39 //
     40 // IMPLEMENTATION DETAIL:
     41 //
     42 // Some default definitions/parameters:
     43 //  * Block      -- aligned 128-byte region of the address space
     44 //  * Cluster    -- aligned 1-MB region of the address space
     45 //  * Block-ID   -- block-number within a cluster
     46 //  * Cluster-ID -- Starting address of cluster divided by cluster size
     47 //
     48 // We use a three-level map to represent the state:
     49 //  1. A hash-table maps from a cluster-ID to the data for that cluster.
     50 //  2. For each non-empty cluster we keep an array indexed by
     51 //     block-ID tht points to the first entry in the linked-list
     52 //     for the block.
     53 //  3. At the bottom, we keep a singly-linked list of all
     54 //     entries in a block (for non-empty blocks).
     55 //
     56 //    hash table
     57 //  +-------------+
     58 //  | id->cluster |---> ...
     59 //  |     ...     |
     60 //  | id->cluster |--->  Cluster
     61 //  +-------------+     +-------+    Data for one block
     62 //                      |  nil  |   +------------------------------------+
     63 //                      |   ----+---|->[addr/value]-->[addr/value]-->... |
     64 //                      |  nil  |   +------------------------------------+
     65 //                      |   ----+--> ...
     66 //                      |  nil  |
     67 //                      |  ...  |
     68 //                      +-------+
     69 //
     70 // Note that we require zero-bytes of overhead for completely empty
     71 // clusters.  The minimum space requirement for a cluster is the size
     72 // of the hash-table entry plus a pointer value for each block in
     73 // the cluster.  Empty blocks impose no extra space requirement.
     74 //
     75 // The cost of a lookup is:
     76 //      a. A hash-table lookup to find the cluster
     77 //      b. An array access in the cluster structure
     78 //      c. A traversal over the linked-list for a block
     79 
     80 #ifndef BASE_ADDRESSMAP_INL_H_
     81 #define BASE_ADDRESSMAP_INL_H_
     82 
     83 #include "config.h"
     84 #include <stddef.h>
     85 #include <string.h>
     86 #if defined HAVE_STDINT_H
     87 #include <stdint.h>             // to get uint16_t (ISO naming madness)
     88 #elif defined HAVE_INTTYPES_H
     89 #include <inttypes.h>           // another place uint16_t might be defined
     90 #else
     91 #include <sys/types.h>          // our last best hope
     92 #endif
     93 
     94 // This class is thread-unsafe -- that is, instances of this class can
     95 // not be accessed concurrently by multiple threads -- because the
     96 // callback function for Iterate() may mutate contained values. If the
     97 // callback functions you pass do not mutate their Value* argument,
     98 // AddressMap can be treated as thread-compatible -- that is, it's
     99 // safe for multiple threads to call "const" methods on this class,
    100 // but not safe for one thread to call const methods on this class
    101 // while another thread is calling non-const methods on the class.
    102 template <class Value>
    103 class AddressMap {
    104  public:
    105   typedef void* (*Allocator)(size_t size);
    106   typedef void  (*DeAllocator)(void* ptr);
    107   typedef const void* Key;
    108 
    109   // Create an AddressMap that uses the specified allocator/deallocator.
    110   // The allocator/deallocator should behave like malloc/free.
    111   // For instance, the allocator does not need to return initialized memory.
    112   AddressMap(Allocator alloc, DeAllocator dealloc);
    113   ~AddressMap();
    114 
    115   // If the map contains an entry for "key", return it. Else return NULL.
    116   inline const Value* Find(Key key) const;
    117   inline Value* FindMutable(Key key);
    118 
    119   // Insert <key,value> into the map.  Any old value associated
    120   // with key is forgotten.
    121   void Insert(Key key, Value value);
    122 
    123   // Remove any entry for key in the map.  If an entry was found
    124   // and removed, stores the associated value in "*removed_value"
    125   // and returns true.  Else returns false.
    126   bool FindAndRemove(Key key, Value* removed_value);
    127 
    128   // Similar to Find but we assume that keys are addresses of non-overlapping
    129   // memory ranges whose sizes are given by size_func.
    130   // If the map contains a range into which "key" points
    131   // (at its start or inside of it, but not at the end),
    132   // return the address of the associated value
    133   // and store its key in "*res_key".
    134   // Else return NULL.
    135   // max_size specifies largest range size possibly in existence now.
    136   typedef size_t (*ValueSizeFunc)(const Value& v);
    137   const Value* FindInside(ValueSizeFunc size_func, size_t max_size,
    138                           Key key, Key* res_key);
    139 
    140   // Iterate over the address map calling 'callback'
    141   // for all stored key-value pairs and passing 'arg' to it.
    142   // We don't use full Closure/Callback machinery not to add
    143   // unnecessary dependencies to this class with low-level uses.
    144   template<class Type>
    145   inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const;
    146 
    147  private:
    148   typedef uintptr_t Number;
    149 
    150   // The implementation assumes that addresses inserted into the map
    151   // will be clustered.  We take advantage of this fact by splitting
    152   // up the address-space into blocks and using a linked-list entry
    153   // for each block.
    154 
    155   // Size of each block.  There is one linked-list for each block, so
    156   // do not make the block-size too big.  Oterwise, a lot of time
    157   // will be spent traversing linked lists.
    158   static const int kBlockBits = 7;
    159   static const int kBlockSize = 1 << kBlockBits;
    160 
    161   // Entry kept in per-block linked-list
    162   struct Entry {
    163     Entry* next;
    164     Key    key;
    165     Value  value;
    166   };
    167 
    168   // We further group a sequence of consecutive blocks into a cluster.
    169   // The data for a cluster is represented as a dense array of
    170   // linked-lists, one list per contained block.
    171   static const int kClusterBits = 13;
    172   static const Number kClusterSize = 1 << (kBlockBits + kClusterBits);
    173   static const int kClusterBlocks = 1 << kClusterBits;
    174 
    175   // We use a simple chaining hash-table to represent the clusters.
    176   struct Cluster {
    177     Cluster* next;                      // Next cluster in hash table chain
    178     Number   id;                        // Cluster ID
    179     Entry*   blocks[kClusterBlocks];    // Per-block linked-lists
    180   };
    181 
    182   // Number of hash-table entries.  With the block-size/cluster-size
    183   // defined above, each cluster covers 1 MB, so an 4K entry
    184   // hash-table will give an average hash-chain length of 1 for 4GB of
    185   // in-use memory.
    186   static const int kHashBits = 12;
    187   static const int kHashSize = 1 << 12;
    188 
    189   // Number of entry objects allocated at a time
    190   static const int ALLOC_COUNT = 64;
    191 
    192   Cluster**     hashtable_;              // The hash-table
    193   Entry*        free_;                   // Free list of unused Entry objects
    194 
    195   // Multiplicative hash function:
    196   // The value "kHashMultiplier" is the bottom 32 bits of
    197   //    int((sqrt(5)-1)/2 * 2^32)
    198   // This is a good multiplier as suggested in CLR, Knuth.  The hash
    199   // value is taken to be the top "k" bits of the bottom 32 bits
    200   // of the muliplied value.
    201   static const uint32_t kHashMultiplier = 2654435769u;
    202   static int HashInt(Number x) {
    203     // Multiply by a constant and take the top bits of the result.
    204     const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier;
    205     return static_cast<int>(m >> (32 - kHashBits));
    206   }
    207 
    208   // Find cluster object for specified address.  If not found
    209   // and "create" is true, create the object.  If not found
    210   // and "create" is false, return NULL.
    211   //
    212   // This method is bitwise-const if create is false.
    213   Cluster* FindCluster(Number address, bool create) {
    214     // Look in hashtable
    215     const Number cluster_id = address >> (kBlockBits + kClusterBits);
    216     const int h = HashInt(cluster_id);
    217     for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
    218       if (c->id == cluster_id) {
    219         return c;
    220       }
    221     }
    222 
    223     // Create cluster if necessary
    224     if (create) {
    225       Cluster* c = New<Cluster>(1);
    226       c->id = cluster_id;
    227       c->next = hashtable_[h];
    228       hashtable_[h] = c;
    229       return c;
    230     }
    231     return NULL;
    232   }
    233 
    234   // Return the block ID for an address within its cluster
    235   static int BlockID(Number address) {
    236     return (address >> kBlockBits) & (kClusterBlocks - 1);
    237   }
    238 
    239   //--------------------------------------------------------------
    240   // Memory management -- we keep all objects we allocate linked
    241   // together in a singly linked list so we can get rid of them
    242   // when we are all done.  Furthermore, we allow the client to
    243   // pass in custom memory allocator/deallocator routines.
    244   //--------------------------------------------------------------
    245   struct Object {
    246     Object* next;
    247     // The real data starts here
    248   };
    249 
    250   Allocator     alloc_;                 // The allocator
    251   DeAllocator   dealloc_;               // The deallocator
    252   Object*       allocated_;             // List of allocated objects
    253 
    254   // Allocates a zeroed array of T with length "num".  Also inserts
    255   // the allocated block into a linked list so it can be deallocated
    256   // when we are all done.
    257   template <class T> T* New(int num) {
    258     void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T));
    259     memset(ptr, 0, sizeof(Object) + num*sizeof(T));
    260     Object* obj = reinterpret_cast<Object*>(ptr);
    261     obj->next = allocated_;
    262     allocated_ = obj;
    263     return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1);
    264   }
    265 };
    266 
    267 // More implementation details follow:
    268 
    269 template <class Value>
    270 AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc)
    271   : free_(NULL),
    272     alloc_(alloc),
    273     dealloc_(dealloc),
    274     allocated_(NULL) {
    275   hashtable_ = New<Cluster*>(kHashSize);
    276 }
    277 
    278 template <class Value>
    279 AddressMap<Value>::~AddressMap() {
    280   // De-allocate all of the objects we allocated
    281   for (Object* obj = allocated_; obj != NULL; /**/) {
    282     Object* next = obj->next;
    283     (*dealloc_)(obj);
    284     obj = next;
    285   }
    286 }
    287 
    288 template <class Value>
    289 inline const Value* AddressMap<Value>::Find(Key key) const {
    290   return const_cast<AddressMap*>(this)->FindMutable(key);
    291 }
    292 
    293 template <class Value>
    294 inline Value* AddressMap<Value>::FindMutable(Key key) {
    295   const Number num = reinterpret_cast<Number>(key);
    296   const Cluster* const c = FindCluster(num, false/*do not create*/);
    297   if (c != NULL) {
    298     for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) {
    299       if (e->key == key) {
    300         return &e->value;
    301       }
    302     }
    303   }
    304   return NULL;
    305 }
    306 
    307 template <class Value>
    308 void AddressMap<Value>::Insert(Key key, Value value) {
    309   const Number num = reinterpret_cast<Number>(key);
    310   Cluster* const c = FindCluster(num, true/*create*/);
    311 
    312   // Look in linked-list for this block
    313   const int block = BlockID(num);
    314   for (Entry* e = c->blocks[block]; e != NULL; e = e->next) {
    315     if (e->key == key) {
    316       e->value = value;
    317       return;
    318     }
    319   }
    320 
    321   // Create entry
    322   if (free_ == NULL) {
    323     // Allocate a new batch of entries and add to free-list
    324     Entry* array = New<Entry>(ALLOC_COUNT);
    325     for (int i = 0; i < ALLOC_COUNT-1; i++) {
    326       array[i].next = &array[i+1];
    327     }
    328     array[ALLOC_COUNT-1].next = free_;
    329     free_ = &array[0];
    330   }
    331   Entry* e = free_;
    332   free_ = e->next;
    333   e->key = key;
    334   e->value = value;
    335   e->next = c->blocks[block];
    336   c->blocks[block] = e;
    337 }
    338 
    339 template <class Value>
    340 bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) {
    341   const Number num = reinterpret_cast<Number>(key);
    342   Cluster* const c = FindCluster(num, false/*do not create*/);
    343   if (c != NULL) {
    344     for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) {
    345       Entry* e = *p;
    346       if (e->key == key) {
    347         *removed_value = e->value;
    348         *p = e->next;         // Remove e from linked-list
    349         e->next = free_;      // Add e to free-list
    350         free_ = e;
    351         return true;
    352       }
    353     }
    354   }
    355   return false;
    356 }
    357 
    358 template <class Value>
    359 const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func,
    360                                            size_t max_size,
    361                                            Key key,
    362                                            Key* res_key) {
    363   const Number key_num = reinterpret_cast<Number>(key);
    364   Number num = key_num;  // we'll move this to move back through the clusters
    365   while (1) {
    366     const Cluster* c = FindCluster(num, false/*do not create*/);
    367     if (c != NULL) {
    368       while (1) {
    369         const int block = BlockID(num);
    370         bool had_smaller_key = false;
    371         for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) {
    372           const Number e_num = reinterpret_cast<Number>(e->key);
    373           if (e_num <= key_num) {
    374             if (e_num == key_num  ||  // to handle 0-sized ranges
    375                 key_num < e_num + (*size_func)(e->value)) {
    376               *res_key = e->key;
    377               return &e->value;
    378             }
    379             had_smaller_key = true;
    380           }
    381         }
    382         if (had_smaller_key) return NULL;  // got a range before 'key'
    383                                            // and it did not contain 'key'
    384         if (block == 0) break;
    385         // try address-wise previous block
    386         num |= kBlockSize - 1;  // start at the last addr of prev block
    387         num -= kBlockSize;
    388         if (key_num - num > max_size) return NULL;
    389       }
    390     }
    391     if (num < kClusterSize) return NULL;  // first cluster
    392     // go to address-wise previous cluster to try
    393     num |= kClusterSize - 1;  // start at the last block of previous cluster
    394     num -= kClusterSize;
    395     if (key_num - num > max_size) return NULL;
    396       // Having max_size to limit the search is crucial: else
    397       // we have to traverse a lot of empty clusters (or blocks).
    398       // We can avoid needing max_size if we put clusters into
    399       // a search tree, but performance suffers considerably
    400       // if we use this approach by using stl::set.
    401   }
    402 }
    403 
    404 template <class Value>
    405 template <class Type>
    406 inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type),
    407                                        Type arg) const {
    408   // We could optimize this by traversing only non-empty clusters and/or blocks
    409   // but it does not speed up heap-checker noticeably.
    410   for (int h = 0; h < kHashSize; ++h) {
    411     for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) {
    412       for (int b = 0; b < kClusterBlocks; ++b) {
    413         for (Entry* e = c->blocks[b]; e != NULL; e = e->next) {
    414           callback(e->key, &e->value, arg);
    415         }
    416       }
    417     }
    418   }
    419 }
    420 
    421 #endif  // BASE_ADDRESSMAP_INL_H_
    422