1 // Copyright (c) 2005, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 // --- 31 // Author: Sanjay Ghemawat 32 // 33 // A fast map from addresses to values. Assumes that addresses are 34 // clustered. The main use is intended to be for heap-profiling. 35 // May be too memory-hungry for other uses. 36 // 37 // We use a user-defined allocator/de-allocator so that we can use 38 // this data structure during heap-profiling. 39 // 40 // IMPLEMENTATION DETAIL: 41 // 42 // Some default definitions/parameters: 43 // * Block -- aligned 128-byte region of the address space 44 // * Cluster -- aligned 1-MB region of the address space 45 // * Block-ID -- block-number within a cluster 46 // * Cluster-ID -- Starting address of cluster divided by cluster size 47 // 48 // We use a three-level map to represent the state: 49 // 1. A hash-table maps from a cluster-ID to the data for that cluster. 50 // 2. For each non-empty cluster we keep an array indexed by 51 // block-ID tht points to the first entry in the linked-list 52 // for the block. 53 // 3. At the bottom, we keep a singly-linked list of all 54 // entries in a block (for non-empty blocks). 55 // 56 // hash table 57 // +-------------+ 58 // | id->cluster |---> ... 59 // | ... | 60 // | id->cluster |---> Cluster 61 // +-------------+ +-------+ Data for one block 62 // | nil | +------------------------------------+ 63 // | ----+---|->[addr/value]-->[addr/value]-->... | 64 // | nil | +------------------------------------+ 65 // | ----+--> ... 66 // | nil | 67 // | ... | 68 // +-------+ 69 // 70 // Note that we require zero-bytes of overhead for completely empty 71 // clusters. The minimum space requirement for a cluster is the size 72 // of the hash-table entry plus a pointer value for each block in 73 // the cluster. Empty blocks impose no extra space requirement. 74 // 75 // The cost of a lookup is: 76 // a. A hash-table lookup to find the cluster 77 // b. An array access in the cluster structure 78 // c. A traversal over the linked-list for a block 79 80 #ifndef BASE_ADDRESSMAP_INL_H_ 81 #define BASE_ADDRESSMAP_INL_H_ 82 83 #include "config.h" 84 #include <stddef.h> 85 #include <string.h> 86 #if defined HAVE_STDINT_H 87 #include <stdint.h> // to get uint16_t (ISO naming madness) 88 #elif defined HAVE_INTTYPES_H 89 #include <inttypes.h> // another place uint16_t might be defined 90 #else 91 #include <sys/types.h> // our last best hope 92 #endif 93 94 // This class is thread-unsafe -- that is, instances of this class can 95 // not be accessed concurrently by multiple threads -- because the 96 // callback function for Iterate() may mutate contained values. If the 97 // callback functions you pass do not mutate their Value* argument, 98 // AddressMap can be treated as thread-compatible -- that is, it's 99 // safe for multiple threads to call "const" methods on this class, 100 // but not safe for one thread to call const methods on this class 101 // while another thread is calling non-const methods on the class. 102 template <class Value> 103 class AddressMap { 104 public: 105 typedef void* (*Allocator)(size_t size); 106 typedef void (*DeAllocator)(void* ptr); 107 typedef const void* Key; 108 109 // Create an AddressMap that uses the specified allocator/deallocator. 110 // The allocator/deallocator should behave like malloc/free. 111 // For instance, the allocator does not need to return initialized memory. 112 AddressMap(Allocator alloc, DeAllocator dealloc); 113 ~AddressMap(); 114 115 // If the map contains an entry for "key", return it. Else return NULL. 116 inline const Value* Find(Key key) const; 117 inline Value* FindMutable(Key key); 118 119 // Insert <key,value> into the map. Any old value associated 120 // with key is forgotten. 121 void Insert(Key key, Value value); 122 123 // Remove any entry for key in the map. If an entry was found 124 // and removed, stores the associated value in "*removed_value" 125 // and returns true. Else returns false. 126 bool FindAndRemove(Key key, Value* removed_value); 127 128 // Similar to Find but we assume that keys are addresses of non-overlapping 129 // memory ranges whose sizes are given by size_func. 130 // If the map contains a range into which "key" points 131 // (at its start or inside of it, but not at the end), 132 // return the address of the associated value 133 // and store its key in "*res_key". 134 // Else return NULL. 135 // max_size specifies largest range size possibly in existence now. 136 typedef size_t (*ValueSizeFunc)(const Value& v); 137 const Value* FindInside(ValueSizeFunc size_func, size_t max_size, 138 Key key, Key* res_key); 139 140 // Iterate over the address map calling 'callback' 141 // for all stored key-value pairs and passing 'arg' to it. 142 // We don't use full Closure/Callback machinery not to add 143 // unnecessary dependencies to this class with low-level uses. 144 template<class Type> 145 inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const; 146 147 private: 148 typedef uintptr_t Number; 149 150 // The implementation assumes that addresses inserted into the map 151 // will be clustered. We take advantage of this fact by splitting 152 // up the address-space into blocks and using a linked-list entry 153 // for each block. 154 155 // Size of each block. There is one linked-list for each block, so 156 // do not make the block-size too big. Oterwise, a lot of time 157 // will be spent traversing linked lists. 158 static const int kBlockBits = 7; 159 static const int kBlockSize = 1 << kBlockBits; 160 161 // Entry kept in per-block linked-list 162 struct Entry { 163 Entry* next; 164 Key key; 165 Value value; 166 }; 167 168 // We further group a sequence of consecutive blocks into a cluster. 169 // The data for a cluster is represented as a dense array of 170 // linked-lists, one list per contained block. 171 static const int kClusterBits = 13; 172 static const Number kClusterSize = 1 << (kBlockBits + kClusterBits); 173 static const int kClusterBlocks = 1 << kClusterBits; 174 175 // We use a simple chaining hash-table to represent the clusters. 176 struct Cluster { 177 Cluster* next; // Next cluster in hash table chain 178 Number id; // Cluster ID 179 Entry* blocks[kClusterBlocks]; // Per-block linked-lists 180 }; 181 182 // Number of hash-table entries. With the block-size/cluster-size 183 // defined above, each cluster covers 1 MB, so an 4K entry 184 // hash-table will give an average hash-chain length of 1 for 4GB of 185 // in-use memory. 186 static const int kHashBits = 12; 187 static const int kHashSize = 1 << 12; 188 189 // Number of entry objects allocated at a time 190 static const int ALLOC_COUNT = 64; 191 192 Cluster** hashtable_; // The hash-table 193 Entry* free_; // Free list of unused Entry objects 194 195 // Multiplicative hash function: 196 // The value "kHashMultiplier" is the bottom 32 bits of 197 // int((sqrt(5)-1)/2 * 2^32) 198 // This is a good multiplier as suggested in CLR, Knuth. The hash 199 // value is taken to be the top "k" bits of the bottom 32 bits 200 // of the muliplied value. 201 static const uint32_t kHashMultiplier = 2654435769u; 202 static int HashInt(Number x) { 203 // Multiply by a constant and take the top bits of the result. 204 const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier; 205 return static_cast<int>(m >> (32 - kHashBits)); 206 } 207 208 // Find cluster object for specified address. If not found 209 // and "create" is true, create the object. If not found 210 // and "create" is false, return NULL. 211 // 212 // This method is bitwise-const if create is false. 213 Cluster* FindCluster(Number address, bool create) { 214 // Look in hashtable 215 const Number cluster_id = address >> (kBlockBits + kClusterBits); 216 const int h = HashInt(cluster_id); 217 for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) { 218 if (c->id == cluster_id) { 219 return c; 220 } 221 } 222 223 // Create cluster if necessary 224 if (create) { 225 Cluster* c = New<Cluster>(1); 226 c->id = cluster_id; 227 c->next = hashtable_[h]; 228 hashtable_[h] = c; 229 return c; 230 } 231 return NULL; 232 } 233 234 // Return the block ID for an address within its cluster 235 static int BlockID(Number address) { 236 return (address >> kBlockBits) & (kClusterBlocks - 1); 237 } 238 239 //-------------------------------------------------------------- 240 // Memory management -- we keep all objects we allocate linked 241 // together in a singly linked list so we can get rid of them 242 // when we are all done. Furthermore, we allow the client to 243 // pass in custom memory allocator/deallocator routines. 244 //-------------------------------------------------------------- 245 struct Object { 246 Object* next; 247 // The real data starts here 248 }; 249 250 Allocator alloc_; // The allocator 251 DeAllocator dealloc_; // The deallocator 252 Object* allocated_; // List of allocated objects 253 254 // Allocates a zeroed array of T with length "num". Also inserts 255 // the allocated block into a linked list so it can be deallocated 256 // when we are all done. 257 template <class T> T* New(int num) { 258 void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); 259 memset(ptr, 0, sizeof(Object) + num*sizeof(T)); 260 Object* obj = reinterpret_cast<Object*>(ptr); 261 obj->next = allocated_; 262 allocated_ = obj; 263 return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); 264 } 265 }; 266 267 // More implementation details follow: 268 269 template <class Value> 270 AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) 271 : free_(NULL), 272 alloc_(alloc), 273 dealloc_(dealloc), 274 allocated_(NULL) { 275 hashtable_ = New<Cluster*>(kHashSize); 276 } 277 278 template <class Value> 279 AddressMap<Value>::~AddressMap() { 280 // De-allocate all of the objects we allocated 281 for (Object* obj = allocated_; obj != NULL; /**/) { 282 Object* next = obj->next; 283 (*dealloc_)(obj); 284 obj = next; 285 } 286 } 287 288 template <class Value> 289 inline const Value* AddressMap<Value>::Find(Key key) const { 290 return const_cast<AddressMap*>(this)->FindMutable(key); 291 } 292 293 template <class Value> 294 inline Value* AddressMap<Value>::FindMutable(Key key) { 295 const Number num = reinterpret_cast<Number>(key); 296 const Cluster* const c = FindCluster(num, false/*do not create*/); 297 if (c != NULL) { 298 for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { 299 if (e->key == key) { 300 return &e->value; 301 } 302 } 303 } 304 return NULL; 305 } 306 307 template <class Value> 308 void AddressMap<Value>::Insert(Key key, Value value) { 309 const Number num = reinterpret_cast<Number>(key); 310 Cluster* const c = FindCluster(num, true/*create*/); 311 312 // Look in linked-list for this block 313 const int block = BlockID(num); 314 for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { 315 if (e->key == key) { 316 e->value = value; 317 return; 318 } 319 } 320 321 // Create entry 322 if (free_ == NULL) { 323 // Allocate a new batch of entries and add to free-list 324 Entry* array = New<Entry>(ALLOC_COUNT); 325 for (int i = 0; i < ALLOC_COUNT-1; i++) { 326 array[i].next = &array[i+1]; 327 } 328 array[ALLOC_COUNT-1].next = free_; 329 free_ = &array[0]; 330 } 331 Entry* e = free_; 332 free_ = e->next; 333 e->key = key; 334 e->value = value; 335 e->next = c->blocks[block]; 336 c->blocks[block] = e; 337 } 338 339 template <class Value> 340 bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { 341 const Number num = reinterpret_cast<Number>(key); 342 Cluster* const c = FindCluster(num, false/*do not create*/); 343 if (c != NULL) { 344 for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { 345 Entry* e = *p; 346 if (e->key == key) { 347 *removed_value = e->value; 348 *p = e->next; // Remove e from linked-list 349 e->next = free_; // Add e to free-list 350 free_ = e; 351 return true; 352 } 353 } 354 } 355 return false; 356 } 357 358 template <class Value> 359 const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, 360 size_t max_size, 361 Key key, 362 Key* res_key) { 363 const Number key_num = reinterpret_cast<Number>(key); 364 Number num = key_num; // we'll move this to move back through the clusters 365 while (1) { 366 const Cluster* c = FindCluster(num, false/*do not create*/); 367 if (c != NULL) { 368 while (1) { 369 const int block = BlockID(num); 370 bool had_smaller_key = false; 371 for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { 372 const Number e_num = reinterpret_cast<Number>(e->key); 373 if (e_num <= key_num) { 374 if (e_num == key_num || // to handle 0-sized ranges 375 key_num < e_num + (*size_func)(e->value)) { 376 *res_key = e->key; 377 return &e->value; 378 } 379 had_smaller_key = true; 380 } 381 } 382 if (had_smaller_key) return NULL; // got a range before 'key' 383 // and it did not contain 'key' 384 if (block == 0) break; 385 // try address-wise previous block 386 num |= kBlockSize - 1; // start at the last addr of prev block 387 num -= kBlockSize; 388 if (key_num - num > max_size) return NULL; 389 } 390 } 391 if (num < kClusterSize) return NULL; // first cluster 392 // go to address-wise previous cluster to try 393 num |= kClusterSize - 1; // start at the last block of previous cluster 394 num -= kClusterSize; 395 if (key_num - num > max_size) return NULL; 396 // Having max_size to limit the search is crucial: else 397 // we have to traverse a lot of empty clusters (or blocks). 398 // We can avoid needing max_size if we put clusters into 399 // a search tree, but performance suffers considerably 400 // if we use this approach by using stl::set. 401 } 402 } 403 404 template <class Value> 405 template <class Type> 406 inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type), 407 Type arg) const { 408 // We could optimize this by traversing only non-empty clusters and/or blocks 409 // but it does not speed up heap-checker noticeably. 410 for (int h = 0; h < kHashSize; ++h) { 411 for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) { 412 for (int b = 0; b < kClusterBlocks; ++b) { 413 for (Entry* e = c->blocks[b]; e != NULL; e = e->next) { 414 callback(e->key, &e->value, arg); 415 } 416 } 417 } 418 } 419 } 420 421 #endif // BASE_ADDRESSMAP_INL_H_ 422