Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
     18 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
     19 
     20 #include <algorithm>
     21 #include <inttypes.h>
     22 #include <memory>
     23 #include <set>
     24 #include <string>
     25 
     26 #include "base/mutex.h"
     27 #include "base/stl_util.h"
     28 #include "base/stringprintf.h"
     29 #include "base/time_utils.h"
     30 #include "utils/swap_space.h"
     31 
     32 namespace art {
     33 
     34 // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
     35 // Add method. The data-structure is thread-safe through the use of internal locks, it also
     36 // supports the lock being sharded.
     37 template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
     38           HashType kShard = 1>
     39 class DedupeSet {
     40   typedef std::pair<HashType, const InKey*> HashedInKey;
     41   struct HashedKey {
     42     StoreKey* store_ptr;
     43     union {
     44       HashType store_hash;        // Valid if store_ptr != null.
     45       const HashedInKey* in_key;  // Valid if store_ptr == null.
     46     };
     47   };
     48 
     49   class Comparator {
     50    public:
     51     bool operator()(const HashedKey& a, const HashedKey& b) const {
     52       HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
     53       HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
     54       if (a_hash != b_hash) {
     55         return a_hash < b_hash;
     56       }
     57       if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
     58         return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
     59                                             b.store_ptr->begin(), b.store_ptr->end());
     60       } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
     61         return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
     62                                             b.in_key->second->begin(), b.in_key->second->end());
     63       } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
     64         return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
     65                                             b.store_ptr->begin(), b.store_ptr->end());
     66       } else {
     67         return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
     68                                             b.in_key->second->begin(), b.in_key->second->end());
     69       }
     70     }
     71   };
     72 
     73  public:
     74   StoreKey* Add(Thread* self, const InKey& key) {
     75     uint64_t hash_start;
     76     if (kIsDebugBuild) {
     77       hash_start = NanoTime();
     78     }
     79     HashType raw_hash = HashFunc()(key);
     80     if (kIsDebugBuild) {
     81       uint64_t hash_end = NanoTime();
     82       hash_time_ += hash_end - hash_start;
     83     }
     84     HashType shard_hash = raw_hash / kShard;
     85     HashType shard_bin = raw_hash % kShard;
     86     HashedInKey hashed_in_key(shard_hash, &key);
     87     HashedKey hashed_key;
     88     hashed_key.store_ptr = nullptr;
     89     hashed_key.in_key = &hashed_in_key;
     90     MutexLock lock(self, *lock_[shard_bin]);
     91     auto it = keys_[shard_bin].find(hashed_key);
     92     if (it != keys_[shard_bin].end()) {
     93       DCHECK(it->store_ptr != nullptr);
     94       return it->store_ptr;
     95     }
     96     hashed_key.store_ptr = CreateStoreKey(key);
     97     hashed_key.store_hash = shard_hash;
     98     keys_[shard_bin].insert(hashed_key);
     99     return hashed_key.store_ptr;
    100   }
    101 
    102   explicit DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
    103       : allocator_(alloc), hash_time_(0) {
    104     for (HashType i = 0; i < kShard; ++i) {
    105       std::ostringstream oss;
    106       oss << set_name << " lock " << i;
    107       lock_name_[i] = oss.str();
    108       lock_[i].reset(new Mutex(lock_name_[i].c_str()));
    109     }
    110   }
    111 
    112   ~DedupeSet() {
    113     // Have to manually free all pointers.
    114     for (auto& shard : keys_) {
    115       for (const auto& hashed_key : shard) {
    116         DCHECK(hashed_key.store_ptr != nullptr);
    117         DeleteStoreKey(hashed_key.store_ptr);
    118       }
    119     }
    120   }
    121 
    122   std::string DumpStats() const {
    123     size_t collision_sum = 0;
    124     size_t collision_max = 0;
    125     for (HashType shard = 0; shard < kShard; ++shard) {
    126       HashType last_hash = 0;
    127       size_t collision_cur_max = 0;
    128       for (const HashedKey& key : keys_[shard]) {
    129         DCHECK(key.store_ptr != nullptr);
    130         if (key.store_hash == last_hash) {
    131           collision_cur_max++;
    132           if (collision_cur_max > 1) {
    133             collision_sum++;
    134             if (collision_cur_max > collision_max) {
    135               collision_max = collision_cur_max;
    136             }
    137           }
    138         } else {
    139           collision_cur_max = 1;
    140           last_hash = key.store_hash;
    141         }
    142       }
    143     }
    144     return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
    145                         collision_sum, collision_max, hash_time_);
    146   }
    147 
    148  private:
    149   StoreKey* CreateStoreKey(const InKey& key) {
    150     StoreKey* ret = allocator_.allocate(1);
    151     allocator_.construct(ret, key.begin(), key.end(), allocator_);
    152     return ret;
    153   }
    154 
    155   void DeleteStoreKey(StoreKey* key) {
    156     SwapAllocator<StoreKey> alloc(allocator_);
    157     alloc.destroy(key);
    158     alloc.deallocate(key, 1);
    159   }
    160 
    161   std::string lock_name_[kShard];
    162   std::unique_ptr<Mutex> lock_[kShard];
    163   std::set<HashedKey, Comparator> keys_[kShard];
    164   SwapAllocator<StoreKey> allocator_;
    165   uint64_t hash_time_;
    166 
    167   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
    168 };
    169 
    170 }  // namespace art
    171 
    172 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_H_
    173