Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
     18 #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
     19 
     20 #include "dedupe_set.h"
     21 
     22 #include <algorithm>
     23 #include <inttypes.h>
     24 #include <unordered_map>
     25 
     26 #include "base/mutex.h"
     27 #include "base/hash_set.h"
     28 #include "base/stl_util.h"
     29 #include "base/stringprintf.h"
     30 #include "base/time_utils.h"
     31 
     32 namespace art {
     33 
     34 template <typename InKey,
     35           typename StoreKey,
     36           typename Alloc,
     37           typename HashType,
     38           typename HashFunc,
     39           HashType kShard>
     40 struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
     41   size_t collision_sum = 0u;
     42   size_t collision_max = 0u;
     43   size_t total_probe_distance = 0u;
     44   size_t total_size = 0u;
     45 };
     46 
     47 template <typename InKey,
     48           typename StoreKey,
     49           typename Alloc,
     50           typename HashType,
     51           typename HashFunc,
     52           HashType kShard>
     53 class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
     54  public:
     55   Shard(const Alloc& alloc, const std::string& lock_name)
     56       : alloc_(alloc),
     57         lock_name_(lock_name),
     58         lock_(lock_name_.c_str()),
     59         keys_() {
     60   }
     61 
     62   ~Shard() {
     63     for (const HashedKey<StoreKey>& key : keys_) {
     64       DCHECK(key.Key() != nullptr);
     65       alloc_.Destroy(key.Key());
     66     }
     67   }
     68 
     69   const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
     70     MutexLock lock(self, lock_);
     71     HashedKey<InKey> hashed_in_key(hash, &in_key);
     72     auto it = keys_.Find(hashed_in_key);
     73     if (it != keys_.end()) {
     74       DCHECK(it->Key() != nullptr);
     75       return it->Key();
     76     }
     77     const StoreKey* store_key = alloc_.Copy(in_key);
     78     keys_.Insert(HashedKey<StoreKey> { hash, store_key });
     79     return store_key;
     80   }
     81 
     82   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
     83     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
     84     // for bookkeeping while collecting the stats.
     85     std::unordered_map<HashType, size_t> stats;
     86     {
     87       MutexLock lock(self, lock_);
     88       // Note: The total_probe_distance will be updated with the current state.
     89       // It may have been higher before a re-hash.
     90       global_stats->total_probe_distance += keys_.TotalProbeDistance();
     91       global_stats->total_size += keys_.Size();
     92       for (const HashedKey<StoreKey>& key : keys_) {
     93         auto it = stats.find(key.Hash());
     94         if (it == stats.end()) {
     95           stats.insert({key.Hash(), 1u});
     96         } else {
     97           ++it->second;
     98         }
     99       }
    100     }
    101     for (const auto& entry : stats) {
    102       size_t number_of_entries = entry.second;
    103       if (number_of_entries > 1u) {
    104         global_stats->collision_sum += number_of_entries - 1u;
    105         global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
    106       }
    107     }
    108   }
    109 
    110  private:
    111   template <typename T>
    112   class HashedKey {
    113    public:
    114     HashedKey() : hash_(0u), key_(nullptr) { }
    115     HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
    116 
    117     size_t Hash() const {
    118       return hash_;
    119     }
    120 
    121     const T* Key() const {
    122       return key_;
    123     }
    124 
    125     bool IsEmpty() const {
    126       return Key() == nullptr;
    127     }
    128 
    129     void MakeEmpty() {
    130       key_ = nullptr;
    131     }
    132 
    133    private:
    134     size_t hash_;
    135     const T* key_;
    136   };
    137 
    138   class ShardEmptyFn {
    139    public:
    140     bool IsEmpty(const HashedKey<StoreKey>& key) const {
    141       return key.IsEmpty();
    142     }
    143 
    144     void MakeEmpty(HashedKey<StoreKey>& key) {
    145       key.MakeEmpty();
    146     }
    147   };
    148 
    149   struct ShardHashFn {
    150     template <typename T>
    151     size_t operator()(const HashedKey<T>& key) const {
    152       return key.Hash();
    153     }
    154   };
    155 
    156   struct ShardPred {
    157     typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
    158     operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
    159       DCHECK(lhs.Key() != nullptr);
    160       DCHECK(rhs.Key() != nullptr);
    161       // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
    162       return lhs.Key() == rhs.Key();
    163     }
    164 
    165     template <typename LeftT, typename RightT>
    166     bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
    167       DCHECK(lhs.Key() != nullptr);
    168       DCHECK(rhs.Key() != nullptr);
    169       return lhs.Hash() == rhs.Hash() &&
    170           lhs.Key()->size() == rhs.Key()->size() &&
    171           std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
    172     }
    173   };
    174 
    175   Alloc alloc_;
    176   const std::string lock_name_;
    177   Mutex lock_;
    178   HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
    179 };
    180 
    181 template <typename InKey,
    182           typename StoreKey,
    183           typename Alloc,
    184           typename HashType,
    185           typename HashFunc,
    186           HashType kShard>
    187 const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
    188     Thread* self, const InKey& key) {
    189   uint64_t hash_start;
    190   if (kIsDebugBuild) {
    191     hash_start = NanoTime();
    192   }
    193   HashType raw_hash = HashFunc()(key);
    194   if (kIsDebugBuild) {
    195     uint64_t hash_end = NanoTime();
    196     hash_time_ += hash_end - hash_start;
    197   }
    198   HashType shard_hash = raw_hash / kShard;
    199   HashType shard_bin = raw_hash % kShard;
    200   return shards_[shard_bin]->Add(self, shard_hash, key);
    201 }
    202 
    203 template <typename InKey,
    204           typename StoreKey,
    205           typename Alloc,
    206           typename HashType,
    207           typename HashFunc,
    208           HashType kShard>
    209 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
    210                                                                          const Alloc& alloc)
    211     : hash_time_(0) {
    212   for (HashType i = 0; i < kShard; ++i) {
    213     std::ostringstream oss;
    214     oss << set_name << " lock " << i;
    215     shards_[i].reset(new Shard(alloc, oss.str()));
    216   }
    217 }
    218 
    219 template <typename InKey,
    220           typename StoreKey,
    221           typename Alloc,
    222           typename HashType,
    223           typename HashFunc,
    224           HashType kShard>
    225 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
    226   // Everything done by member destructors.
    227 }
    228 
    229 template <typename InKey,
    230           typename StoreKey,
    231           typename Alloc,
    232           typename HashType,
    233           typename HashFunc,
    234           HashType kShard>
    235 std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
    236     Thread* self) const {
    237   Stats stats;
    238   for (HashType shard = 0; shard < kShard; ++shard) {
    239     shards_[shard]->UpdateStats(self, &stats);
    240   }
    241   return StringPrintf("%zu collisions, %zu max hash collisions, "
    242                       "%zu/%zu probe distance, %" PRIu64 " ns hash time",
    243                       stats.collision_sum,
    244                       stats.collision_max,
    245                       stats.total_probe_distance,
    246                       stats.total_size,
    247                       hash_time_);
    248 }
    249 
    250 
    251 }  // namespace art
    252 
    253 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
    254