Home | History | Annotate | Download | only in base
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "hash_set.h"
     18 
     19 #include <forward_list>
     20 #include <map>
     21 #include <sstream>
     22 #include <string>
     23 #include <unordered_set>
     24 #include <vector>
     25 
     26 #include <gtest/gtest.h>
     27 #include "hash_map.h"
     28 
     29 namespace art {
     30 
     31 struct IsEmptyFnString {
     32   void MakeEmpty(std::string& item) const {
     33     item.clear();
     34   }
     35   bool IsEmpty(const std::string& item) const {
     36     return item.empty();
     37   }
     38 };
     39 
     40 class HashSetTest : public testing::Test {
     41  public:
     42   HashSetTest() : seed_(97421), unique_number_(0) {
     43   }
     44   std::string RandomString(size_t len) {
     45     std::ostringstream oss;
     46     for (size_t i = 0; i < len; ++i) {
     47       oss << static_cast<char>('A' + PRand() % 64);
     48     }
     49     static_assert(' ' < 'A', "space must be less than a");
     50     oss << " " << unique_number_++;  // Relies on ' ' < 'A'
     51     return oss.str();
     52   }
     53   void SetSeed(size_t seed) {
     54     seed_ = seed;
     55   }
     56   size_t PRand() {  // Pseudo random.
     57     seed_ = seed_ * 1103515245 + 12345;
     58     return seed_;
     59   }
     60 
     61  private:
     62   size_t seed_;
     63   size_t unique_number_;
     64 };
     65 
     66 TEST_F(HashSetTest, TestSmoke) {
     67   HashSet<std::string, IsEmptyFnString> hash_set;
     68   const std::string test_string = "hello world 1234";
     69   ASSERT_TRUE(hash_set.Empty());
     70   ASSERT_EQ(hash_set.Size(), 0U);
     71   hash_set.Insert(test_string);
     72   auto it = hash_set.Find(test_string);
     73   ASSERT_EQ(*it, test_string);
     74   auto after_it = hash_set.Erase(it);
     75   ASSERT_TRUE(after_it == hash_set.end());
     76   ASSERT_TRUE(hash_set.Empty());
     77   ASSERT_EQ(hash_set.Size(), 0U);
     78   it = hash_set.Find(test_string);
     79   ASSERT_TRUE(it == hash_set.end());
     80 }
     81 
     82 TEST_F(HashSetTest, TestInsertAndErase) {
     83   HashSet<std::string, IsEmptyFnString> hash_set;
     84   static constexpr size_t count = 1000;
     85   std::vector<std::string> strings;
     86   for (size_t i = 0; i < count; ++i) {
     87     // Insert a bunch of elements and make sure we can find them.
     88     strings.push_back(RandomString(10));
     89     hash_set.Insert(strings[i]);
     90     auto it = hash_set.Find(strings[i]);
     91     ASSERT_TRUE(it != hash_set.end());
     92     ASSERT_EQ(*it, strings[i]);
     93   }
     94   ASSERT_EQ(strings.size(), hash_set.Size());
     95   // Try to erase the odd strings.
     96   for (size_t i = 1; i < count; i += 2) {
     97     auto it = hash_set.Find(strings[i]);
     98     ASSERT_TRUE(it != hash_set.end());
     99     ASSERT_EQ(*it, strings[i]);
    100     hash_set.Erase(it);
    101   }
    102   // Test removed.
    103   for (size_t i = 1; i < count; i += 2) {
    104     auto it = hash_set.Find(strings[i]);
    105     ASSERT_TRUE(it == hash_set.end());
    106   }
    107   for (size_t i = 0; i < count; i += 2) {
    108     auto it = hash_set.Find(strings[i]);
    109     ASSERT_TRUE(it != hash_set.end());
    110     ASSERT_EQ(*it, strings[i]);
    111   }
    112 }
    113 
    114 TEST_F(HashSetTest, TestIterator) {
    115   HashSet<std::string, IsEmptyFnString> hash_set;
    116   ASSERT_TRUE(hash_set.begin() == hash_set.end());
    117   static constexpr size_t count = 1000;
    118   std::vector<std::string> strings;
    119   for (size_t i = 0; i < count; ++i) {
    120     // Insert a bunch of elements and make sure we can find them.
    121     strings.push_back(RandomString(10));
    122     hash_set.Insert(strings[i]);
    123   }
    124   // Make sure we visit each string exactly once.
    125   std::map<std::string, size_t> found_count;
    126   for (const std::string& s : hash_set) {
    127     ++found_count[s];
    128   }
    129   for (size_t i = 0; i < count; ++i) {
    130     ASSERT_EQ(found_count[strings[i]], 1U);
    131   }
    132   found_count.clear();
    133   // Remove all the elements with iterator erase.
    134   for (auto it = hash_set.begin(); it != hash_set.end();) {
    135     ++found_count[*it];
    136     it = hash_set.Erase(it);
    137     ASSERT_EQ(hash_set.Verify(), 0U);
    138   }
    139   for (size_t i = 0; i < count; ++i) {
    140     ASSERT_EQ(found_count[strings[i]], 1U);
    141   }
    142 }
    143 
    144 TEST_F(HashSetTest, TestSwap) {
    145   HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
    146   std::vector<std::string> strings;
    147   static constexpr size_t count = 1000;
    148   for (size_t i = 0; i < count; ++i) {
    149     strings.push_back(RandomString(10));
    150     hash_seta.Insert(strings[i]);
    151   }
    152   std::swap(hash_seta, hash_setb);
    153   hash_seta.Insert("TEST");
    154   hash_setb.Insert("TEST2");
    155   for (size_t i = 0; i < count; ++i) {
    156     strings.push_back(RandomString(10));
    157     hash_seta.Insert(strings[i]);
    158   }
    159 }
    160 
    161 TEST_F(HashSetTest, TestShrink) {
    162   HashSet<std::string, IsEmptyFnString> hash_set;
    163   std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
    164   for (size_t i = 0; i < strings.size(); ++i) {
    165     // Insert some strings into the beginning of our hash set to establish an initial size
    166     hash_set.Insert(strings[i]);
    167   }
    168 
    169   hash_set.ShrinkToMaximumLoad();
    170   const double initial_load = hash_set.CalculateLoadFactor();
    171 
    172   // Insert a bunch of random strings to guarantee that we grow the capacity.
    173   std::vector<std::string> random_strings;
    174   static constexpr size_t count = 1000;
    175   for (size_t i = 0; i < count; ++i) {
    176     random_strings.push_back(RandomString(10));
    177     hash_set.Insert(random_strings[i]);
    178   }
    179 
    180   // Erase all the extra strings which guarantees that our load factor will be really bad.
    181   for (size_t i = 0; i < count; ++i) {
    182     hash_set.Erase(hash_set.Find(random_strings[i]));
    183   }
    184 
    185   const double bad_load = hash_set.CalculateLoadFactor();
    186   EXPECT_GT(initial_load, bad_load);
    187 
    188   // Shrink again, the load factor should be good again.
    189   hash_set.ShrinkToMaximumLoad();
    190   EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
    191 
    192   // Make sure all the initial elements we had are still there
    193   for (const std::string& initial_string : strings) {
    194     EXPECT_NE(hash_set.end(), hash_set.Find(initial_string))
    195         << "expected to find " << initial_string;
    196   }
    197 }
    198 
    199 TEST_F(HashSetTest, TestLoadFactor) {
    200   HashSet<std::string, IsEmptyFnString> hash_set;
    201   static constexpr size_t kStringCount = 1000;
    202   static constexpr double kEpsilon = 0.01;
    203   for (size_t i = 0; i < kStringCount; ++i) {
    204     hash_set.Insert(RandomString(i % 10 + 1));
    205   }
    206   // Check that changing the load factor resizes the table to be within the target range.
    207   EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
    208   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
    209   hash_set.SetLoadFactor(0.1, 0.3);
    210   EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
    211   EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
    212   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
    213   hash_set.SetLoadFactor(0.6, 0.8);
    214   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
    215 }
    216 
    217 TEST_F(HashSetTest, TestStress) {
    218   HashSet<std::string, IsEmptyFnString> hash_set;
    219   std::unordered_multiset<std::string> std_set;
    220   std::vector<std::string> strings;
    221   static constexpr size_t string_count = 2000;
    222   static constexpr size_t operations = 100000;
    223   static constexpr size_t target_size = 5000;
    224   for (size_t i = 0; i < string_count; ++i) {
    225     strings.push_back(RandomString(i % 10 + 1));
    226   }
    227   const size_t seed = time(nullptr);
    228   SetSeed(seed);
    229   LOG(INFO) << "Starting stress test with seed " << seed;
    230   for (size_t i = 0; i < operations; ++i) {
    231     ASSERT_EQ(hash_set.Size(), std_set.size());
    232     size_t delta = std::abs(static_cast<ssize_t>(target_size) -
    233                             static_cast<ssize_t>(hash_set.Size()));
    234     size_t n = PRand();
    235     if (n % target_size == 0) {
    236       hash_set.Clear();
    237       std_set.clear();
    238       ASSERT_TRUE(hash_set.Empty());
    239       ASSERT_TRUE(std_set.empty());
    240     } else  if (n % target_size < delta) {
    241       // Skew towards adding elements until we are at the desired size.
    242       const std::string& s = strings[PRand() % string_count];
    243       hash_set.Insert(s);
    244       std_set.insert(s);
    245       ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
    246     } else {
    247       const std::string& s = strings[PRand() % string_count];
    248       auto it1 = hash_set.Find(s);
    249       auto it2 = std_set.find(s);
    250       ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
    251       if (it1 != hash_set.end()) {
    252         ASSERT_EQ(*it1, *it2);
    253         hash_set.Erase(it1);
    254         std_set.erase(it2);
    255       }
    256     }
    257   }
    258 }
    259 
    260 struct IsEmptyStringPair {
    261   void MakeEmpty(std::pair<std::string, int>& pair) const {
    262     pair.first.clear();
    263   }
    264   bool IsEmpty(const std::pair<std::string, int>& pair) const {
    265     return pair.first.empty();
    266   }
    267 };
    268 
    269 TEST_F(HashSetTest, TestHashMap) {
    270   HashMap<std::string, int, IsEmptyStringPair> hash_map;
    271   hash_map.Insert(std::make_pair(std::string("abcd"), 123));
    272   hash_map.Insert(std::make_pair(std::string("abcd"), 124));
    273   hash_map.Insert(std::make_pair(std::string("bags"), 444));
    274   auto it = hash_map.Find(std::string("abcd"));
    275   ASSERT_EQ(it->second, 123);
    276   hash_map.Erase(it);
    277   it = hash_map.Find(std::string("abcd"));
    278   ASSERT_EQ(it->second, 124);
    279 }
    280 
    281 struct IsEmptyFnVectorInt {
    282   void MakeEmpty(std::vector<int>& item) const {
    283     item.clear();
    284   }
    285   bool IsEmpty(const std::vector<int>& item) const {
    286     return item.empty();
    287   }
    288 };
    289 
    290 template <typename T>
    291 size_t HashIntSequence(T begin, T end) {
    292   size_t hash = 0;
    293   for (auto iter = begin; iter != end; ++iter) {
    294     hash = hash * 2 + *iter;
    295   }
    296   return hash;
    297 }
    298 
    299 struct VectorIntHashEquals {
    300   std::size_t operator()(const std::vector<int>& item) const {
    301     return HashIntSequence(item.begin(), item.end());
    302   }
    303 
    304   std::size_t operator()(const std::forward_list<int>& item) const {
    305     return HashIntSequence(item.begin(), item.end());
    306   }
    307 
    308   bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
    309     return a == b;
    310   }
    311 
    312   bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
    313     auto aiter = a.begin();
    314     auto biter = b.begin();
    315     while (aiter != a.end() && biter != b.end()) {
    316       if (*aiter != *biter) {
    317         return false;
    318       }
    319       aiter++;
    320       biter++;
    321     }
    322     return (aiter == a.end() && biter == b.end());
    323   }
    324 };
    325 
    326 TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
    327   HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
    328   hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
    329   hash_set.Insert(std::vector<int>({4, 2}));
    330   ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1})));
    331   ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4})));
    332   ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1})));
    333   ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
    334 }
    335 
    336 TEST_F(HashSetTest, TestReserve) {
    337   HashSet<std::string, IsEmptyFnString> hash_set;
    338   std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
    339   for (size_t size : sizes) {
    340     hash_set.Reserve(size);
    341     const size_t buckets_before = hash_set.NumBuckets();
    342     // Check that we expanded enough.
    343     CHECK_GE(hash_set.ElementsUntilExpand(), size);
    344     // Try inserting elements until we are at our reserve size and ensure the hash set did not
    345     // expand.
    346     while (hash_set.Size() < size) {
    347       hash_set.Insert(std::to_string(hash_set.Size()));
    348     }
    349     CHECK_EQ(hash_set.NumBuckets(), buckets_before);
    350   }
    351   // Check the behaviour for shrinking, it does not necessarily resize down.
    352   constexpr size_t size = 100;
    353   hash_set.Reserve(size);
    354   CHECK_GE(hash_set.ElementsUntilExpand(), size);
    355 }
    356 
    357 }  // namespace art
    358