1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "hash_set.h" 18 19 #include <map> 20 #include <forward_list> 21 #include <sstream> 22 #include <string> 23 #include <unordered_set> 24 #include <vector> 25 26 #include <gtest/gtest.h> 27 #include "hash_map.h" 28 29 namespace art { 30 31 struct IsEmptyFnString { 32 void MakeEmpty(std::string& item) const { 33 item.clear(); 34 } 35 bool IsEmpty(const std::string& item) const { 36 return item.empty(); 37 } 38 }; 39 40 class HashSetTest : public testing::Test { 41 public: 42 HashSetTest() : seed_(97421), unique_number_(0) { 43 } 44 std::string RandomString(size_t len) { 45 std::ostringstream oss; 46 for (size_t i = 0; i < len; ++i) { 47 oss << static_cast<char>('A' + PRand() % 64); 48 } 49 static_assert(' ' < 'A', "space must be less than a"); 50 oss << " " << unique_number_++; // Relies on ' ' < 'A' 51 return oss.str(); 52 } 53 void SetSeed(size_t seed) { 54 seed_ = seed; 55 } 56 size_t PRand() { // Pseudo random. 57 seed_ = seed_ * 1103515245 + 12345; 58 return seed_; 59 } 60 61 private: 62 size_t seed_; 63 size_t unique_number_; 64 }; 65 66 TEST_F(HashSetTest, TestSmoke) { 67 HashSet<std::string, IsEmptyFnString> hash_set; 68 const std::string test_string = "hello world 1234"; 69 ASSERT_TRUE(hash_set.Empty()); 70 ASSERT_EQ(hash_set.Size(), 0U); 71 hash_set.Insert(test_string); 72 auto it = hash_set.Find(test_string); 73 ASSERT_EQ(*it, test_string); 74 auto after_it = hash_set.Erase(it); 75 ASSERT_TRUE(after_it == hash_set.end()); 76 ASSERT_TRUE(hash_set.Empty()); 77 ASSERT_EQ(hash_set.Size(), 0U); 78 it = hash_set.Find(test_string); 79 ASSERT_TRUE(it == hash_set.end()); 80 } 81 82 TEST_F(HashSetTest, TestInsertAndErase) { 83 HashSet<std::string, IsEmptyFnString> hash_set; 84 static constexpr size_t count = 1000; 85 std::vector<std::string> strings; 86 for (size_t i = 0; i < count; ++i) { 87 // Insert a bunch of elements and make sure we can find them. 88 strings.push_back(RandomString(10)); 89 hash_set.Insert(strings[i]); 90 auto it = hash_set.Find(strings[i]); 91 ASSERT_TRUE(it != hash_set.end()); 92 ASSERT_EQ(*it, strings[i]); 93 } 94 ASSERT_EQ(strings.size(), hash_set.Size()); 95 // Try to erase the odd strings. 96 for (size_t i = 1; i < count; i += 2) { 97 auto it = hash_set.Find(strings[i]); 98 ASSERT_TRUE(it != hash_set.end()); 99 ASSERT_EQ(*it, strings[i]); 100 hash_set.Erase(it); 101 } 102 // Test removed. 103 for (size_t i = 1; i < count; i += 2) { 104 auto it = hash_set.Find(strings[i]); 105 ASSERT_TRUE(it == hash_set.end()); 106 } 107 for (size_t i = 0; i < count; i += 2) { 108 auto it = hash_set.Find(strings[i]); 109 ASSERT_TRUE(it != hash_set.end()); 110 ASSERT_EQ(*it, strings[i]); 111 } 112 } 113 114 TEST_F(HashSetTest, TestIterator) { 115 HashSet<std::string, IsEmptyFnString> hash_set; 116 ASSERT_TRUE(hash_set.begin() == hash_set.end()); 117 static constexpr size_t count = 1000; 118 std::vector<std::string> strings; 119 for (size_t i = 0; i < count; ++i) { 120 // Insert a bunch of elements and make sure we can find them. 121 strings.push_back(RandomString(10)); 122 hash_set.Insert(strings[i]); 123 } 124 // Make sure we visit each string exactly once. 125 std::map<std::string, size_t> found_count; 126 for (const std::string& s : hash_set) { 127 ++found_count[s]; 128 } 129 for (size_t i = 0; i < count; ++i) { 130 ASSERT_EQ(found_count[strings[i]], 1U); 131 } 132 found_count.clear(); 133 // Remove all the elements with iterator erase. 134 for (auto it = hash_set.begin(); it != hash_set.end();) { 135 ++found_count[*it]; 136 it = hash_set.Erase(it); 137 ASSERT_EQ(hash_set.Verify(), 0U); 138 } 139 for (size_t i = 0; i < count; ++i) { 140 ASSERT_EQ(found_count[strings[i]], 1U); 141 } 142 } 143 144 TEST_F(HashSetTest, TestSwap) { 145 HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb; 146 std::vector<std::string> strings; 147 static constexpr size_t count = 1000; 148 for (size_t i = 0; i < count; ++i) { 149 strings.push_back(RandomString(10)); 150 hash_seta.Insert(strings[i]); 151 } 152 std::swap(hash_seta, hash_setb); 153 hash_seta.Insert("TEST"); 154 hash_setb.Insert("TEST2"); 155 for (size_t i = 0; i < count; ++i) { 156 strings.push_back(RandomString(10)); 157 hash_seta.Insert(strings[i]); 158 } 159 } 160 161 TEST_F(HashSetTest, TestShrink) { 162 HashSet<std::string, IsEmptyFnString> hash_set; 163 std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"}; 164 for (size_t i = 0; i < strings.size(); ++i) { 165 // Insert some strings into the beginning of our hash set to establish an initial size 166 hash_set.Insert(strings[i]); 167 } 168 169 hash_set.ShrinkToMaximumLoad(); 170 const double initial_load = hash_set.CalculateLoadFactor(); 171 172 // Insert a bunch of random strings to guarantee that we grow the capacity. 173 std::vector<std::string> random_strings; 174 static constexpr size_t count = 1000; 175 for (size_t i = 0; i < count; ++i) { 176 random_strings.push_back(RandomString(10)); 177 hash_set.Insert(random_strings[i]); 178 } 179 180 // Erase all the extra strings which guarantees that our load factor will be really bad. 181 for (size_t i = 0; i < count; ++i) { 182 hash_set.Erase(hash_set.Find(random_strings[i])); 183 } 184 185 const double bad_load = hash_set.CalculateLoadFactor(); 186 EXPECT_GT(initial_load, bad_load); 187 188 // Shrink again, the load factor should be good again. 189 hash_set.ShrinkToMaximumLoad(); 190 EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor()); 191 192 // Make sure all the initial elements we had are still there 193 for (const std::string& initial_string : strings) { 194 EXPECT_NE(hash_set.end(), hash_set.Find(initial_string)) 195 << "expected to find " << initial_string; 196 } 197 } 198 199 TEST_F(HashSetTest, TestLoadFactor) { 200 HashSet<std::string, IsEmptyFnString> hash_set; 201 static constexpr size_t kStringCount = 1000; 202 static constexpr double kEpsilon = 0.01; 203 for (size_t i = 0; i < kStringCount; ++i) { 204 hash_set.Insert(RandomString(i % 10 + 1)); 205 } 206 // Check that changing the load factor resizes the table to be within the target range. 207 EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor()); 208 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); 209 hash_set.SetLoadFactor(0.1, 0.3); 210 EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor()); 211 EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor()); 212 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); 213 hash_set.SetLoadFactor(0.6, 0.8); 214 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); 215 } 216 217 TEST_F(HashSetTest, TestStress) { 218 HashSet<std::string, IsEmptyFnString> hash_set; 219 std::unordered_multiset<std::string> std_set; 220 std::vector<std::string> strings; 221 static constexpr size_t string_count = 2000; 222 static constexpr size_t operations = 100000; 223 static constexpr size_t target_size = 5000; 224 for (size_t i = 0; i < string_count; ++i) { 225 strings.push_back(RandomString(i % 10 + 1)); 226 } 227 const size_t seed = time(nullptr); 228 SetSeed(seed); 229 LOG(INFO) << "Starting stress test with seed " << seed; 230 for (size_t i = 0; i < operations; ++i) { 231 ASSERT_EQ(hash_set.Size(), std_set.size()); 232 size_t delta = std::abs(static_cast<ssize_t>(target_size) - 233 static_cast<ssize_t>(hash_set.Size())); 234 size_t n = PRand(); 235 if (n % target_size == 0) { 236 hash_set.Clear(); 237 std_set.clear(); 238 ASSERT_TRUE(hash_set.Empty()); 239 ASSERT_TRUE(std_set.empty()); 240 } else if (n % target_size < delta) { 241 // Skew towards adding elements until we are at the desired size. 242 const std::string& s = strings[PRand() % string_count]; 243 hash_set.Insert(s); 244 std_set.insert(s); 245 ASSERT_EQ(*hash_set.Find(s), *std_set.find(s)); 246 } else { 247 const std::string& s = strings[PRand() % string_count]; 248 auto it1 = hash_set.Find(s); 249 auto it2 = std_set.find(s); 250 ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end()); 251 if (it1 != hash_set.end()) { 252 ASSERT_EQ(*it1, *it2); 253 hash_set.Erase(it1); 254 std_set.erase(it2); 255 } 256 } 257 } 258 } 259 260 struct IsEmptyStringPair { 261 void MakeEmpty(std::pair<std::string, int>& pair) const { 262 pair.first.clear(); 263 } 264 bool IsEmpty(const std::pair<std::string, int>& pair) const { 265 return pair.first.empty(); 266 } 267 }; 268 269 TEST_F(HashSetTest, TestHashMap) { 270 HashMap<std::string, int, IsEmptyStringPair> hash_map; 271 hash_map.Insert(std::make_pair(std::string("abcd"), 123)); 272 hash_map.Insert(std::make_pair(std::string("abcd"), 124)); 273 hash_map.Insert(std::make_pair(std::string("bags"), 444)); 274 auto it = hash_map.Find(std::string("abcd")); 275 ASSERT_EQ(it->second, 123); 276 hash_map.Erase(it); 277 it = hash_map.Find(std::string("abcd")); 278 ASSERT_EQ(it->second, 124); 279 } 280 281 struct IsEmptyFnVectorInt { 282 void MakeEmpty(std::vector<int>& item) const { 283 item.clear(); 284 } 285 bool IsEmpty(const std::vector<int>& item) const { 286 return item.empty(); 287 } 288 }; 289 290 template <typename T> 291 size_t HashIntSequence(T begin, T end) { 292 size_t hash = 0; 293 for (auto iter = begin; iter != end; ++iter) { 294 hash = hash * 2 + *iter; 295 } 296 return hash; 297 }; 298 299 struct VectorIntHashEquals { 300 std::size_t operator()(const std::vector<int>& item) const { 301 return HashIntSequence(item.begin(), item.end()); 302 } 303 304 std::size_t operator()(const std::forward_list<int>& item) const { 305 return HashIntSequence(item.begin(), item.end()); 306 } 307 308 bool operator()(const std::vector<int>& a, const std::vector<int>& b) const { 309 return a == b; 310 } 311 312 bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const { 313 auto aiter = a.begin(); 314 auto biter = b.begin(); 315 while (aiter != a.end() && biter != b.end()) { 316 if (*aiter != *biter) { 317 return false; 318 } 319 aiter++; 320 biter++; 321 } 322 return (aiter == a.end() && biter == b.end()); 323 } 324 }; 325 326 TEST_F(HashSetTest, TestLookupByAlternateKeyType) { 327 HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set; 328 hash_set.Insert(std::vector<int>({1, 2, 3, 4})); 329 hash_set.Insert(std::vector<int>({4, 2})); 330 ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1}))); 331 ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4}))); 332 ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1}))); 333 ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4}))); 334 } 335 336 TEST_F(HashSetTest, TestReserve) { 337 HashSet<std::string, IsEmptyFnString> hash_set; 338 std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096}; 339 for (size_t size : sizes) { 340 hash_set.Reserve(size); 341 const size_t buckets_before = hash_set.NumBuckets(); 342 // Check that we expanded enough. 343 CHECK_GE(hash_set.ElementsUntilExpand(), size); 344 // Try inserting elements until we are at our reserve size and ensure the hash set did not 345 // expand. 346 while (hash_set.Size() < size) { 347 hash_set.Insert(std::to_string(hash_set.Size())); 348 } 349 CHECK_EQ(hash_set.NumBuckets(), buckets_before); 350 } 351 // Check the behaviour for shrinking, it does not necessarily resize down. 352 constexpr size_t size = 100; 353 hash_set.Reserve(size); 354 CHECK_GE(hash_set.ElementsUntilExpand(), size); 355 } 356 357 } // namespace art 358