1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <config.h> 17 #include "blockhash.h" 18 #include <limits.h> // INT_MIN 19 #include <string.h> // memcpy, memcmp, strlen 20 #include <iostream> 21 #include <memory> // auto_ptr 22 #include "encodetable.h" 23 #include "rolling_hash.h" 24 #include "testing.h" 25 26 namespace open_vcdiff { 27 28 const int kBlockSize = BlockHash::kBlockSize; 29 30 class BlockHashTest : public testing::Test { 31 protected: 32 static const int kTimingTestSize = 1 << 21; // 2M 33 static const int kTimingTestIterations = 32; 34 35 BlockHashTest() { 36 dh_.reset(BlockHash::CreateDictionaryHash(sample_text, 37 strlen(sample_text))); 38 th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); 39 EXPECT_TRUE(dh_.get() != NULL); 40 EXPECT_TRUE(th_.get() != NULL); 41 } 42 43 // BlockHashTest is a friend to BlockHash. Expose the protected functions 44 // that will be tested by the children of BlockHashTest. 45 static bool BlockContentsMatch(const char* block1, const char* block2) { 46 return BlockHash::BlockContentsMatch(block1, block2); 47 } 48 49 int FirstMatchingBlock(const BlockHash& block_hash, 50 uint32_t hash_value, 51 const char* block_ptr) const { 52 return block_hash.FirstMatchingBlock(hash_value, block_ptr); 53 } 54 55 int NextMatchingBlock(const BlockHash& block_hash, 56 int block_number, 57 const char* block_ptr) const { 58 return block_hash.NextMatchingBlock(block_number, block_ptr); 59 } 60 61 static int MatchingBytesToLeft(const char* source_match_start, 62 const char* target_match_start, 63 int max_bytes) { 64 return BlockHash::MatchingBytesToLeft(source_match_start, 65 target_match_start, 66 max_bytes); 67 } 68 69 static int MatchingBytesToRight(const char* source_match_end, 70 const char* target_match_end, 71 int max_bytes) { 72 return BlockHash::MatchingBytesToRight(source_match_end, 73 target_match_end, 74 max_bytes); 75 } 76 77 static int StringLengthAsInt(const char* s) { 78 return static_cast<int>(strlen(s)); 79 } 80 81 void InitBlocksToDifferAtNthByte(int n) { 82 CHECK(n < kBlockSize); 83 memset(compare_buffer_1_, 0xBE, kTimingTestSize); 84 memset(compare_buffer_2_, 0xBE, kTimingTestSize); 85 for (int index = n; index < kTimingTestSize; index += kBlockSize) { 86 compare_buffer_1_[index] = 0x00; 87 compare_buffer_2_[index] = 0x01; 88 } 89 } 90 91 void TestAndPrintTimesForCompareFunctions(bool should_be_identical); 92 93 void TimingTestForBlocksThatDifferAtByte(int n) { 94 InitBlocksToDifferAtNthByte(n); 95 std::cout << "Comparing blocks that differ at byte " << n << std::endl; 96 TestAndPrintTimesForCompareFunctions(false); 97 } 98 99 // Copy sample_text_without_spaces and search_string_without_spaces 100 // into newly allocated sample_text and search_string buffers, 101 // but pad them with space characters so that every character 102 // in sample_text_without_spaces matches (kBlockSize - 1) 103 // space characters in sample_text, followed by that character. 104 // For example: 105 // Since sample_text_without_spaces begins "The only thing"..., 106 // if kBlockSize is 4, then 3 space characters will be inserted 107 // between each letter of sample_text, as follows: 108 // " T h e o n l y t h i n g"... 109 // This makes testing simpler, because finding a kBlockSize-byte match 110 // between the sample text and search string only depends on the 111 // trailing letter in each block. 112 static void MakeEachLetterABlock(const char* string_without_spaces, 113 const char** result) { 114 const size_t length_without_spaces = strlen(string_without_spaces); 115 char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; 116 memset(padded_text, ' ', kBlockSize * length_without_spaces); 117 char* padded_text_ptr = padded_text + (kBlockSize - 1); 118 for (size_t i = 0; i < length_without_spaces; ++i) { 119 *padded_text_ptr = string_without_spaces[i]; 120 padded_text_ptr += kBlockSize; 121 } 122 padded_text[kBlockSize * length_without_spaces] = '\0'; 123 *result = padded_text; 124 } 125 126 static void SetUpTestCase() { 127 MakeEachLetterABlock(sample_text_without_spaces, &sample_text); 128 MakeEachLetterABlock(search_string_without_spaces, &search_string); 129 MakeEachLetterABlock(search_string_altered_without_spaces, 130 &search_string_altered); 131 MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); 132 MakeEachLetterABlock(search_to_beginning_without_spaces, 133 &search_to_beginning_string); 134 MakeEachLetterABlock(sample_text_many_matches_without_spaces, 135 &sample_text_many_matches); 136 MakeEachLetterABlock(search_string_many_matches_without_spaces, 137 &search_string_many_matches); 138 MakeEachLetterABlock("y", &test_string_y); 139 MakeEachLetterABlock("e", &test_string_e); 140 char* new_test_string_unaligned_e = new char[kBlockSize]; 141 memset(new_test_string_unaligned_e, ' ', kBlockSize); 142 new_test_string_unaligned_e[kBlockSize - 2] = 'e'; 143 test_string_unaligned_e = new_test_string_unaligned_e; 144 char* new_test_string_all_Qs = new char[kBlockSize]; 145 memset(new_test_string_all_Qs, 'Q', kBlockSize); 146 test_string_all_Qs = new_test_string_all_Qs; 147 hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); 148 hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); 149 hashed_f = 150 RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); 151 hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); 152 hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); 153 } 154 155 static void TearDownTestCase() { 156 delete[] sample_text; 157 delete[] search_string; 158 delete[] search_string_altered; 159 delete[] search_to_end_string; 160 delete[] search_to_beginning_string; 161 delete[] sample_text_many_matches; 162 delete[] search_string_many_matches; 163 delete[] test_string_y; 164 delete[] test_string_e; 165 delete[] test_string_unaligned_e; 166 delete[] test_string_all_Qs; 167 } 168 169 // Each block in the sample text and search string is kBlockSize bytes long, 170 // and consists of (kBlockSize - 1) space characters 171 // followed by a single letter of text. 172 173 // Block numbers of certain characters within the sample text: 174 // All six occurrences of "e", in order. 175 static const int block_of_first_e = 2; 176 static const int block_of_second_e = 16; 177 static const int block_of_third_e = 21; 178 static const int block_of_fourth_e = 27; 179 static const int block_of_fifth_e = 35; 180 static const int block_of_sixth_e = 42; 181 182 static const int block_of_y_in_only = 7; 183 // The block number is multiplied by kBlockSize to arrive at the 184 // index, which points to the (kBlockSize - 1) space characters before 185 // the letter specified. 186 // Indices of certain characters within the sample text. 187 static const int index_of_first_e = block_of_first_e * kBlockSize; 188 static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; 189 static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; 190 static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; 191 static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; 192 static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; 193 static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; 194 static const int index_of_space_before_fear_itself = 33 * kBlockSize; 195 static const int index_of_space_before_itself = 38 * kBlockSize; 196 static const int index_of_ababc = 4 * kBlockSize; 197 198 // Indices of certain characters within the search strings. 199 static const int index_of_second_w_in_what_we = 5 * kBlockSize; 200 static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; 201 static const int index_of_f_in_fearsome = 16 * kBlockSize; 202 static const int index_of_space_in_eat_itself = 12 * kBlockSize; 203 static const int index_of_i_in_itself = 13 * kBlockSize; 204 static const int index_of_t_in_use_the = 4 * kBlockSize; 205 static const int index_of_o_in_online = 8 * kBlockSize; 206 207 static const char sample_text_without_spaces[]; 208 static const char search_string_without_spaces[]; 209 static const char search_string_altered_without_spaces[]; 210 static const char search_to_end_without_spaces[]; 211 static const char search_to_beginning_without_spaces[]; 212 static const char sample_text_many_matches_without_spaces[]; 213 static const char search_string_many_matches_without_spaces[]; 214 215 static const char* sample_text; 216 static const char* search_string; 217 static const char* search_string_altered; 218 static const char* search_to_end_string; 219 static const char* search_to_beginning_string; 220 static const char* sample_text_many_matches; 221 static const char* search_string_many_matches; 222 223 static const char* test_string_y; 224 static const char* test_string_e; 225 static const char* test_string_all_Qs; 226 static const char* test_string_unaligned_e; 227 228 static uint32_t hashed_y; 229 static uint32_t hashed_e; 230 static uint32_t hashed_f; 231 static uint32_t hashed_unaligned_e; 232 static uint32_t hashed_all_Qs; 233 234 // Boost scoped_ptr, if available, could be used instead of std::auto_ptr. 235 std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup 236 std::auto_ptr<BlockHash> th_; // hash table not populated; 237 // used to test incremental adds 238 239 BlockHash::Match best_match_; 240 char* compare_buffer_1_; 241 char* compare_buffer_2_; 242 int prime_result_; 243 }; 244 245 #ifdef GTEST_HAS_DEATH_TEST 246 typedef BlockHashTest BlockHashDeathTest; 247 #endif // GTEST_HAS_DEATH_TEST 248 249 // The C++ standard requires a separate definition of these static const values, 250 // even though their initializers are given within the class definition. 251 const int BlockHashTest::block_of_first_e; 252 const int BlockHashTest::block_of_second_e; 253 const int BlockHashTest::block_of_third_e; 254 const int BlockHashTest::block_of_fourth_e; 255 const int BlockHashTest::block_of_fifth_e; 256 const int BlockHashTest::block_of_sixth_e; 257 const int BlockHashTest::block_of_y_in_only; 258 const int BlockHashTest::index_of_first_e; 259 const int BlockHashTest::index_of_fourth_e; 260 const int BlockHashTest::index_of_sixth_e; 261 const int BlockHashTest::index_of_y_in_only; 262 const int BlockHashTest::index_of_space_before_fear_is_fear; 263 const int BlockHashTest::index_of_longest_match_ear_is_fear; 264 const int BlockHashTest::index_of_i_in_fear_is_fear; 265 const int BlockHashTest::index_of_space_before_fear_itself; 266 const int BlockHashTest::index_of_space_before_itself; 267 const int BlockHashTest::index_of_ababc; 268 const int BlockHashTest::index_of_second_w_in_what_we; 269 const int BlockHashTest::index_of_second_e_in_what_we_hear; 270 const int BlockHashTest::index_of_f_in_fearsome; 271 const int BlockHashTest::index_of_space_in_eat_itself; 272 const int BlockHashTest::index_of_i_in_itself; 273 const int BlockHashTest::index_of_t_in_use_the; 274 const int BlockHashTest::index_of_o_in_online; 275 276 const char BlockHashTest::sample_text_without_spaces[] = 277 "The only thing we have to fear is fear itself"; 278 279 const char BlockHashTest::search_string_without_spaces[] = 280 "What we hear is fearsome"; 281 282 const char BlockHashTest::search_string_altered_without_spaces[] = 283 "Vhat ve hear is fearsomm"; 284 285 const char BlockHashTest::search_to_end_without_spaces[] = 286 "Pop will eat itself, eventually"; 287 288 const char BlockHashTest::search_to_beginning_without_spaces[] = 289 "Use The online dictionary"; 290 291 const char BlockHashTest::sample_text_many_matches_without_spaces[] = 292 "ababababcab"; 293 294 const char BlockHashTest::search_string_many_matches_without_spaces[] = 295 "ababc"; 296 297 const char* BlockHashTest::sample_text = NULL; 298 const char* BlockHashTest::search_string = NULL; 299 const char* BlockHashTest::search_string_altered = NULL; 300 const char* BlockHashTest::search_to_end_string = NULL; 301 const char* BlockHashTest::search_to_beginning_string = NULL; 302 const char* BlockHashTest::sample_text_many_matches = NULL; 303 const char* BlockHashTest::search_string_many_matches = NULL; 304 305 const char* BlockHashTest::test_string_y = NULL; 306 const char* BlockHashTest::test_string_e = NULL; 307 const char* BlockHashTest::test_string_unaligned_e = NULL; 308 const char* BlockHashTest::test_string_all_Qs = NULL; 309 310 uint32_t BlockHashTest::hashed_y = 0; 311 uint32_t BlockHashTest::hashed_e = 0; 312 uint32_t BlockHashTest::hashed_f = 0; 313 uint32_t BlockHashTest::hashed_unaligned_e = 0; 314 uint32_t BlockHashTest::hashed_all_Qs = 0; 315 316 void BlockHashTest::TestAndPrintTimesForCompareFunctions( 317 bool should_be_identical) { 318 CHECK(compare_buffer_1_ != NULL); 319 CHECK(compare_buffer_2_ != NULL); 320 // Prime the memory cache. 321 prime_result_ = 322 memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); 323 const char* const block1_limit = 324 &compare_buffer_1_[kTimingTestSize - kBlockSize]; 325 int block_compare_words_result = 0; 326 CycleTimer block_compare_words_timer; 327 block_compare_words_timer.Start(); 328 for (int i = 0; i < kTimingTestIterations; ++i) { 329 const char* block1 = compare_buffer_1_; 330 const char* block2 = compare_buffer_2_; 331 while (block1 < block1_limit) { 332 if (!BlockHash::BlockCompareWords(block1, block2)) { 333 ++block_compare_words_result; 334 } 335 block1 += kBlockSize; 336 block2 += kBlockSize; 337 } 338 } 339 block_compare_words_timer.Stop(); 340 double time_for_block_compare_words = 341 static_cast<double>(block_compare_words_timer.GetInUsec()) 342 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); 343 int block_contents_match_result = 0; 344 CycleTimer block_contents_match_timer; 345 block_contents_match_timer.Start(); 346 for (int i = 0; i < kTimingTestIterations; ++i) { 347 const char* block1 = compare_buffer_1_; 348 const char* block2 = compare_buffer_2_; 349 while (block1 < block1_limit) { 350 if (!BlockHash::BlockContentsMatch(block1, block2)) { 351 ++block_contents_match_result; 352 } 353 block1 += kBlockSize; 354 block2 += kBlockSize; 355 } 356 } 357 block_contents_match_timer.Stop(); 358 double time_for_block_contents_match = 359 static_cast<double>(block_contents_match_timer.GetInUsec()) 360 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); 361 EXPECT_EQ(block_contents_match_result, block_compare_words_result); 362 if (should_be_identical) { 363 CHECK_EQ(0, block_compare_words_result); 364 } else { 365 CHECK_GT(block_compare_words_result, 0); 366 } 367 std::cout << "BlockHash::BlockCompareWords: " 368 << time_for_block_compare_words << " us per operation" << std::endl; 369 std::cout << "BlockHash::BlockContentsMatch: " 370 << time_for_block_contents_match << " us per operation" 371 << std::endl; 372 if (time_for_block_compare_words > 0) { 373 double percent_change = 374 (((time_for_block_contents_match - time_for_block_compare_words) 375 / time_for_block_compare_words) * 100.0); 376 if (percent_change >= 0.0) { 377 std::cout << "BlockContentsMatch is " << percent_change << "%" 378 << " SLOWER than BlockCompareWords" << std::endl; 379 } else { 380 std::cout << "BlockContentsMatch is " << (-percent_change) << "%" 381 << " FASTER than BlockCompareWords" << std::endl; 382 } 383 } 384 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS) 385 // Only check timings for optimized build. There's plenty of margin: this 386 // check will fail only if BlockContentsMatch is at least twice as slow as 387 // BlockCompareWords. 388 EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); 389 #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS 390 } 391 392 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching 393 // characters -- in fact, they're the same string -- but since max_bytes is zero 394 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings 395 // and should return 0. 396 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { 397 EXPECT_EQ(0, MatchingBytesToLeft( 398 &search_string[index_of_f_in_fearsome], 399 &search_string[index_of_f_in_fearsome], 400 0)); 401 EXPECT_EQ(0, MatchingBytesToRight( 402 &search_string[index_of_f_in_fearsome], 403 &search_string[index_of_f_in_fearsome], 404 0)); 405 } 406 407 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { 408 EXPECT_EQ(0, MatchingBytesToLeft( 409 &search_string[index_of_f_in_fearsome], 410 &search_string[index_of_f_in_fearsome], 411 -1)); 412 EXPECT_EQ(0, MatchingBytesToLeft( 413 &search_string[index_of_f_in_fearsome], 414 &search_string[index_of_f_in_fearsome], 415 INT_MIN)); 416 EXPECT_EQ(0, MatchingBytesToRight( 417 &search_string[index_of_f_in_fearsome], 418 &search_string[index_of_f_in_fearsome], 419 -1)); 420 EXPECT_EQ(0, MatchingBytesToRight( 421 &search_string[index_of_f_in_fearsome], 422 &search_string[index_of_f_in_fearsome], 423 INT_MIN)); 424 } 425 426 TEST_F(BlockHashTest, MaxBytesOneMatch) { 427 EXPECT_EQ(1, MatchingBytesToLeft( 428 &search_string[index_of_f_in_fearsome], 429 &search_string[index_of_f_in_fearsome], 430 1)); 431 EXPECT_EQ(1, MatchingBytesToRight( 432 &search_string[index_of_f_in_fearsome], 433 &search_string[index_of_f_in_fearsome], 434 1)); 435 } 436 437 TEST_F(BlockHashTest, MaxBytesOneNoMatch) { 438 EXPECT_EQ(0, MatchingBytesToLeft( 439 &search_string[index_of_f_in_fearsome], 440 &search_string[index_of_second_e_in_what_we_hear], 441 1)); 442 EXPECT_EQ(0, MatchingBytesToRight( 443 &search_string[index_of_f_in_fearsome], 444 &search_string[index_of_second_e_in_what_we_hear - 1], 445 1)); 446 } 447 448 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { 449 // The number of bytes that match between the original "we hear is fearsome" 450 // and the altered "ve hear is fearsome". 451 const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); 452 const int max_bytes = expected_length - 1; 453 EXPECT_EQ(max_bytes, MatchingBytesToLeft( 454 &search_string[index_of_f_in_fearsome], 455 &search_string_altered[index_of_f_in_fearsome], 456 max_bytes)); 457 } 458 459 TEST_F(BlockHashTest, LeftNotLimited) { 460 // The number of bytes that match between the original "we hear is fearsome" 461 // and the altered "ve hear is fearsome". 462 const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); 463 const int max_bytes = expected_length + 1; 464 EXPECT_EQ(expected_length, MatchingBytesToLeft( 465 &search_string[index_of_f_in_fearsome], 466 &search_string_altered[index_of_f_in_fearsome], 467 max_bytes)); 468 EXPECT_EQ(expected_length, MatchingBytesToLeft( 469 &search_string[index_of_f_in_fearsome], 470 &search_string_altered[index_of_f_in_fearsome], 471 INT_MAX)); 472 } 473 474 TEST_F(BlockHashTest, RightLimitedByMaxBytes) { 475 // The number of bytes that match between the original "fearsome" 476 // and the altered "fearsomm". 477 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) 478 + (kBlockSize - 1); // spacing between letters 479 const int max_bytes = expected_length - 1; 480 EXPECT_EQ(max_bytes, MatchingBytesToRight( 481 &search_string[index_of_f_in_fearsome], 482 &search_string_altered[index_of_f_in_fearsome], 483 max_bytes)); 484 } 485 486 TEST_F(BlockHashTest, RightNotLimited) { 487 // The number of bytes that match between the original "we hear is fearsome" 488 // and the altered "ve hear is fearsome". 489 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) 490 + (kBlockSize - 1); // spacing between letters 491 const int max_bytes = expected_length + 1; 492 EXPECT_EQ(expected_length, MatchingBytesToRight( 493 &search_string[index_of_f_in_fearsome], 494 &search_string_altered[index_of_f_in_fearsome], 495 max_bytes)); 496 EXPECT_EQ(expected_length, MatchingBytesToRight( 497 &search_string[index_of_f_in_fearsome], 498 &search_string_altered[index_of_f_in_fearsome], 499 INT_MAX)); 500 } 501 502 // If this test fails in a non-x86 or non-gcc environment, consider adding 503 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and 504 // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc 505 // to use a special implementation (BlockCompareWords) to compare blocks 506 // rather than using standard memcmp. 507 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { 508 compare_buffer_1_ = new char[kTimingTestSize]; 509 compare_buffer_2_ = new char[kTimingTestSize]; 510 511 // The value 0xBE is arbitrarily chosen. First test with identical contents 512 // in the buffers, so that the comparison functions cannot short-circuit 513 // and will return true. 514 memset(compare_buffer_1_, 0xBE, kTimingTestSize); 515 memset(compare_buffer_2_, 0xBE, kTimingTestSize); 516 std::cout << "Comparing " 517 << (kTimingTestSize / kBlockSize) << " identical values:" 518 << std::endl; 519 TestAndPrintTimesForCompareFunctions(true); 520 521 // Now change one value in the middle of one buffer, so that the contents 522 // are no longer the same. 523 compare_buffer_1_[kTimingTestSize / 2] = 0x00; 524 std::cout << "Comparing " 525 << ((kTimingTestSize / kBlockSize) - 1) << " identical values" 526 << " and one mismatch:" << std::endl; 527 TestAndPrintTimesForCompareFunctions(false); 528 529 // Set one of the bytes of each block to differ so that 530 // none of the compare operations will return true, and run timing tests. 531 // In practice, BlockHash::BlockContentsMatch will only be called 532 // for two blocks whose hash values match, and the two most important 533 // cases are: (1) the blocks are identical, or (2) none of their bytes match. 534 TimingTestForBlocksThatDifferAtByte(0); 535 TimingTestForBlocksThatDifferAtByte(1); 536 TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); 537 TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); 538 539 delete[] compare_buffer_1_; 540 delete[] compare_buffer_2_; 541 } 542 543 TEST_F(BlockHashTest, FindFailsBeforeHashing) { 544 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); 545 } 546 547 TEST_F(BlockHashTest, HashOneFindOne) { 548 for (int i = 0; i <= index_of_y_in_only; ++i) { 549 th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); 550 } 551 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, 552 test_string_y)); 553 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); 554 } 555 556 TEST_F(BlockHashTest, HashAllFindOne) { 557 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, 558 test_string_y)); 559 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); 560 } 561 562 TEST_F(BlockHashTest, NonMatchingTextNotFound) { 563 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs)); 564 } 565 566 // Search for unaligned text. The test string is contained in the 567 // sample text (unlike the non-matching string in NonMatchingTextNotFound, 568 // above), but it is not aligned on a block boundary. FindMatchingBlock 569 // will only work if the test string is aligned on a block boundary. 570 // 571 // " T h e o n l y" 572 // ^^^^ Here is the test string 573 // 574 TEST_F(BlockHashTest, UnalignedTextNotFound) { 575 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, 576 test_string_unaligned_e)); 577 } 578 579 TEST_F(BlockHashTest, FindSixMatches) { 580 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, 581 test_string_e)); 582 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, 583 test_string_e)); 584 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, 585 test_string_e)); 586 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, 587 test_string_e)); 588 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, 589 test_string_e)); 590 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, 591 test_string_e)); 592 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); 593 594 // Starting over gives same result 595 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, 596 test_string_e)); 597 } 598 599 TEST_F(BlockHashTest, AddRangeFindThreeMatches) { 600 // Add hash values only for those characters before the fourth instance 601 // of "e" in the sample text. Tests that the ending index 602 // of AddAllBlocksThroughIndex() is not inclusive: only three matches 603 // for "e" should be found. 604 th_->AddAllBlocksThroughIndex(index_of_fourth_e); 605 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 606 test_string_e)); 607 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 608 test_string_e)); 609 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 610 test_string_e)); 611 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); 612 613 // Starting over gives same result 614 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 615 test_string_e)); 616 } 617 618 // Try indices that are not even multiples of the block size. 619 // Add three ranges and verify the results after each 620 // call to AddAllBlocksThroughIndex(). 621 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { 622 th_->AddAllBlocksThroughIndex(index_of_first_e + 1); 623 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 624 test_string_e)); 625 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); 626 627 // Starting over gives same result 628 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 629 test_string_e)); 630 631 // Add the second range to expand the result set 632 th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); 633 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 634 test_string_e)); 635 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 636 test_string_e)); 637 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 638 test_string_e)); 639 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); 640 641 // Starting over gives same result 642 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 643 test_string_e)); 644 645 // Add the third range to expand the result set 646 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); 647 648 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 649 test_string_e)); 650 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 651 test_string_e)); 652 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 653 test_string_e)); 654 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 655 test_string_e)); 656 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 657 658 // Starting over gives same result 659 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 660 test_string_e)); 661 } 662 663 #ifdef GTEST_HAS_DEATH_TEST 664 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) { 665 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); 666 667 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 668 test_string_e)); 669 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 670 test_string_e)); 671 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 672 test_string_e)); 673 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 674 test_string_e)); 675 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 676 677 // Starting over gives same result 678 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 679 test_string_e)); 680 681 // These calls will produce DFATAL error messages, and will do nothing, 682 // since the ranges have already been added. 683 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3), 684 "<"); 685 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1), 686 "<"); 687 688 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 689 test_string_e)); 690 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 691 test_string_e)); 692 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 693 test_string_e)); 694 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 695 test_string_e)); 696 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 697 698 // Starting over gives same result 699 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 700 test_string_e)); 701 } 702 #endif // GTEST_HAS_DEATH_TEST 703 704 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { 705 th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); 706 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 707 test_string_e)); 708 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 709 test_string_e)); 710 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 711 test_string_e)); 712 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 713 test_string_e)); 714 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, 715 test_string_e)); 716 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, 717 test_string_e)); 718 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); 719 720 // Starting over gives same result 721 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 722 test_string_e)); 723 } 724 725 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { 726 BlockHash zero_sized_hash(sample_text, 0, 0); 727 EXPECT_EQ(true, zero_sized_hash.Init(true)); 728 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); 729 } 730 731 #ifdef GTEST_HAS_DEATH_TEST 732 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { 733 EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")), 734 "invalid"); 735 } 736 737 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { 738 BlockHash bh(sample_text, strlen(sample_text), 0); 739 EXPECT_TRUE(bh.Init(false)); 740 EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); 741 } 742 743 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { 744 BlockHash bh(sample_text, strlen(sample_text), 0); 745 EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), 746 "called before"); 747 } 748 749 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { 750 EXPECT_DEBUG_DEATH( 751 th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)), 752 "higher than end"); 753 } 754 #endif // GTEST_HAS_DEATH_TEST 755 756 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { 757 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); 758 } 759 760 TEST_F(BlockHashTest, FindBestMatch) { 761 dh_->FindBestMatch(hashed_f, 762 &search_string[index_of_f_in_fearsome], 763 search_string, 764 strlen(search_string), 765 &best_match_); 766 EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); 767 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); 768 // The match includes the spaces after the final character, 769 // which is why (kBlockSize - 1) is added to the expected best size. 770 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), 771 best_match_.size()); 772 } 773 774 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { 775 BlockHash th2(sample_text, strlen(sample_text), 0x10000); 776 th2.Init(true); // hash all blocks 777 th2.FindBestMatch(hashed_f, 778 &search_string[index_of_f_in_fearsome], 779 search_string, 780 strlen(search_string), 781 &best_match_); 782 // Offset should begin with dictionary_size 783 EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), 784 best_match_.source_offset()); 785 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); 786 // The match includes the spaces after the final character, 787 // which is why (kBlockSize - 1) is added to the expected best size. 788 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), 789 best_match_.size()); 790 } 791 792 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { 793 // Hash the "i" in "fear itself" 794 uint32_t hash_value = RollingHash<kBlockSize>::Hash( 795 &search_to_end_string[index_of_i_in_itself]); 796 dh_->FindBestMatch(hash_value, 797 &search_to_end_string[index_of_i_in_itself], 798 search_to_end_string, 799 strlen(search_to_end_string), 800 &best_match_); 801 EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); 802 EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); 803 EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); 804 } 805 806 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { 807 // Hash the "i" in "fear itself" 808 uint32_t hash_value = RollingHash<kBlockSize>::Hash( 809 &search_to_beginning_string[index_of_o_in_online]); 810 dh_->FindBestMatch(hash_value, 811 &search_to_beginning_string[index_of_o_in_online], 812 search_to_beginning_string, 813 strlen(search_to_beginning_string), 814 &best_match_); 815 EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary 816 EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); 817 // The match includes the spaces after the final character, 818 // which is why (kBlockSize - 1) is added to the expected best size. 819 EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), 820 best_match_.size()); 821 } 822 823 TEST_F(BlockHashTest, BestMatchWithManyMatches) { 824 BlockHash many_matches_hash(sample_text_many_matches, 825 strlen(sample_text_many_matches), 826 0); 827 EXPECT_TRUE(many_matches_hash.Init(true)); 828 // Hash the " a" at the beginning of the search string "ababc" 829 uint32_t hash_value = 830 RollingHash<kBlockSize>::Hash(search_string_many_matches); 831 many_matches_hash.FindBestMatch(hash_value, 832 search_string_many_matches, 833 search_string_many_matches, 834 strlen(search_string_many_matches), 835 &best_match_); 836 EXPECT_EQ(index_of_ababc, best_match_.source_offset()); 837 EXPECT_EQ(0, best_match_.target_offset()); 838 EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); 839 } 840 841 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { 842 char* collision_search_string = new char[strlen(search_string) + 1]; 843 memcpy(collision_search_string, search_string, strlen(search_string) + 1); 844 char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; 845 846 // Tweak the collision string so that it has the same hash value 847 // but different text. The last four characters of the search string 848 // should be " f", and the bytes given below have the same hash value 849 // as those characters. 850 CHECK_GE(kBlockSize, 4); 851 fearsome_location[kBlockSize - 4] = 0x84; 852 fearsome_location[kBlockSize - 3] = 0xF1; 853 fearsome_location[kBlockSize - 2] = 0x51; 854 fearsome_location[kBlockSize - 1] = 0x00; 855 EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); 856 EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], 857 fearsome_location, 858 kBlockSize)); 859 // No match should be found this time. 860 dh_->FindBestMatch(hashed_f, 861 fearsome_location, 862 collision_search_string, 863 strlen(search_string), // since collision_search_string has embedded \0 864 &best_match_); 865 EXPECT_EQ(-1, best_match_.source_offset()); 866 EXPECT_EQ(-1, best_match_.target_offset()); 867 EXPECT_EQ(0U, best_match_.size()); 868 delete[] collision_search_string; 869 } 870 871 // If the footprint passed to FindBestMatch does not actually match 872 // the search string, it should not find any matches. 873 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { 874 dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! 875 &search_string[index_of_f_in_fearsome], 876 search_string, 877 strlen(search_string), 878 &best_match_); 879 EXPECT_EQ(-1, best_match_.source_offset()); 880 EXPECT_EQ(-1, best_match_.target_offset()); 881 EXPECT_EQ(0U, best_match_.size()); 882 } 883 884 // Use a dictionary containing 1M copies of the letter 'Q', 885 // and target data that also contains 1M Qs. If FindBestMatch 886 // is not throttled to find a maximum number of matches, this 887 // will take a very long time -- several seconds at least. 888 // If this test appears to hang, it is because the throttling code 889 // (see BlockHash::kMaxMatchesToCheck for details) is not working. 890 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { 891 const int kTestSize = 1 << 20; // 1M 892 char* huge_dictionary = new char[kTestSize]; 893 memset(huge_dictionary, 'Q', kTestSize); 894 BlockHash huge_bh(huge_dictionary, kTestSize, 0); 895 EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); 896 char* huge_target = new char[kTestSize]; 897 memset(huge_target, 'Q', kTestSize); 898 CycleTimer timer; 899 timer.Start(); 900 huge_bh.FindBestMatch(hashed_all_Qs, 901 huge_target + (kTestSize / 2), // middle of target 902 huge_target, 903 kTestSize, 904 &best_match_); 905 timer.Stop(); 906 double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); 907 std::cout << "Time to search for best match with 1M matches: " 908 << elapsed_time_in_us << " us" << std::endl; 909 // All blocks match the candidate block. FindBestMatch should have checked 910 // a certain number of matches before giving up. The best match 911 // should include at least half the source and target, since the candidate 912 // block was in the middle of the target data. 913 EXPECT_GT((kTestSize / 2), best_match_.source_offset()); 914 EXPECT_GT((kTestSize / 2), best_match_.target_offset()); 915 EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); 916 EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds 917 #ifdef NDEBUG 918 EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second 919 #endif // NDEBUG 920 delete[] huge_target; 921 delete[] huge_dictionary; 922 } 923 924 #ifdef GTEST_HAS_DEATH_TEST 925 TEST_F(BlockHashDeathTest, AddTooManyBlocks) { 926 for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { 927 th_->AddOneIndexHash(i * kBlockSize, hashed_e); 928 } 929 // Didn't expect another block to be added 930 EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), 931 hashed_e), 932 "AddBlock"); 933 } 934 #endif // GTEST_HAS_DEATH_TEST 935 936 } // namespace open_vcdiff 937