1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <config.h> 17 #include "blockhash.h" 18 #include <limits.h> // INT_MIN 19 #include <string.h> // memcpy, memcmp, strlen 20 #include <iostream> 21 #include <memory> // auto_ptr 22 #include "encodetable.h" 23 #include "rolling_hash.h" 24 #include "testing.h" 25 26 namespace open_vcdiff { 27 28 const int kBlockSize = BlockHash::kBlockSize; 29 30 class BlockHashTest : public testing::Test { 31 protected: 32 static const int kTimingTestSize = 1 << 21; // 2M 33 static const int kTimingTestIterations = 32; 34 35 BlockHashTest() { 36 dh_.reset(BlockHash::CreateDictionaryHash(sample_text, 37 strlen(sample_text))); 38 th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0)); 39 EXPECT_TRUE(dh_.get() != NULL); 40 EXPECT_TRUE(th_.get() != NULL); 41 } 42 43 // BlockHashTest is a friend to BlockHash. Expose the protected functions 44 // that will be tested by the children of BlockHashTest. 45 static bool BlockContentsMatch(const char* block1, const char* block2) { 46 return BlockHash::BlockContentsMatch(block1, block2); 47 } 48 49 int FirstMatchingBlock(const BlockHash& block_hash, 50 uint32_t hash_value, 51 const char* block_ptr) const { 52 return block_hash.FirstMatchingBlock(hash_value, block_ptr); 53 } 54 55 int NextMatchingBlock(const BlockHash& block_hash, 56 int block_number, 57 const char* block_ptr) const { 58 return block_hash.NextMatchingBlock(block_number, block_ptr); 59 } 60 61 static int MatchingBytesToLeft(const char* source_match_start, 62 const char* target_match_start, 63 int max_bytes) { 64 return BlockHash::MatchingBytesToLeft(source_match_start, 65 target_match_start, 66 max_bytes); 67 } 68 69 static int MatchingBytesToRight(const char* source_match_end, 70 const char* target_match_end, 71 int max_bytes) { 72 return BlockHash::MatchingBytesToRight(source_match_end, 73 target_match_end, 74 max_bytes); 75 } 76 77 static int StringLengthAsInt(const char* s) { 78 return static_cast<int>(strlen(s)); 79 } 80 81 void InitBlocksToDifferAtNthByte(int n) { 82 CHECK(n < kBlockSize); 83 memset(compare_buffer_1_, 0xBE, kTimingTestSize); 84 memset(compare_buffer_2_, 0xBE, kTimingTestSize); 85 for (int index = n; index < kTimingTestSize; index += kBlockSize) { 86 compare_buffer_1_[index] = 0x00; 87 compare_buffer_2_[index] = 0x01; 88 } 89 } 90 91 void TestAndPrintTimesForCompareFunctions(bool should_be_identical); 92 93 void TimingTestForBlocksThatDifferAtByte(int n) { 94 InitBlocksToDifferAtNthByte(n); 95 std::cout << "Comparing blocks that differ at byte " << n << std::endl; 96 TestAndPrintTimesForCompareFunctions(false); 97 } 98 99 // Copy sample_text_without_spaces and search_string_without_spaces 100 // into newly allocated sample_text and search_string buffers, 101 // but pad them with space characters so that every character 102 // in sample_text_without_spaces matches (kBlockSize - 1) 103 // space characters in sample_text, followed by that character. 104 // For example: 105 // Since sample_text_without_spaces begins "The only thing"..., 106 // if kBlockSize is 4, then 3 space characters will be inserted 107 // between each letter of sample_text, as follows: 108 // " T h e o n l y t h i n g"... 109 // This makes testing simpler, because finding a kBlockSize-byte match 110 // between the sample text and search string only depends on the 111 // trailing letter in each block. 112 static void MakeEachLetterABlock(const char* string_without_spaces, 113 const char** result) { 114 const size_t length_without_spaces = strlen(string_without_spaces); 115 char* padded_text = new char[(kBlockSize * length_without_spaces) + 1]; 116 memset(padded_text, ' ', kBlockSize * length_without_spaces); 117 char* padded_text_ptr = padded_text + (kBlockSize - 1); 118 for (size_t i = 0; i < length_without_spaces; ++i) { 119 *padded_text_ptr = string_without_spaces[i]; 120 padded_text_ptr += kBlockSize; 121 } 122 padded_text[kBlockSize * length_without_spaces] = '\0'; 123 *result = padded_text; 124 } 125 126 static void SetUpTestCase() { 127 MakeEachLetterABlock(sample_text_without_spaces, &sample_text); 128 MakeEachLetterABlock(search_string_without_spaces, &search_string); 129 MakeEachLetterABlock(search_string_altered_without_spaces, 130 &search_string_altered); 131 MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string); 132 MakeEachLetterABlock(search_to_beginning_without_spaces, 133 &search_to_beginning_string); 134 MakeEachLetterABlock(sample_text_many_matches_without_spaces, 135 &sample_text_many_matches); 136 MakeEachLetterABlock(search_string_many_matches_without_spaces, 137 &search_string_many_matches); 138 MakeEachLetterABlock("y", &test_string_y); 139 MakeEachLetterABlock("e", &test_string_e); 140 char* new_test_string_unaligned_e = new char[kBlockSize]; 141 memset(new_test_string_unaligned_e, ' ', kBlockSize); 142 new_test_string_unaligned_e[kBlockSize - 2] = 'e'; 143 test_string_unaligned_e = new_test_string_unaligned_e; 144 char* new_test_string_all_Qs = new char[kBlockSize]; 145 memset(new_test_string_all_Qs, 'Q', kBlockSize); 146 test_string_all_Qs = new_test_string_all_Qs; 147 hashed_y = RollingHash<kBlockSize>::Hash(test_string_y); 148 hashed_e = RollingHash<kBlockSize>::Hash(test_string_e); 149 hashed_f = 150 RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]); 151 hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e); 152 hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs); 153 } 154 155 static void TearDownTestCase() { 156 delete[] sample_text; 157 delete[] search_string; 158 delete[] search_string_altered; 159 delete[] search_to_end_string; 160 delete[] search_to_beginning_string; 161 delete[] sample_text_many_matches; 162 delete[] search_string_many_matches; 163 delete[] test_string_y; 164 delete[] test_string_e; 165 delete[] test_string_unaligned_e; 166 delete[] test_string_all_Qs; 167 } 168 169 // Each block in the sample text and search string is kBlockSize bytes long, 170 // and consists of (kBlockSize - 1) space characters 171 // followed by a single letter of text. 172 173 // Block numbers of certain characters within the sample text: 174 // All six occurrences of "e", in order. 175 static const int block_of_first_e = 2; 176 static const int block_of_second_e = 16; 177 static const int block_of_third_e = 21; 178 static const int block_of_fourth_e = 27; 179 static const int block_of_fifth_e = 35; 180 static const int block_of_sixth_e = 42; 181 182 static const int block_of_y_in_only = 7; 183 // The block number is multiplied by kBlockSize to arrive at the 184 // index, which points to the (kBlockSize - 1) space characters before 185 // the letter specified. 186 // Indices of certain characters within the sample text. 187 static const int index_of_first_e = block_of_first_e * kBlockSize; 188 static const int index_of_fourth_e = block_of_fourth_e * kBlockSize; 189 static const int index_of_sixth_e = block_of_sixth_e * kBlockSize; 190 static const int index_of_y_in_only = block_of_y_in_only * kBlockSize; 191 static const int index_of_space_before_fear_is_fear = 25 * kBlockSize; 192 static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize; 193 static const int index_of_i_in_fear_is_fear = 31 * kBlockSize; 194 static const int index_of_space_before_fear_itself = 33 * kBlockSize; 195 static const int index_of_space_before_itself = 38 * kBlockSize; 196 static const int index_of_ababc = 4 * kBlockSize; 197 198 // Indices of certain characters within the search strings. 199 static const int index_of_second_w_in_what_we = 5 * kBlockSize; 200 static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize; 201 static const int index_of_f_in_fearsome = 16 * kBlockSize; 202 static const int index_of_space_in_eat_itself = 12 * kBlockSize; 203 static const int index_of_i_in_itself = 13 * kBlockSize; 204 static const int index_of_t_in_use_the = 4 * kBlockSize; 205 static const int index_of_o_in_online = 8 * kBlockSize; 206 207 static const char sample_text_without_spaces[]; 208 static const char search_string_without_spaces[]; 209 static const char search_string_altered_without_spaces[]; 210 static const char search_to_end_without_spaces[]; 211 static const char search_to_beginning_without_spaces[]; 212 static const char sample_text_many_matches_without_spaces[]; 213 static const char search_string_many_matches_without_spaces[]; 214 215 static const char* sample_text; 216 static const char* search_string; 217 static const char* search_string_altered; 218 static const char* search_to_end_string; 219 static const char* search_to_beginning_string; 220 static const char* sample_text_many_matches; 221 static const char* search_string_many_matches; 222 223 static const char* test_string_y; 224 static const char* test_string_e; 225 static const char* test_string_all_Qs; 226 static const char* test_string_unaligned_e; 227 228 static uint32_t hashed_y; 229 static uint32_t hashed_e; 230 static uint32_t hashed_f; 231 static uint32_t hashed_unaligned_e; 232 static uint32_t hashed_all_Qs; 233 234 // Boost scoped_ptr, if available, could be used instead of std::auto_ptr. 235 std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup 236 std::auto_ptr<BlockHash> th_; // hash table not populated; 237 // used to test incremental adds 238 239 BlockHash::Match best_match_; 240 char* compare_buffer_1_; 241 char* compare_buffer_2_; 242 int prime_result_; 243 }; 244 245 #ifdef GTEST_HAS_DEATH_TEST 246 typedef BlockHashTest BlockHashDeathTest; 247 #endif // GTEST_HAS_DEATH_TEST 248 249 // The C++ standard requires a separate definition of these static const values, 250 // even though their initializers are given within the class definition. 251 const int BlockHashTest::block_of_first_e; 252 const int BlockHashTest::block_of_second_e; 253 const int BlockHashTest::block_of_third_e; 254 const int BlockHashTest::block_of_fourth_e; 255 const int BlockHashTest::block_of_fifth_e; 256 const int BlockHashTest::block_of_sixth_e; 257 const int BlockHashTest::block_of_y_in_only; 258 const int BlockHashTest::index_of_first_e; 259 const int BlockHashTest::index_of_fourth_e; 260 const int BlockHashTest::index_of_sixth_e; 261 const int BlockHashTest::index_of_y_in_only; 262 const int BlockHashTest::index_of_space_before_fear_is_fear; 263 const int BlockHashTest::index_of_longest_match_ear_is_fear; 264 const int BlockHashTest::index_of_i_in_fear_is_fear; 265 const int BlockHashTest::index_of_space_before_fear_itself; 266 const int BlockHashTest::index_of_space_before_itself; 267 const int BlockHashTest::index_of_ababc; 268 const int BlockHashTest::index_of_second_w_in_what_we; 269 const int BlockHashTest::index_of_second_e_in_what_we_hear; 270 const int BlockHashTest::index_of_f_in_fearsome; 271 const int BlockHashTest::index_of_space_in_eat_itself; 272 const int BlockHashTest::index_of_i_in_itself; 273 const int BlockHashTest::index_of_t_in_use_the; 274 const int BlockHashTest::index_of_o_in_online; 275 276 const char BlockHashTest::sample_text_without_spaces[] = 277 "The only thing we have to fear is fear itself"; 278 279 const char BlockHashTest::search_string_without_spaces[] = 280 "What we hear is fearsome"; 281 282 const char BlockHashTest::search_string_altered_without_spaces[] = 283 "Vhat ve hear is fearsomm"; 284 285 const char BlockHashTest::search_to_end_without_spaces[] = 286 "Pop will eat itself, eventually"; 287 288 const char BlockHashTest::search_to_beginning_without_spaces[] = 289 "Use The online dictionary"; 290 291 const char BlockHashTest::sample_text_many_matches_without_spaces[] = 292 "ababababcab"; 293 294 const char BlockHashTest::search_string_many_matches_without_spaces[] = 295 "ababc"; 296 297 const char* BlockHashTest::sample_text = NULL; 298 const char* BlockHashTest::search_string = NULL; 299 const char* BlockHashTest::search_string_altered = NULL; 300 const char* BlockHashTest::search_to_end_string = NULL; 301 const char* BlockHashTest::search_to_beginning_string = NULL; 302 const char* BlockHashTest::sample_text_many_matches = NULL; 303 const char* BlockHashTest::search_string_many_matches = NULL; 304 305 const char* BlockHashTest::test_string_y = NULL; 306 const char* BlockHashTest::test_string_e = NULL; 307 const char* BlockHashTest::test_string_unaligned_e = NULL; 308 const char* BlockHashTest::test_string_all_Qs = NULL; 309 310 uint32_t BlockHashTest::hashed_y = 0; 311 uint32_t BlockHashTest::hashed_e = 0; 312 uint32_t BlockHashTest::hashed_f = 0; 313 uint32_t BlockHashTest::hashed_unaligned_e = 0; 314 uint32_t BlockHashTest::hashed_all_Qs = 0; 315 316 void BlockHashTest::TestAndPrintTimesForCompareFunctions( 317 bool should_be_identical) { 318 CHECK(compare_buffer_1_ != NULL); 319 CHECK(compare_buffer_2_ != NULL); 320 // Prime the memory cache. 321 prime_result_ = 322 memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize); 323 const char* const block1_limit = 324 &compare_buffer_1_[kTimingTestSize - kBlockSize]; 325 int block_compare_words_result = 0; 326 CycleTimer block_compare_words_timer; 327 block_compare_words_timer.Start(); 328 for (int i = 0; i < kTimingTestIterations; ++i) { 329 const char* block1 = compare_buffer_1_; 330 const char* block2 = compare_buffer_2_; 331 while (block1 < block1_limit) { 332 if (!BlockHash::BlockCompareWords(block1, block2)) { 333 ++block_compare_words_result; 334 } 335 block1 += kBlockSize; 336 block2 += kBlockSize; 337 } 338 } 339 block_compare_words_timer.Stop(); 340 double time_for_block_compare_words = 341 static_cast<double>(block_compare_words_timer.GetInUsec()) 342 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); 343 int block_contents_match_result = 0; 344 CycleTimer block_contents_match_timer; 345 block_contents_match_timer.Start(); 346 for (int i = 0; i < kTimingTestIterations; ++i) { 347 const char* block1 = compare_buffer_1_; 348 const char* block2 = compare_buffer_2_; 349 while (block1 < block1_limit) { 350 if (!BlockHash::BlockContentsMatch(block1, block2)) { 351 ++block_contents_match_result; 352 } 353 block1 += kBlockSize; 354 block2 += kBlockSize; 355 } 356 } 357 block_contents_match_timer.Stop(); 358 double time_for_block_contents_match = 359 static_cast<double>(block_contents_match_timer.GetInUsec()) 360 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations); 361 EXPECT_EQ(block_contents_match_result, block_compare_words_result); 362 if (should_be_identical) { 363 CHECK_EQ(0, block_compare_words_result); 364 } else { 365 CHECK_GT(block_compare_words_result, 0); 366 } 367 std::cout << "BlockHash::BlockCompareWords: " 368 << time_for_block_compare_words << " us per operation" << std::endl; 369 std::cout << "BlockHash::BlockContentsMatch: " 370 << time_for_block_contents_match << " us per operation" 371 << std::endl; 372 if (time_for_block_compare_words > 0) { 373 double percent_change = 374 (((time_for_block_contents_match - time_for_block_compare_words) 375 / time_for_block_compare_words) * 100.0); 376 if (percent_change >= 0.0) { 377 std::cout << "BlockContentsMatch is " << percent_change << "%" 378 << " SLOWER than BlockCompareWords" << std::endl; 379 } else { 380 std::cout << "BlockContentsMatch is " << (-percent_change) << "%" 381 << " FASTER than BlockCompareWords" << std::endl; 382 } 383 } 384 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS) 385 // Only check timings for optimized build. There's plenty of margin: this 386 // check will fail only if BlockContentsMatch is at least twice as slow as 387 // BlockCompareWords. 388 EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match); 389 #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS 390 } 391 392 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching 393 // characters -- in fact, they're the same string -- but since max_bytes is zero 394 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings 395 // and should return 0. 396 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) { 397 EXPECT_EQ(0, MatchingBytesToLeft( 398 &search_string[index_of_f_in_fearsome], 399 &search_string[index_of_f_in_fearsome], 400 0)); 401 EXPECT_EQ(0, MatchingBytesToRight( 402 &search_string[index_of_f_in_fearsome], 403 &search_string[index_of_f_in_fearsome], 404 0)); 405 } 406 407 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) { 408 EXPECT_EQ(0, MatchingBytesToLeft( 409 &search_string[index_of_f_in_fearsome], 410 &search_string[index_of_f_in_fearsome], 411 -1)); 412 EXPECT_EQ(0, MatchingBytesToLeft( 413 &search_string[index_of_f_in_fearsome], 414 &search_string[index_of_f_in_fearsome], 415 INT_MIN)); 416 EXPECT_EQ(0, MatchingBytesToRight( 417 &search_string[index_of_f_in_fearsome], 418 &search_string[index_of_f_in_fearsome], 419 -1)); 420 EXPECT_EQ(0, MatchingBytesToRight( 421 &search_string[index_of_f_in_fearsome], 422 &search_string[index_of_f_in_fearsome], 423 INT_MIN)); 424 } 425 426 TEST_F(BlockHashTest, MaxBytesOneMatch) { 427 EXPECT_EQ(1, MatchingBytesToLeft( 428 &search_string[index_of_f_in_fearsome], 429 &search_string[index_of_f_in_fearsome], 430 1)); 431 EXPECT_EQ(1, MatchingBytesToRight( 432 &search_string[index_of_f_in_fearsome], 433 &search_string[index_of_f_in_fearsome], 434 1)); 435 } 436 437 TEST_F(BlockHashTest, MaxBytesOneNoMatch) { 438 EXPECT_EQ(0, MatchingBytesToLeft( 439 &search_string[index_of_f_in_fearsome], 440 &search_string[index_of_second_e_in_what_we_hear], 441 1)); 442 EXPECT_EQ(0, MatchingBytesToRight( 443 &search_string[index_of_f_in_fearsome], 444 &search_string[index_of_second_e_in_what_we_hear - 1], 445 1)); 446 } 447 448 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) { 449 // The number of bytes that match between the original "we hear is fearsome" 450 // and the altered "ve hear is fearsome". 451 const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); 452 const int max_bytes = expected_length - 1; 453 EXPECT_EQ(max_bytes, MatchingBytesToLeft( 454 &search_string[index_of_f_in_fearsome], 455 &search_string_altered[index_of_f_in_fearsome], 456 max_bytes)); 457 } 458 459 TEST_F(BlockHashTest, LeftNotLimited) { 460 // The number of bytes that match between the original "we hear is fearsome" 461 // and the altered "ve hear is fearsome". 462 const int expected_length = kBlockSize * StringLengthAsInt("e hear is "); 463 const int max_bytes = expected_length + 1; 464 EXPECT_EQ(expected_length, MatchingBytesToLeft( 465 &search_string[index_of_f_in_fearsome], 466 &search_string_altered[index_of_f_in_fearsome], 467 max_bytes)); 468 EXPECT_EQ(expected_length, MatchingBytesToLeft( 469 &search_string[index_of_f_in_fearsome], 470 &search_string_altered[index_of_f_in_fearsome], 471 INT_MAX)); 472 } 473 474 TEST_F(BlockHashTest, RightLimitedByMaxBytes) { 475 // The number of bytes that match between the original "fearsome" 476 // and the altered "fearsomm". 477 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) 478 + (kBlockSize - 1); // spacing between letters 479 const int max_bytes = expected_length - 1; 480 EXPECT_EQ(max_bytes, MatchingBytesToRight( 481 &search_string[index_of_f_in_fearsome], 482 &search_string_altered[index_of_f_in_fearsome], 483 max_bytes)); 484 } 485 486 TEST_F(BlockHashTest, RightNotLimited) { 487 // The number of bytes that match between the original "we hear is fearsome" 488 // and the altered "ve hear is fearsome". 489 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom")) 490 + (kBlockSize - 1); // spacing between letters 491 const int max_bytes = expected_length + 1; 492 EXPECT_EQ(expected_length, MatchingBytesToRight( 493 &search_string[index_of_f_in_fearsome], 494 &search_string_altered[index_of_f_in_fearsome], 495 max_bytes)); 496 EXPECT_EQ(expected_length, MatchingBytesToRight( 497 &search_string[index_of_f_in_fearsome], 498 &search_string_altered[index_of_f_in_fearsome], 499 INT_MAX)); 500 } 501 502 // If this test fails in a non-x86 or non-gcc environment, consider adding 503 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and 504 // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc 505 // to use a special implementation (BlockCompareWords) to compare blocks 506 // rather than using standard memcmp. 507 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) { 508 compare_buffer_1_ = new char[kTimingTestSize]; 509 compare_buffer_2_ = new char[kTimingTestSize]; 510 511 // The value 0xBE is arbitrarily chosen. First test with identical contents 512 // in the buffers, so that the comparison functions cannot short-circuit 513 // and will return true. 514 memset(compare_buffer_1_, 0xBE, kTimingTestSize); 515 memset(compare_buffer_2_, 0xBE, kTimingTestSize); 516 std::cout << "Comparing " 517 << (kTimingTestSize / kBlockSize) << " identical values:" 518 << std::endl; 519 TestAndPrintTimesForCompareFunctions(true); 520 521 // Now change one value in the middle of one buffer, so that the contents 522 // are no longer the same. 523 compare_buffer_1_[kTimingTestSize / 2] = 0x00; 524 std::cout << "Comparing " 525 << ((kTimingTestSize / kBlockSize) - 1) << " identical values" 526 << " and one mismatch:" << std::endl; 527 TestAndPrintTimesForCompareFunctions(false); 528 529 // Set one of the bytes of each block to differ so that 530 // none of the compare operations will return true, and run timing tests. 531 // In practice, BlockHash::BlockContentsMatch will only be called 532 // for two blocks whose hash values match, and the two most important 533 // cases are: (1) the blocks are identical, or (2) none of their bytes match. 534 TimingTestForBlocksThatDifferAtByte(0); 535 TimingTestForBlocksThatDifferAtByte(1); 536 TimingTestForBlocksThatDifferAtByte(kBlockSize / 2); 537 TimingTestForBlocksThatDifferAtByte(kBlockSize - 1); 538 539 delete[] compare_buffer_1_; 540 delete[] compare_buffer_2_; 541 } 542 543 TEST_F(BlockHashTest, FindFailsBeforeHashing) { 544 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); 545 } 546 547 TEST_F(BlockHashTest, HashOneFindOne) { 548 for (int i = 0; i <= index_of_y_in_only; ++i) { 549 th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i])); 550 } 551 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y, 552 test_string_y)); 553 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y)); 554 } 555 556 TEST_F(BlockHashTest, HashAllFindOne) { 557 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y, 558 test_string_y)); 559 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y)); 560 } 561 562 TEST_F(BlockHashTest, NonMatchingTextNotFound) { 563 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs)); 564 } 565 566 // Search for unaligned text. The test string is contained in the 567 // sample text (unlike the non-matching string in NonMatchingTextNotFound, 568 // above), but it is not aligned on a block boundary. FindMatchingBlock 569 // will only work if the test string is aligned on a block boundary. 570 // 571 // " T h e o n l y" 572 // ^^^^ Here is the test string 573 // 574 TEST_F(BlockHashTest, UnalignedTextNotFound) { 575 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e, 576 test_string_unaligned_e)); 577 } 578 579 TEST_F(BlockHashTest, FindSixMatches) { 580 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, 581 test_string_e)); 582 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e, 583 test_string_e)); 584 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e, 585 test_string_e)); 586 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e, 587 test_string_e)); 588 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e, 589 test_string_e)); 590 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e, 591 test_string_e)); 592 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e)); 593 594 // Starting over gives same result 595 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e, 596 test_string_e)); 597 } 598 599 TEST_F(BlockHashTest, AddRangeFindThreeMatches) { 600 // Add hash values only for those characters before the fourth instance 601 // of "e" in the sample text. Tests that the ending index 602 // of AddAllBlocksThroughIndex() is not inclusive: only three matches 603 // for "e" should be found. 604 th_->AddAllBlocksThroughIndex(index_of_fourth_e); 605 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 606 test_string_e)); 607 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 608 test_string_e)); 609 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 610 test_string_e)); 611 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); 612 613 // Starting over gives same result 614 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 615 test_string_e)); 616 } 617 618 // Try indices that are not even multiples of the block size. 619 // Add three ranges and verify the results after each 620 // call to AddAllBlocksThroughIndex(). 621 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) { 622 th_->AddAllBlocksThroughIndex(index_of_first_e + 1); 623 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 624 test_string_e)); 625 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e)); 626 627 // Starting over gives same result 628 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 629 test_string_e)); 630 631 // Add the second range to expand the result set 632 th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3); 633 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 634 test_string_e)); 635 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 636 test_string_e)); 637 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 638 test_string_e)); 639 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e)); 640 641 // Starting over gives same result 642 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 643 test_string_e)); 644 645 // Add the third range to expand the result set 646 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); 647 648 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 649 test_string_e)); 650 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 651 test_string_e)); 652 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 653 test_string_e)); 654 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 655 test_string_e)); 656 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 657 658 // Starting over gives same result 659 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 660 test_string_e)); 661 } 662 663 #ifdef GTEST_HAS_DEATH_TEST 664 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) { 665 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1); 666 667 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 668 test_string_e)); 669 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 670 test_string_e)); 671 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 672 test_string_e)); 673 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 674 test_string_e)); 675 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 676 677 // Starting over gives same result 678 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 679 test_string_e)); 680 681 // These calls will produce DFATAL error messages, and will do nothing, 682 // since the ranges have already been added. 683 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3), 684 "<"); 685 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1), 686 "<"); 687 688 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 689 test_string_e)); 690 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 691 test_string_e)); 692 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 693 test_string_e)); 694 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 695 test_string_e)); 696 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e)); 697 698 // Starting over gives same result 699 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 700 test_string_e)); 701 } 702 #endif // GTEST_HAS_DEATH_TEST 703 704 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) { 705 th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text)); 706 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 707 test_string_e)); 708 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e, 709 test_string_e)); 710 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e, 711 test_string_e)); 712 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e, 713 test_string_e)); 714 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e, 715 test_string_e)); 716 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e, 717 test_string_e)); 718 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e)); 719 720 // Starting over gives same result 721 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e, 722 test_string_e)); 723 } 724 725 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) { 726 BlockHash zero_sized_hash(sample_text, 0, 0); 727 EXPECT_EQ(true, zero_sized_hash.Init(true)); 728 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y)); 729 } 730 731 #ifdef GTEST_HAS_DEATH_TEST 732 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) { 733 EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")), 734 "invalid"); 735 } 736 737 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) { 738 BlockHash bh(sample_text, strlen(sample_text), 0); 739 EXPECT_TRUE(bh.Init(false)); 740 EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice"); 741 } 742 743 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) { 744 BlockHash bh(sample_text, strlen(sample_text), 0); 745 EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e), 746 "called before"); 747 } 748 749 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) { 750 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1), 751 "higher than end"); 752 } 753 #endif // GTEST_HAS_DEATH_TEST 754 755 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) { 756 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA")); 757 } 758 759 TEST_F(BlockHashTest, FindBestMatch) { 760 dh_->FindBestMatch(hashed_f, 761 &search_string[index_of_f_in_fearsome], 762 search_string, 763 strlen(search_string), 764 &best_match_); 765 EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset()); 766 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); 767 // The match includes the spaces after the final character, 768 // which is why (kBlockSize - 1) is added to the expected best size. 769 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), 770 best_match_.size()); 771 } 772 773 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) { 774 BlockHash th2(sample_text, strlen(sample_text), 0x10000); 775 th2.Init(true); // hash all blocks 776 th2.FindBestMatch(hashed_f, 777 &search_string[index_of_f_in_fearsome], 778 search_string, 779 strlen(search_string), 780 &best_match_); 781 // Offset should begin with dictionary_size 782 EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear), 783 best_match_.source_offset()); 784 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset()); 785 // The match includes the spaces after the final character, 786 // which is why (kBlockSize - 1) is added to the expected best size. 787 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1), 788 best_match_.size()); 789 } 790 791 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) { 792 // Hash the "i" in "fear itself" 793 uint32_t hash_value = RollingHash<kBlockSize>::Hash( 794 &search_to_end_string[index_of_i_in_itself]); 795 dh_->FindBestMatch(hash_value, 796 &search_to_end_string[index_of_i_in_itself], 797 search_to_end_string, 798 strlen(search_to_end_string), 799 &best_match_); 800 EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset()); 801 EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset()); 802 EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size()); 803 } 804 805 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) { 806 // Hash the "i" in "fear itself" 807 uint32_t hash_value = RollingHash<kBlockSize>::Hash( 808 &search_to_beginning_string[index_of_o_in_online]); 809 dh_->FindBestMatch(hash_value, 810 &search_to_beginning_string[index_of_o_in_online], 811 search_to_beginning_string, 812 strlen(search_to_beginning_string), 813 &best_match_); 814 EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary 815 EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset()); 816 // The match includes the spaces after the final character, 817 // which is why (kBlockSize - 1) is added to the expected best size. 818 EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1), 819 best_match_.size()); 820 } 821 822 TEST_F(BlockHashTest, BestMatchWithManyMatches) { 823 BlockHash many_matches_hash(sample_text_many_matches, 824 strlen(sample_text_many_matches), 825 0); 826 EXPECT_TRUE(many_matches_hash.Init(true)); 827 // Hash the " a" at the beginning of the search string "ababc" 828 uint32_t hash_value = 829 RollingHash<kBlockSize>::Hash(search_string_many_matches); 830 many_matches_hash.FindBestMatch(hash_value, 831 search_string_many_matches, 832 search_string_many_matches, 833 strlen(search_string_many_matches), 834 &best_match_); 835 EXPECT_EQ(index_of_ababc, best_match_.source_offset()); 836 EXPECT_EQ(0, best_match_.target_offset()); 837 EXPECT_EQ(strlen(search_string_many_matches), best_match_.size()); 838 } 839 840 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) { 841 char* collision_search_string = new char[strlen(search_string) + 1]; 842 memcpy(collision_search_string, search_string, strlen(search_string) + 1); 843 char* fearsome_location = &collision_search_string[index_of_f_in_fearsome]; 844 845 // Tweak the collision string so that it has the same hash value 846 // but different text. The last four characters of the search string 847 // should be " f", and the bytes given below have the same hash value 848 // as those characters. 849 CHECK_GE(kBlockSize, 4); 850 fearsome_location[kBlockSize - 4] = 0x84; 851 fearsome_location[kBlockSize - 3] = 0xF1; 852 fearsome_location[kBlockSize - 2] = 0x51; 853 fearsome_location[kBlockSize - 1] = 0x00; 854 EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location)); 855 EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome], 856 fearsome_location, 857 kBlockSize)); 858 // No match should be found this time. 859 dh_->FindBestMatch(hashed_f, 860 fearsome_location, 861 collision_search_string, 862 strlen(search_string), // since collision_search_string has embedded \0 863 &best_match_); 864 EXPECT_EQ(-1, best_match_.source_offset()); 865 EXPECT_EQ(-1, best_match_.target_offset()); 866 EXPECT_EQ(0U, best_match_.size()); 867 delete[] collision_search_string; 868 } 869 870 // If the footprint passed to FindBestMatch does not actually match 871 // the search string, it should not find any matches. 872 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) { 873 dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"! 874 &search_string[index_of_f_in_fearsome], 875 search_string, 876 strlen(search_string), 877 &best_match_); 878 EXPECT_EQ(-1, best_match_.source_offset()); 879 EXPECT_EQ(-1, best_match_.target_offset()); 880 EXPECT_EQ(0U, best_match_.size()); 881 } 882 883 // Use a dictionary containing 1M copies of the letter 'Q', 884 // and target data that also contains 1M Qs. If FindBestMatch 885 // is not throttled to find a maximum number of matches, this 886 // will take a very long time -- several seconds at least. 887 // If this test appears to hang, it is because the throttling code 888 // (see BlockHash::kMaxMatchesToCheck for details) is not working. 889 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) { 890 const int kTestSize = 1 << 20; // 1M 891 char* huge_dictionary = new char[kTestSize]; 892 memset(huge_dictionary, 'Q', kTestSize); 893 BlockHash huge_bh(huge_dictionary, kTestSize, 0); 894 EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true)); 895 char* huge_target = new char[kTestSize]; 896 memset(huge_target, 'Q', kTestSize); 897 CycleTimer timer; 898 timer.Start(); 899 huge_bh.FindBestMatch(hashed_all_Qs, 900 huge_target + (kTestSize / 2), // middle of target 901 huge_target, 902 kTestSize, 903 &best_match_); 904 timer.Stop(); 905 double elapsed_time_in_us = static_cast<double>(timer.GetInUsec()); 906 std::cout << "Time to search for best match with 1M matches: " 907 << elapsed_time_in_us << " us" << std::endl; 908 // All blocks match the candidate block. FindBestMatch should have checked 909 // a certain number of matches before giving up. The best match 910 // should include at least half the source and target, since the candidate 911 // block was in the middle of the target data. 912 EXPECT_GT((kTestSize / 2), best_match_.source_offset()); 913 EXPECT_GT((kTestSize / 2), best_match_.target_offset()); 914 EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size()); 915 EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds 916 #ifdef NDEBUG 917 EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second 918 #endif // NDEBUG 919 delete[] huge_target; 920 delete[] huge_dictionary; 921 } 922 923 #ifdef GTEST_HAS_DEATH_TEST 924 TEST_F(BlockHashDeathTest, AddTooManyBlocks) { 925 for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) { 926 th_->AddOneIndexHash(i * kBlockSize, hashed_e); 927 } 928 // Didn't expect another block to be added 929 EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text), 930 hashed_e), 931 "AddBlock"); 932 } 933 #endif // GTEST_HAS_DEATH_TEST 934 935 } // namespace open_vcdiff 936