Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <config.h>
     17 #include "blockhash.h"
     18 #include <limits.h>  // INT_MIN
     19 #include <string.h>  // memcpy, memcmp, strlen
     20 #include <iostream>
     21 #include <memory>  // auto_ptr
     22 #include "encodetable.h"
     23 #include "rolling_hash.h"
     24 #include "testing.h"
     25 
     26 namespace open_vcdiff {
     27 
     28 const int kBlockSize = BlockHash::kBlockSize;
     29 
     30 class BlockHashTest : public testing::Test {
     31  protected:
     32   static const int kTimingTestSize = 1 << 21;  // 2M
     33   static const int kTimingTestIterations = 32;
     34 
     35   BlockHashTest() {
     36     dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
     37                                               strlen(sample_text)));
     38     th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
     39     EXPECT_TRUE(dh_.get() != NULL);
     40     EXPECT_TRUE(th_.get() != NULL);
     41   }
     42 
     43   // BlockHashTest is a friend to BlockHash.  Expose the protected functions
     44   // that will be tested by the children of BlockHashTest.
     45   static bool BlockContentsMatch(const char* block1, const char* block2) {
     46     return BlockHash::BlockContentsMatch(block1, block2);
     47   }
     48 
     49   int FirstMatchingBlock(const BlockHash& block_hash,
     50                          uint32_t hash_value,
     51                          const char* block_ptr) const {
     52     return block_hash.FirstMatchingBlock(hash_value, block_ptr);
     53   }
     54 
     55   int NextMatchingBlock(const BlockHash& block_hash,
     56                         int block_number,
     57                         const char* block_ptr) const {
     58     return block_hash.NextMatchingBlock(block_number, block_ptr);
     59   }
     60 
     61   static int MatchingBytesToLeft(const char* source_match_start,
     62                                  const char* target_match_start,
     63                                  int max_bytes) {
     64     return BlockHash::MatchingBytesToLeft(source_match_start,
     65                                           target_match_start,
     66                                           max_bytes);
     67   }
     68 
     69   static int MatchingBytesToRight(const char* source_match_end,
     70                                   const char* target_match_end,
     71                                   int max_bytes) {
     72     return BlockHash::MatchingBytesToRight(source_match_end,
     73                                            target_match_end,
     74                                            max_bytes);
     75   }
     76 
     77   static int StringLengthAsInt(const char* s) {
     78     return static_cast<int>(strlen(s));
     79   }
     80 
     81   void InitBlocksToDifferAtNthByte(int n) {
     82     CHECK(n < kBlockSize);
     83     memset(compare_buffer_1_, 0xBE, kTimingTestSize);
     84     memset(compare_buffer_2_, 0xBE, kTimingTestSize);
     85     for (int index = n; index < kTimingTestSize; index += kBlockSize) {
     86       compare_buffer_1_[index] = 0x00;
     87       compare_buffer_2_[index] = 0x01;
     88     }
     89   }
     90 
     91   void TestAndPrintTimesForCompareFunctions(bool should_be_identical);
     92 
     93   void TimingTestForBlocksThatDifferAtByte(int n) {
     94     InitBlocksToDifferAtNthByte(n);
     95     std::cout << "Comparing blocks that differ at byte " << n << std::endl;
     96     TestAndPrintTimesForCompareFunctions(false);
     97   }
     98 
     99   // Copy sample_text_without_spaces and search_string_without_spaces
    100   // into newly allocated sample_text and search_string buffers,
    101   // but pad them with space characters so that every character
    102   // in sample_text_without_spaces matches (kBlockSize - 1)
    103   // space characters in sample_text, followed by that character.
    104   // For example:
    105   // Since sample_text_without_spaces begins "The only thing"...,
    106   // if kBlockSize is 4, then 3 space characters will be inserted
    107   // between each letter of sample_text, as follows:
    108   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
    109   // This makes testing simpler, because finding a kBlockSize-byte match
    110   // between the sample text and search string only depends on the
    111   // trailing letter in each block.
    112   static void MakeEachLetterABlock(const char* string_without_spaces,
    113                                    const char** result) {
    114     const size_t length_without_spaces = strlen(string_without_spaces);
    115     char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
    116     memset(padded_text, ' ', kBlockSize * length_without_spaces);
    117     char* padded_text_ptr = padded_text + (kBlockSize - 1);
    118     for (size_t i = 0; i < length_without_spaces; ++i) {
    119       *padded_text_ptr = string_without_spaces[i];
    120       padded_text_ptr += kBlockSize;
    121     }
    122     padded_text[kBlockSize * length_without_spaces] = '\0';
    123     *result = padded_text;
    124   }
    125 
    126   static void SetUpTestCase() {
    127     MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
    128     MakeEachLetterABlock(search_string_without_spaces, &search_string);
    129     MakeEachLetterABlock(search_string_altered_without_spaces,
    130                          &search_string_altered);
    131     MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
    132     MakeEachLetterABlock(search_to_beginning_without_spaces,
    133                          &search_to_beginning_string);
    134     MakeEachLetterABlock(sample_text_many_matches_without_spaces,
    135                          &sample_text_many_matches);
    136     MakeEachLetterABlock(search_string_many_matches_without_spaces,
    137                          &search_string_many_matches);
    138     MakeEachLetterABlock("y", &test_string_y);
    139     MakeEachLetterABlock("e", &test_string_e);
    140     char* new_test_string_unaligned_e = new char[kBlockSize];
    141     memset(new_test_string_unaligned_e, ' ', kBlockSize);
    142     new_test_string_unaligned_e[kBlockSize - 2] = 'e';
    143     test_string_unaligned_e = new_test_string_unaligned_e;
    144     char* new_test_string_all_Qs = new char[kBlockSize];
    145     memset(new_test_string_all_Qs, 'Q', kBlockSize);
    146     test_string_all_Qs = new_test_string_all_Qs;
    147     hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
    148     hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
    149     hashed_f =
    150         RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
    151     hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
    152     hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
    153   }
    154 
    155   static void TearDownTestCase() {
    156     delete[] sample_text;
    157     delete[] search_string;
    158     delete[] search_string_altered;
    159     delete[] search_to_end_string;
    160     delete[] search_to_beginning_string;
    161     delete[] sample_text_many_matches;
    162     delete[] search_string_many_matches;
    163     delete[] test_string_y;
    164     delete[] test_string_e;
    165     delete[] test_string_unaligned_e;
    166     delete[] test_string_all_Qs;
    167   }
    168 
    169   // Each block in the sample text and search string is kBlockSize bytes long,
    170   // and consists of (kBlockSize - 1) space characters
    171   // followed by a single letter of text.
    172 
    173   // Block numbers of certain characters within the sample text:
    174   // All six occurrences of "e", in order.
    175   static const int block_of_first_e = 2;
    176   static const int block_of_second_e = 16;
    177   static const int block_of_third_e = 21;
    178   static const int block_of_fourth_e = 27;
    179   static const int block_of_fifth_e = 35;
    180   static const int block_of_sixth_e = 42;
    181 
    182   static const int block_of_y_in_only = 7;
    183   // The block number is multiplied by kBlockSize to arrive at the
    184   // index, which points to the (kBlockSize - 1) space characters before
    185   // the letter specified.
    186   // Indices of certain characters within the sample text.
    187   static const int index_of_first_e = block_of_first_e * kBlockSize;
    188   static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
    189   static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
    190   static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
    191   static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
    192   static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
    193   static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
    194   static const int index_of_space_before_fear_itself = 33 * kBlockSize;
    195   static const int index_of_space_before_itself = 38 * kBlockSize;
    196   static const int index_of_ababc = 4 * kBlockSize;
    197 
    198   // Indices of certain characters within the search strings.
    199   static const int index_of_second_w_in_what_we = 5 * kBlockSize;
    200   static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
    201   static const int index_of_f_in_fearsome = 16 * kBlockSize;
    202   static const int index_of_space_in_eat_itself =  12 * kBlockSize;
    203   static const int index_of_i_in_itself = 13 * kBlockSize;
    204   static const int index_of_t_in_use_the = 4 * kBlockSize;
    205   static const int index_of_o_in_online = 8 * kBlockSize;
    206 
    207   static const char sample_text_without_spaces[];
    208   static const char search_string_without_spaces[];
    209   static const char search_string_altered_without_spaces[];
    210   static const char search_to_end_without_spaces[];
    211   static const char search_to_beginning_without_spaces[];
    212   static const char sample_text_many_matches_without_spaces[];
    213   static const char search_string_many_matches_without_spaces[];
    214 
    215   static const char* sample_text;
    216   static const char* search_string;
    217   static const char* search_string_altered;
    218   static const char* search_to_end_string;
    219   static const char* search_to_beginning_string;
    220   static const char* sample_text_many_matches;
    221   static const char* search_string_many_matches;
    222 
    223   static const char* test_string_y;
    224   static const char* test_string_e;
    225   static const char* test_string_all_Qs;
    226   static const char* test_string_unaligned_e;
    227 
    228   static uint32_t hashed_y;
    229   static uint32_t hashed_e;
    230   static uint32_t hashed_f;
    231   static uint32_t hashed_unaligned_e;
    232   static uint32_t hashed_all_Qs;
    233 
    234   // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
    235   std::auto_ptr<const BlockHash> dh_;  // hash table is populated at startup
    236   std::auto_ptr<BlockHash> th_;  // hash table not populated;
    237                                 // used to test incremental adds
    238 
    239   BlockHash::Match best_match_;
    240   char* compare_buffer_1_;
    241   char* compare_buffer_2_;
    242   int prime_result_;
    243 };
    244 
    245 #ifdef GTEST_HAS_DEATH_TEST
    246 typedef BlockHashTest BlockHashDeathTest;
    247 #endif  // GTEST_HAS_DEATH_TEST
    248 
    249 // The C++ standard requires a separate definition of these static const values,
    250 // even though their initializers are given within the class definition.
    251 const int BlockHashTest::block_of_first_e;
    252 const int BlockHashTest::block_of_second_e;
    253 const int BlockHashTest::block_of_third_e;
    254 const int BlockHashTest::block_of_fourth_e;
    255 const int BlockHashTest::block_of_fifth_e;
    256 const int BlockHashTest::block_of_sixth_e;
    257 const int BlockHashTest::block_of_y_in_only;
    258 const int BlockHashTest::index_of_first_e;
    259 const int BlockHashTest::index_of_fourth_e;
    260 const int BlockHashTest::index_of_sixth_e;
    261 const int BlockHashTest::index_of_y_in_only;
    262 const int BlockHashTest::index_of_space_before_fear_is_fear;
    263 const int BlockHashTest::index_of_longest_match_ear_is_fear;
    264 const int BlockHashTest::index_of_i_in_fear_is_fear;
    265 const int BlockHashTest::index_of_space_before_fear_itself;
    266 const int BlockHashTest::index_of_space_before_itself;
    267 const int BlockHashTest::index_of_ababc;
    268 const int BlockHashTest::index_of_second_w_in_what_we;
    269 const int BlockHashTest::index_of_second_e_in_what_we_hear;
    270 const int BlockHashTest::index_of_f_in_fearsome;
    271 const int BlockHashTest::index_of_space_in_eat_itself;
    272 const int BlockHashTest::index_of_i_in_itself;
    273 const int BlockHashTest::index_of_t_in_use_the;
    274 const int BlockHashTest::index_of_o_in_online;
    275 
    276 const char BlockHashTest::sample_text_without_spaces[] =
    277     "The only thing we have to fear is fear itself";
    278 
    279 const char BlockHashTest::search_string_without_spaces[] =
    280     "What we hear is fearsome";
    281 
    282 const char BlockHashTest::search_string_altered_without_spaces[] =
    283     "Vhat ve hear is fearsomm";
    284 
    285 const char BlockHashTest::search_to_end_without_spaces[] =
    286     "Pop will eat itself, eventually";
    287 
    288 const char BlockHashTest::search_to_beginning_without_spaces[] =
    289     "Use The online dictionary";
    290 
    291 const char BlockHashTest::sample_text_many_matches_without_spaces[] =
    292     "ababababcab";
    293 
    294 const char BlockHashTest::search_string_many_matches_without_spaces[] =
    295     "ababc";
    296 
    297 const char* BlockHashTest::sample_text = NULL;
    298 const char* BlockHashTest::search_string = NULL;
    299 const char* BlockHashTest::search_string_altered = NULL;
    300 const char* BlockHashTest::search_to_end_string = NULL;
    301 const char* BlockHashTest::search_to_beginning_string = NULL;
    302 const char* BlockHashTest::sample_text_many_matches = NULL;
    303 const char* BlockHashTest::search_string_many_matches = NULL;
    304 
    305 const char* BlockHashTest::test_string_y = NULL;
    306 const char* BlockHashTest::test_string_e = NULL;
    307 const char* BlockHashTest::test_string_unaligned_e = NULL;
    308 const char* BlockHashTest::test_string_all_Qs = NULL;
    309 
    310 uint32_t BlockHashTest::hashed_y = 0;
    311 uint32_t BlockHashTest::hashed_e = 0;
    312 uint32_t BlockHashTest::hashed_f = 0;
    313 uint32_t BlockHashTest::hashed_unaligned_e = 0;
    314 uint32_t BlockHashTest::hashed_all_Qs = 0;
    315 
    316 void BlockHashTest::TestAndPrintTimesForCompareFunctions(
    317     bool should_be_identical) {
    318   CHECK(compare_buffer_1_ != NULL);
    319   CHECK(compare_buffer_2_ != NULL);
    320   // Prime the memory cache.
    321   prime_result_ =
    322       memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
    323   const char* const block1_limit =
    324       &compare_buffer_1_[kTimingTestSize - kBlockSize];
    325   int block_compare_words_result = 0;
    326   CycleTimer block_compare_words_timer;
    327   block_compare_words_timer.Start();
    328   for (int i = 0; i < kTimingTestIterations; ++i) {
    329     const char* block1 = compare_buffer_1_;
    330     const char* block2 = compare_buffer_2_;
    331     while (block1 < block1_limit) {
    332       if (!BlockHash::BlockCompareWords(block1, block2)) {
    333         ++block_compare_words_result;
    334       }
    335       block1 += kBlockSize;
    336       block2 += kBlockSize;
    337     }
    338   }
    339   block_compare_words_timer.Stop();
    340   double time_for_block_compare_words =
    341       static_cast<double>(block_compare_words_timer.GetInUsec())
    342       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
    343   int block_contents_match_result = 0;
    344   CycleTimer block_contents_match_timer;
    345   block_contents_match_timer.Start();
    346   for (int i = 0; i < kTimingTestIterations; ++i) {
    347     const char* block1 = compare_buffer_1_;
    348     const char* block2 = compare_buffer_2_;
    349     while (block1 < block1_limit) {
    350       if (!BlockHash::BlockContentsMatch(block1, block2)) {
    351         ++block_contents_match_result;
    352       }
    353       block1 += kBlockSize;
    354       block2 += kBlockSize;
    355     }
    356   }
    357   block_contents_match_timer.Stop();
    358   double time_for_block_contents_match =
    359       static_cast<double>(block_contents_match_timer.GetInUsec())
    360       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
    361   EXPECT_EQ(block_contents_match_result, block_compare_words_result);
    362   if (should_be_identical) {
    363     CHECK_EQ(0, block_compare_words_result);
    364   } else {
    365     CHECK_GT(block_compare_words_result, 0);
    366   }
    367   std::cout << "BlockHash::BlockCompareWords: "
    368             << time_for_block_compare_words << " us per operation" << std::endl;
    369   std::cout << "BlockHash::BlockContentsMatch: "
    370             << time_for_block_contents_match << " us per operation"
    371             << std::endl;
    372   if (time_for_block_compare_words > 0) {
    373     double percent_change =
    374         (((time_for_block_contents_match - time_for_block_compare_words)
    375           / time_for_block_compare_words) * 100.0);
    376     if (percent_change >= 0.0) {
    377       std::cout << "BlockContentsMatch is " << percent_change << "%"
    378                 << " SLOWER than BlockCompareWords" << std::endl;
    379     } else {
    380       std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
    381                 << " FASTER than BlockCompareWords" << std::endl;
    382     }
    383   }
    384 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
    385   // Only check timings for optimized build.  There's plenty of margin: this
    386   // check will fail only if BlockContentsMatch is at least twice as slow as
    387   // BlockCompareWords.
    388   EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
    389 #endif  // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
    390 }
    391 
    392 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching
    393 // characters -- in fact, they're the same string -- but since max_bytes is zero
    394 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings
    395 // and should return 0.
    396 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
    397   EXPECT_EQ(0, MatchingBytesToLeft(
    398       &search_string[index_of_f_in_fearsome],
    399       &search_string[index_of_f_in_fearsome],
    400       0));
    401   EXPECT_EQ(0, MatchingBytesToRight(
    402       &search_string[index_of_f_in_fearsome],
    403       &search_string[index_of_f_in_fearsome],
    404       0));
    405 }
    406 
    407 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
    408   EXPECT_EQ(0, MatchingBytesToLeft(
    409       &search_string[index_of_f_in_fearsome],
    410       &search_string[index_of_f_in_fearsome],
    411       -1));
    412   EXPECT_EQ(0, MatchingBytesToLeft(
    413       &search_string[index_of_f_in_fearsome],
    414       &search_string[index_of_f_in_fearsome],
    415       INT_MIN));
    416   EXPECT_EQ(0, MatchingBytesToRight(
    417       &search_string[index_of_f_in_fearsome],
    418       &search_string[index_of_f_in_fearsome],
    419       -1));
    420   EXPECT_EQ(0, MatchingBytesToRight(
    421       &search_string[index_of_f_in_fearsome],
    422       &search_string[index_of_f_in_fearsome],
    423       INT_MIN));
    424 }
    425 
    426 TEST_F(BlockHashTest, MaxBytesOneMatch) {
    427   EXPECT_EQ(1, MatchingBytesToLeft(
    428       &search_string[index_of_f_in_fearsome],
    429       &search_string[index_of_f_in_fearsome],
    430       1));
    431   EXPECT_EQ(1, MatchingBytesToRight(
    432       &search_string[index_of_f_in_fearsome],
    433       &search_string[index_of_f_in_fearsome],
    434       1));
    435 }
    436 
    437 TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
    438   EXPECT_EQ(0, MatchingBytesToLeft(
    439       &search_string[index_of_f_in_fearsome],
    440       &search_string[index_of_second_e_in_what_we_hear],
    441       1));
    442   EXPECT_EQ(0, MatchingBytesToRight(
    443       &search_string[index_of_f_in_fearsome],
    444       &search_string[index_of_second_e_in_what_we_hear - 1],
    445       1));
    446 }
    447 
    448 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
    449   // The number of bytes that match between the original "we hear is fearsome"
    450   // and the altered "ve hear is fearsome".
    451   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
    452   const int max_bytes = expected_length - 1;
    453   EXPECT_EQ(max_bytes, MatchingBytesToLeft(
    454       &search_string[index_of_f_in_fearsome],
    455       &search_string_altered[index_of_f_in_fearsome],
    456       max_bytes));
    457 }
    458 
    459 TEST_F(BlockHashTest, LeftNotLimited) {
    460   // The number of bytes that match between the original "we hear is fearsome"
    461   // and the altered "ve hear is fearsome".
    462   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
    463   const int max_bytes = expected_length + 1;
    464   EXPECT_EQ(expected_length, MatchingBytesToLeft(
    465       &search_string[index_of_f_in_fearsome],
    466       &search_string_altered[index_of_f_in_fearsome],
    467       max_bytes));
    468   EXPECT_EQ(expected_length, MatchingBytesToLeft(
    469       &search_string[index_of_f_in_fearsome],
    470       &search_string_altered[index_of_f_in_fearsome],
    471       INT_MAX));
    472 }
    473 
    474 TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
    475   // The number of bytes that match between the original "fearsome"
    476   // and the altered "fearsomm".
    477   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
    478                               + (kBlockSize - 1);  // spacing between letters
    479   const int max_bytes = expected_length - 1;
    480   EXPECT_EQ(max_bytes, MatchingBytesToRight(
    481       &search_string[index_of_f_in_fearsome],
    482       &search_string_altered[index_of_f_in_fearsome],
    483       max_bytes));
    484 }
    485 
    486 TEST_F(BlockHashTest, RightNotLimited) {
    487   // The number of bytes that match between the original "we hear is fearsome"
    488   // and the altered "ve hear is fearsome".
    489   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
    490                               + (kBlockSize - 1);  // spacing between letters
    491   const int max_bytes = expected_length + 1;
    492   EXPECT_EQ(expected_length, MatchingBytesToRight(
    493       &search_string[index_of_f_in_fearsome],
    494       &search_string_altered[index_of_f_in_fearsome],
    495       max_bytes));
    496   EXPECT_EQ(expected_length, MatchingBytesToRight(
    497       &search_string[index_of_f_in_fearsome],
    498       &search_string_altered[index_of_f_in_fearsome],
    499       INT_MAX));
    500 }
    501 
    502 // If this test fails in a non-x86 or non-gcc environment, consider adding
    503 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
    504 // Makefile.in, and reconstructing the Makefile.  That will cause blockhash.cc
    505 // to use a special implementation (BlockCompareWords) to compare blocks
    506 // rather than using standard memcmp.
    507 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
    508   compare_buffer_1_ = new char[kTimingTestSize];
    509   compare_buffer_2_ = new char[kTimingTestSize];
    510 
    511   // The value 0xBE is arbitrarily chosen.  First test with identical contents
    512   // in the buffers, so that the comparison functions cannot short-circuit
    513   // and will return true.
    514   memset(compare_buffer_1_, 0xBE, kTimingTestSize);
    515   memset(compare_buffer_2_, 0xBE, kTimingTestSize);
    516   std::cout << "Comparing "
    517             << (kTimingTestSize / kBlockSize) << " identical values:"
    518             << std::endl;
    519   TestAndPrintTimesForCompareFunctions(true);
    520 
    521   // Now change one value in the middle of one buffer, so that the contents
    522   // are no longer the same.
    523   compare_buffer_1_[kTimingTestSize / 2] = 0x00;
    524   std::cout << "Comparing "
    525             << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
    526             << " and one mismatch:" << std::endl;
    527   TestAndPrintTimesForCompareFunctions(false);
    528 
    529   // Set one of the bytes of each block to differ so that
    530   // none of the compare operations will return true, and run timing tests.
    531   // In practice, BlockHash::BlockContentsMatch will only be called
    532   // for two blocks whose hash values match, and the two most important
    533   // cases are: (1) the blocks are identical, or (2) none of their bytes match.
    534   TimingTestForBlocksThatDifferAtByte(0);
    535   TimingTestForBlocksThatDifferAtByte(1);
    536   TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
    537   TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
    538 
    539   delete[] compare_buffer_1_;
    540   delete[] compare_buffer_2_;
    541 }
    542 
    543 TEST_F(BlockHashTest, FindFailsBeforeHashing) {
    544   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
    545 }
    546 
    547 TEST_F(BlockHashTest, HashOneFindOne) {
    548   for (int i = 0; i <= index_of_y_in_only; ++i) {
    549     th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
    550   }
    551   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
    552                                                    test_string_y));
    553   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
    554 }
    555 
    556 TEST_F(BlockHashTest, HashAllFindOne) {
    557   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
    558                                                    test_string_y));
    559   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
    560 }
    561 
    562 TEST_F(BlockHashTest, NonMatchingTextNotFound) {
    563   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
    564 }
    565 
    566 // Search for unaligned text.  The test string is contained in the
    567 // sample text (unlike the non-matching string in NonMatchingTextNotFound,
    568 // above), but it is not aligned on a block boundary.  FindMatchingBlock
    569 // will only work if the test string is aligned on a block boundary.
    570 //
    571 //    "   T   h   e       o   n   l   y"
    572 //              ^^^^ Here is the test string
    573 //
    574 TEST_F(BlockHashTest, UnalignedTextNotFound) {
    575   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
    576                                    test_string_unaligned_e));
    577 }
    578 
    579 TEST_F(BlockHashTest, FindSixMatches) {
    580   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
    581                                                  test_string_e));
    582   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
    583                                                  test_string_e));
    584   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
    585                                                 test_string_e));
    586   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
    587                                                  test_string_e));
    588   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
    589                                                 test_string_e));
    590   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
    591                                                 test_string_e));
    592   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
    593 
    594   // Starting over gives same result
    595   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
    596                                                  test_string_e));
    597 }
    598 
    599 TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
    600   // Add hash values only for those characters before the fourth instance
    601   // of "e" in the sample text.  Tests that the ending index
    602   // of AddAllBlocksThroughIndex() is not inclusive: only three matches
    603   // for "e" should be found.
    604   th_->AddAllBlocksThroughIndex(index_of_fourth_e);
    605   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    606                                                  test_string_e));
    607   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    608                                                  test_string_e));
    609   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    610                                                 test_string_e));
    611   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
    612 
    613   // Starting over gives same result
    614   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    615                                                  test_string_e));
    616 }
    617 
    618 // Try indices that are not even multiples of the block size.
    619 // Add three ranges and verify the results after each
    620 // call to AddAllBlocksThroughIndex().
    621 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
    622   th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
    623   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    624                                                  test_string_e));
    625   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
    626 
    627   // Starting over gives same result
    628   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    629                                                  test_string_e));
    630 
    631   // Add the second range to expand the result set
    632   th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
    633   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    634                                                  test_string_e));
    635   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    636                                                  test_string_e));
    637   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    638                                                 test_string_e));
    639   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
    640 
    641   // Starting over gives same result
    642   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    643                                                  test_string_e));
    644 
    645   // Add the third range to expand the result set
    646   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
    647 
    648   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    649                                                  test_string_e));
    650   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    651                                                  test_string_e));
    652   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    653                                                 test_string_e));
    654   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
    655                                                  test_string_e));
    656   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
    657 
    658   // Starting over gives same result
    659   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    660                                                  test_string_e));
    661 }
    662 
    663 #ifdef GTEST_HAS_DEATH_TEST
    664 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
    665   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
    666 
    667   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    668                                                  test_string_e));
    669   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    670                                                  test_string_e));
    671   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    672                                                 test_string_e));
    673   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
    674                                                  test_string_e));
    675   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
    676 
    677   // Starting over gives same result
    678   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    679                                                  test_string_e));
    680 
    681   // These calls will produce DFATAL error messages, and will do nothing,
    682   // since the ranges have already been added.
    683   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
    684                      "<");
    685   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
    686                      "<");
    687 
    688   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    689                                                  test_string_e));
    690   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    691                                                  test_string_e));
    692   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    693                                                 test_string_e));
    694   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
    695                                                  test_string_e));
    696   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
    697 
    698   // Starting over gives same result
    699   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    700                                                  test_string_e));
    701 }
    702 #endif  // GTEST_HAS_DEATH_TEST
    703 
    704 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
    705   th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
    706   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    707                                                  test_string_e));
    708   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
    709                                                  test_string_e));
    710   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
    711                                                 test_string_e));
    712   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
    713                                                  test_string_e));
    714   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
    715                                                 test_string_e));
    716   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
    717                                                 test_string_e));
    718   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
    719 
    720   // Starting over gives same result
    721   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
    722                                                  test_string_e));
    723 }
    724 
    725 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
    726   BlockHash zero_sized_hash(sample_text, 0, 0);
    727   EXPECT_EQ(true, zero_sized_hash.Init(true));
    728   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
    729 }
    730 
    731 #ifdef GTEST_HAS_DEATH_TEST
    732 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
    733   EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, "    ")),
    734                      "invalid");
    735 }
    736 
    737 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
    738   BlockHash bh(sample_text, strlen(sample_text), 0);
    739   EXPECT_TRUE(bh.Init(false));
    740   EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
    741 }
    742 
    743 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
    744   BlockHash bh(sample_text, strlen(sample_text), 0);
    745   EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
    746                      "called before");
    747 }
    748 
    749 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
    750   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1),
    751                      "higher than end");
    752 }
    753 #endif  // GTEST_HAS_DEATH_TEST
    754 
    755 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
    756   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
    757 }
    758 
    759 TEST_F(BlockHashTest, FindBestMatch) {
    760   dh_->FindBestMatch(hashed_f,
    761                      &search_string[index_of_f_in_fearsome],
    762                      search_string,
    763                      strlen(search_string),
    764                      &best_match_);
    765   EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
    766   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
    767   // The match includes the spaces after the final character,
    768   // which is why (kBlockSize - 1) is added to the expected best size.
    769   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
    770             best_match_.size());
    771 }
    772 
    773 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
    774   BlockHash th2(sample_text, strlen(sample_text), 0x10000);
    775   th2.Init(true);  // hash all blocks
    776   th2.FindBestMatch(hashed_f,
    777                     &search_string[index_of_f_in_fearsome],
    778                     search_string,
    779                     strlen(search_string),
    780                     &best_match_);
    781   // Offset should begin with dictionary_size
    782   EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
    783             best_match_.source_offset());
    784   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
    785   // The match includes the spaces after the final character,
    786   // which is why (kBlockSize - 1) is added to the expected best size.
    787   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
    788             best_match_.size());
    789 }
    790 
    791 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
    792   // Hash the "i" in "fear itself"
    793   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
    794       &search_to_end_string[index_of_i_in_itself]);
    795   dh_->FindBestMatch(hash_value,
    796                      &search_to_end_string[index_of_i_in_itself],
    797                      search_to_end_string,
    798                      strlen(search_to_end_string),
    799                      &best_match_);
    800   EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
    801   EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
    802   EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
    803 }
    804 
    805 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
    806   // Hash the "i" in "fear itself"
    807   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
    808       &search_to_beginning_string[index_of_o_in_online]);
    809   dh_->FindBestMatch(hash_value,
    810                      &search_to_beginning_string[index_of_o_in_online],
    811                      search_to_beginning_string,
    812                      strlen(search_to_beginning_string),
    813                      &best_match_);
    814   EXPECT_EQ(0, best_match_.source_offset());  // beginning of dictionary
    815   EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
    816   // The match includes the spaces after the final character,
    817   // which is why (kBlockSize - 1) is added to the expected best size.
    818   EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
    819             best_match_.size());
    820 }
    821 
    822 TEST_F(BlockHashTest, BestMatchWithManyMatches) {
    823   BlockHash many_matches_hash(sample_text_many_matches,
    824                               strlen(sample_text_many_matches),
    825                               0);
    826   EXPECT_TRUE(many_matches_hash.Init(true));
    827   // Hash the "   a" at the beginning of the search string "ababc"
    828   uint32_t hash_value =
    829       RollingHash<kBlockSize>::Hash(search_string_many_matches);
    830   many_matches_hash.FindBestMatch(hash_value,
    831                                   search_string_many_matches,
    832                                   search_string_many_matches,
    833                                   strlen(search_string_many_matches),
    834                                   &best_match_);
    835   EXPECT_EQ(index_of_ababc, best_match_.source_offset());
    836   EXPECT_EQ(0, best_match_.target_offset());
    837   EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
    838 }
    839 
    840 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
    841   char* collision_search_string = new char[strlen(search_string) + 1];
    842   memcpy(collision_search_string, search_string, strlen(search_string) + 1);
    843   char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
    844 
    845   // Tweak the collision string so that it has the same hash value
    846   // but different text.  The last four characters of the search string
    847   // should be "   f", and the bytes given below have the same hash value
    848   // as those characters.
    849   CHECK_GE(kBlockSize, 4);
    850   fearsome_location[kBlockSize - 4] = 0x84;
    851   fearsome_location[kBlockSize - 3] = 0xF1;
    852   fearsome_location[kBlockSize - 2] = 0x51;
    853   fearsome_location[kBlockSize - 1] = 0x00;
    854   EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
    855   EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
    856                       fearsome_location,
    857                       kBlockSize));
    858   // No match should be found this time.
    859   dh_->FindBestMatch(hashed_f,
    860       fearsome_location,
    861       collision_search_string,
    862       strlen(search_string),  // since collision_search_string has embedded \0
    863       &best_match_);
    864   EXPECT_EQ(-1, best_match_.source_offset());
    865   EXPECT_EQ(-1, best_match_.target_offset());
    866   EXPECT_EQ(0U, best_match_.size());
    867   delete[] collision_search_string;
    868 }
    869 
    870 // If the footprint passed to FindBestMatch does not actually match
    871 // the search string, it should not find any matches.
    872 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
    873   dh_->FindBestMatch(hashed_e,  // Using hashed value of "e" instead of "f"!
    874                      &search_string[index_of_f_in_fearsome],
    875                      search_string,
    876                      strlen(search_string),
    877                      &best_match_);
    878   EXPECT_EQ(-1, best_match_.source_offset());
    879   EXPECT_EQ(-1, best_match_.target_offset());
    880   EXPECT_EQ(0U, best_match_.size());
    881 }
    882 
    883 // Use a dictionary containing 1M copies of the letter 'Q',
    884 // and target data that also contains 1M Qs.  If FindBestMatch
    885 // is not throttled to find a maximum number of matches, this
    886 // will take a very long time -- several seconds at least.
    887 // If this test appears to hang, it is because the throttling code
    888 // (see BlockHash::kMaxMatchesToCheck for details) is not working.
    889 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
    890   const int kTestSize = 1 << 20;  // 1M
    891   char* huge_dictionary = new char[kTestSize];
    892   memset(huge_dictionary, 'Q', kTestSize);
    893   BlockHash huge_bh(huge_dictionary, kTestSize, 0);
    894   EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
    895   char* huge_target = new char[kTestSize];
    896   memset(huge_target, 'Q', kTestSize);
    897   CycleTimer timer;
    898   timer.Start();
    899   huge_bh.FindBestMatch(hashed_all_Qs,
    900                         huge_target + (kTestSize / 2),  // middle of target
    901                         huge_target,
    902                         kTestSize,
    903                         &best_match_);
    904   timer.Stop();
    905   double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
    906   std::cout << "Time to search for best match with 1M matches: "
    907             << elapsed_time_in_us << " us" << std::endl;
    908   // All blocks match the candidate block.  FindBestMatch should have checked
    909   // a certain number of matches before giving up.  The best match
    910   // should include at least half the source and target, since the candidate
    911   // block was in the middle of the target data.
    912   EXPECT_GT((kTestSize / 2), best_match_.source_offset());
    913   EXPECT_GT((kTestSize / 2), best_match_.target_offset());
    914   EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
    915   EXPECT_GT(5000000, elapsed_time_in_us);  // < 5 seconds
    916 #ifdef NDEBUG
    917   EXPECT_GT(1000000, elapsed_time_in_us);  // < 1 second
    918 #endif  // NDEBUG
    919   delete[] huge_target;
    920   delete[] huge_dictionary;
    921 }
    922 
    923 #ifdef GTEST_HAS_DEATH_TEST
    924 TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
    925   for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
    926     th_->AddOneIndexHash(i * kBlockSize, hashed_e);
    927   }
    928   // Didn't expect another block to be added
    929   EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
    930                                           hashed_e),
    931                      "AddBlock");
    932 }
    933 #endif  // GTEST_HAS_DEATH_TEST
    934 
    935 }  //  namespace open_vcdiff
    936