Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <config.h>
     17 #include "vcdiffengine.h"
     18 #include <string.h>  // memset, strlen
     19 #include <algorithm>
     20 #include <string>
     21 #include <vector>
     22 #include "addrcache.h"
     23 #include "blockhash.h"
     24 #include "encodetable.h"
     25 #include "google/output_string.h"
     26 #include "rolling_hash.h"
     27 #include "testing.h"
     28 #include "varint_bigendian.h"
     29 #include "vcdiff_defs.h"
     30 
     31 namespace open_vcdiff {
     32 
     33 namespace {
     34 
     35 class VCDiffEngineTestBase : public testing::Test {
     36  protected:
     37   typedef std::string string;
     38 
     39   // Some common definitions and helper functions used in the various tests
     40   // for VCDiffEngine.
     41   static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
     42 
     43   VCDiffEngineTestBase() : interleaved_(false),
     44                            diff_output_string_(&diff_),
     45                            verify_position_(0),
     46                            saved_total_size_position_(0),
     47                            saved_delta_encoding_position_(0),
     48                            saved_section_sizes_position_(0),
     49                            data_bytes_(0),
     50                            instruction_bytes_(0),
     51                            address_bytes_(0) { }
     52 
     53   virtual ~VCDiffEngineTestBase() { }
     54 
     55   virtual void TearDown() {
     56     VerifyMatchCounts();
     57   }
     58 
     59   // Copy string_without_spaces into newly allocated result buffer,
     60   // but pad its contents with space characters so that every character
     61   // in string_without_spaces corresponds to (block_size - 1)
     62   // spaces in the result, followed by that character.
     63   // For example:
     64   // If string_without_spaces begins "The only thing"... and block_size is 4,
     65   // then 3 space characters will be inserted
     66   // between each letter in the result, as follows:
     67   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
     68   // This makes testing simpler, because finding a block_size-byte match
     69   // between the dictionary and target only depends on the
     70   // trailing letter in each block.
     71   // If no_initial_padding is true, then the first letter will not have
     72   // spaces added in front of it.
     73   static void MakeEachLetterABlock(const char* string_without_spaces,
     74                                    const char** result,
     75                                    int block_size,
     76                                    bool no_initial_padding) {
     77     const size_t length_without_spaces = strlen(string_without_spaces);
     78     char* padded_text = new char[(block_size * length_without_spaces) + 1];
     79     memset(padded_text, ' ', block_size * length_without_spaces);
     80     char* padded_text_ptr = padded_text;
     81     if (!no_initial_padding) {
     82       padded_text_ptr += block_size - 1;
     83     }
     84     for (size_t i = 0; i < length_without_spaces; ++i) {
     85       *padded_text_ptr = string_without_spaces[i];
     86       padded_text_ptr += block_size;
     87     }
     88     *(padded_text_ptr - block_size + 1)  = '\0';
     89     *result = padded_text;
     90   }
     91 
     92   // These functions iterate through the decoded output and expect
     93   // simple elements: bytes or variable-length integers.
     94   void ExpectByte(char byte) {
     95     EXPECT_GT(diff_.size(), verify_position_);
     96     EXPECT_EQ(byte, diff_[verify_position_]);
     97     ++verify_position_;
     98   }
     99 
    100   size_t ExpectVarint(int32_t expected_value) {
    101     EXPECT_GT(diff_.size(), verify_position_);
    102     const char* const original_position = &diff_[verify_position_];
    103     const char* new_position = original_position;
    104     const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
    105     int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
    106                                                     &new_position);
    107     EXPECT_LE(0, parsed_value);
    108     size_t parsed_length = new_position - original_position;
    109     EXPECT_EQ(expected_value, parsed_value);
    110     EXPECT_EQ(expected_length, parsed_length);
    111     verify_position_ += parsed_length;
    112     return parsed_length;
    113   }
    114 
    115   size_t ExpectSize(size_t size) {
    116     return ExpectVarint(static_cast<int32_t>(size));
    117   }
    118 
    119   size_t ExpectStringLength(const char* s) {
    120     return ExpectSize(strlen(s));
    121   }
    122 
    123   void SkipVarint() {
    124     EXPECT_GT(diff_.size(), verify_position_);
    125     const char* const original_position = &diff_[verify_position_];
    126     const char* new_position = original_position;
    127     VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
    128     size_t parsed_length = new_position - original_position;
    129     verify_position_ += parsed_length;
    130   }
    131 
    132   void ExpectDataByte(char byte) {
    133     ExpectByte(byte);
    134     if (interleaved_) {
    135       ++instruction_bytes_;
    136     } else {
    137       ++data_bytes_;
    138     }
    139   }
    140 
    141   void ExpectDataString(const char *expected_string) {
    142     const char* ptr = expected_string;
    143     while (*ptr) {
    144       ExpectDataByte(*ptr);
    145       ++ptr;
    146     }
    147   }
    148 
    149   void ExpectDataStringWithBlockSpacing(const char *expected_string,
    150                                         bool trailing_spaces) {
    151     const char* ptr = expected_string;
    152     while (*ptr) {
    153       for (int i = 0; i < (kBlockSize - 1); ++i) {
    154         ExpectDataByte(' ');
    155       }
    156       ExpectDataByte(*ptr);
    157       ++ptr;
    158     }
    159     if (trailing_spaces) {
    160       for (int i = 0; i < (kBlockSize - 1); ++i) {
    161         ExpectDataByte(' ');
    162       }
    163     }
    164   }
    165 
    166   void ExpectInstructionByte(char byte) {
    167     ExpectByte(byte);
    168     ++instruction_bytes_;
    169   }
    170 
    171   void ExpectInstructionVarint(int32_t value) {
    172     instruction_bytes_ += ExpectVarint(value);
    173   }
    174 
    175   void ExpectAddressByte(char byte) {
    176     ExpectByte(byte);
    177     if (interleaved_) {
    178       ++instruction_bytes_;
    179     } else {
    180       ++address_bytes_;
    181     }
    182   }
    183 
    184   void ExpectAddressVarint(int32_t value) {
    185     if (interleaved_) {
    186       instruction_bytes_ += ExpectVarint(value);
    187     } else {
    188       address_bytes_ += ExpectVarint(value);
    189     }
    190   }
    191 
    192   // The following functions leverage the fact that the encoder uses
    193   // the default code table and cache sizes.  They are able to search for
    194   // instructions of a particular size.  The logic for mapping from
    195   // instruction type, mode, and size to opcode value is very different here
    196   // from the logic used in encodetable.{h,cc}, so hopefully
    197   // this version will help validate that the other is correct.
    198   // This version uses conditional statements, while encodetable.h
    199   // looks up values in a mapping table.
    200   void ExpectAddress(int32_t address, int copy_mode) {
    201     if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
    202       ExpectAddressVarint(address);
    203     } else {
    204       ExpectAddressByte(address);
    205     }
    206   }
    207 
    208   void ExpectAddInstruction(int size) {
    209     if (size <= 18) {
    210       ExpectInstructionByte(0x01 + size);
    211     } else {
    212       ExpectInstructionByte(0x01);
    213       ExpectInstructionVarint(size);
    214     }
    215   }
    216 
    217   void ExpectCopyInstruction(int size, int mode) {
    218     if ((size >= 4) && (size <= 16)) {
    219       ExpectInstructionByte(0x10 + (0x10 * mode) + size);
    220     } else {
    221       ExpectInstructionByte(0x13 + (0x10 * mode));
    222       ExpectInstructionVarint(size);
    223     }
    224     ExpectMatch(size);
    225   }
    226 
    227   bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
    228     if (!default_cache_.IsSameMode(copy_mode) &&
    229         (add_size <= 4) &&
    230         (copy_size >= 4) &&
    231         (copy_size <= 6)) {
    232       ExpectInstructionByte(0x9C +
    233                             (0x0C * copy_mode) +
    234                             (0x03 * add_size) +
    235                             copy_size);
    236       ExpectMatch(copy_size);
    237       return true;
    238     } else if (default_cache_.IsSameMode(copy_mode) &&
    239                (add_size <= 4) &&
    240                (copy_size == 4)) {
    241       ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
    242       ExpectMatch(copy_size);
    243       return true;
    244     } else {
    245       ExpectAddInstruction(add_size);
    246       return false;
    247     }
    248   }
    249 
    250   void ExpectAddInstructionForStringLength(const char* s) {
    251     ExpectAddInstruction(static_cast<int>(strlen(s)));
    252   }
    253 
    254   // Call this function before beginning to iterate through the diff string
    255   // using the Expect... functions.
    256   // text must be NULL-terminated.
    257   void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
    258                                               const char* target_text) {
    259     ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
    260     ExpectStringLength(dictionary);
    261     ExpectByte(0x00);  // Source segment position: start of dictionary
    262     saved_total_size_position_ = verify_position_;
    263     SkipVarint();  // Length of the delta encoding
    264     saved_delta_encoding_position_ = verify_position_;
    265     ExpectStringLength(target_text);
    266     ExpectByte(0x00);  // Delta_indicator (no compression)
    267     saved_section_sizes_position_ = verify_position_;
    268     SkipVarint();  // length of data for ADDs and RUNs
    269     SkipVarint();  // length of instructions section
    270     SkipVarint();  // length of addresses for COPYs
    271   }
    272 
    273   // Call this function before beginning to iterating through the entire
    274   // diff string using the Expect... functions.  It makes sure that the
    275   // size totals in the window header match the number of bytes that
    276   // were parsed.
    277   void VerifySizes() {
    278     EXPECT_EQ(verify_position_, diff_.size());
    279     const size_t delta_encoding_size = verify_position_ -
    280                                        saved_delta_encoding_position_;
    281     verify_position_ = saved_total_size_position_;
    282     ExpectSize(delta_encoding_size);
    283     verify_position_ = saved_section_sizes_position_;
    284     ExpectSize(data_bytes_);
    285     ExpectSize(instruction_bytes_);
    286     ExpectSize(address_bytes_);
    287   }
    288 
    289   void ExpectMatch(size_t match_size) {
    290     if (match_size >= expected_match_counts_.size()) {
    291       // Be generous to avoid resizing again
    292       expected_match_counts_.resize(match_size * 2, 0);
    293     }
    294     ++expected_match_counts_[match_size];
    295   }
    296 
    297   void VerifyMatchCounts() {
    298     EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
    299                            expected_match_counts_.end(),
    300                            actual_match_counts_.begin()));
    301   }
    302 
    303   bool interleaved_;
    304   string diff_;
    305   OutputString<string> diff_output_string_;
    306   size_t verify_position_;
    307   size_t saved_total_size_position_;
    308   size_t saved_delta_encoding_position_;
    309   size_t saved_section_sizes_position_;
    310   size_t data_bytes_;
    311   size_t instruction_bytes_;
    312   size_t address_bytes_;
    313   VCDiffAddressCache default_cache_;  // Used for finding mode values
    314   std::vector<int> expected_match_counts_;
    315   std::vector<int> actual_match_counts_;
    316 };
    317 
    318 class VCDiffEngineTest : public VCDiffEngineTestBase {
    319  protected:
    320   VCDiffEngineTest() :
    321       engine_(dictionary_, strlen(dictionary_)) {
    322     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
    323   }
    324 
    325   virtual ~VCDiffEngineTest() { }
    326 
    327 
    328   static void SetUpTestCase() {
    329     MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
    330                          kBlockSize, false);
    331     MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
    332   }
    333 
    334   static void TearDownTestCase() {
    335     delete[] dictionary_;
    336     delete[] target_;
    337   }
    338 
    339   void EncodeNothing(bool interleaved, bool target_matching) {
    340     interleaved_ = interleaved;
    341     VCDiffCodeTableWriter coder(interleaved);
    342     coder.Init(engine_.dictionary_size());
    343     engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
    344     EXPECT_TRUE(diff_.empty());
    345     actual_match_counts_ = coder.match_counts();
    346   }
    347 
    348   // text must be NULL-terminated
    349   void EncodeText(const char* text, bool interleaved, bool target_matching) {
    350     interleaved_ = interleaved;
    351     VCDiffCodeTableWriter coder(interleaved);
    352     coder.Init(engine_.dictionary_size());
    353     engine_.Encode(text,
    354                    strlen(text),
    355                    target_matching,
    356                    &diff_output_string_,
    357                    &coder);
    358     actual_match_counts_ = coder.match_counts();
    359   }
    360 
    361   void Encode(bool interleaved, bool target_matching) {
    362     EncodeText(target_, interleaved, target_matching);
    363     VerifyHeader();
    364   }
    365 
    366   void VerifyHeader() {
    367     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
    368   }
    369 
    370   static const char dictionary_without_spaces_[];
    371   static const char target_without_spaces_[];
    372 
    373   static const char* dictionary_;
    374   static const char* target_;
    375 
    376   const VCDiffEngine engine_;
    377 };
    378 
    379 #ifdef GTEST_HAS_DEATH_TEST
    380 typedef VCDiffEngineTest VCDiffEngineDeathTest;
    381 #endif  // GTEST_HAS_DEATH_TEST
    382 
    383 const char VCDiffEngineTest::dictionary_without_spaces_[] =
    384     "The only thing we have to fear is fear itself";
    385 
    386 const char VCDiffEngineTest::target_without_spaces_[] =
    387     "What we hear is fearsome";
    388 
    389 const char* VCDiffEngineTest::dictionary_ = NULL;
    390 const char* VCDiffEngineTest::target_ = NULL;
    391 
    392 #ifdef GTEST_HAS_DEATH_TEST
    393 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
    394   EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
    395                      "twice");
    396 }
    397 #endif  // GTEST_HAS_DEATH_TEST
    398 
    399 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
    400   EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
    401 }
    402 
    403 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
    404   EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
    405 }
    406 
    407 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
    408   EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
    409 }
    410 
    411 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
    412   EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
    413 }
    414 
    415 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
    416   const char* small_text = "  ";
    417   EncodeText(small_text,
    418              /* interleaved = */ false,
    419              /* target matching = */ false);
    420   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
    421   // Data for ADDs
    422   ExpectDataString(small_text);
    423   // Instructions and sizes
    424   ExpectAddInstructionForStringLength(small_text);
    425 }
    426 
    427 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
    428   const char* small_text = "  ";
    429   EncodeText(small_text,
    430              /* interleaved = */ true,
    431              /* target matching = */ false);
    432   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
    433   // Interleaved section
    434   ExpectAddInstructionForStringLength(small_text);
    435   ExpectDataString(small_text);
    436 }
    437 
    438 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
    439   Encode(/* interleaved = */ false, /* target matching = */ false);
    440   // Data for ADDs
    441   ExpectDataStringWithBlockSpacing("W", false);
    442   ExpectDataByte('t');
    443   ExpectDataByte('s');
    444   ExpectDataByte('m');
    445   // Instructions and sizes
    446   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    447                                 VCD_SELF_MODE)) {
    448     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    449   }
    450   ExpectAddInstruction(1);
    451   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    452   ExpectCopyInstruction(11 * kBlockSize,
    453                         default_cache_.FirstNearMode());
    454   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    455     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    456   }
    457   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    458     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    459   }
    460   // Addresses for COPY
    461   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    462   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    463   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    464   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    465   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    466   VerifySizes();
    467 }
    468 
    469 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
    470   Encode(/* interleaved = */ true, /* target matching = */ false);
    471   // Interleaved section
    472   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    473                                 VCD_SELF_MODE)) {
    474     ExpectDataStringWithBlockSpacing("W", false);
    475     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    476   } else {
    477     ExpectDataStringWithBlockSpacing("W", false);
    478   }
    479   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    480   ExpectAddInstruction(1);
    481   ExpectDataByte('t');
    482   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    483   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    484   ExpectCopyInstruction(11 * kBlockSize,
    485                         default_cache_.FirstNearMode());
    486   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    487   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    488     ExpectDataByte('s');
    489     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    490   } else {
    491     ExpectDataByte('s');
    492   }
    493   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    494   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    495     ExpectDataByte('m');
    496     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    497   } else {
    498     ExpectDataByte('m');
    499   }
    500   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    501   VerifySizes();
    502 }
    503 
    504 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
    505   Encode(/* interleaved = */ false, /* target matching = */ true);
    506   // Data for ADDs
    507   ExpectDataStringWithBlockSpacing("W", false);
    508   ExpectDataByte('t');
    509   ExpectDataByte('s');
    510   ExpectDataByte('m');
    511   // Instructions and sizes
    512   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    513                                 VCD_SELF_MODE)) {
    514     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    515   }
    516   ExpectAddInstruction(1);
    517   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    518   ExpectCopyInstruction(11 * kBlockSize,
    519                         default_cache_.FirstNearMode());
    520   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    521     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    522   }
    523   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    524     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    525   }
    526   // Addresses for COPY
    527   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    528   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    529   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    530   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    531   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    532   VerifySizes();
    533 }
    534 
    535 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
    536   Encode(/* interleaved = */ true, /* target matching = */ false);
    537   // Interleaved section
    538   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    539                                 VCD_SELF_MODE)) {
    540     ExpectDataStringWithBlockSpacing("W", false);
    541     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    542   } else {
    543     ExpectDataStringWithBlockSpacing("W", false);
    544   }
    545   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    546   ExpectAddInstruction(1);
    547   ExpectDataByte('t');
    548   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    549   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    550   ExpectCopyInstruction(11 * kBlockSize,
    551                         default_cache_.FirstNearMode());
    552   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    553   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    554     ExpectDataByte('s');
    555     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    556   } else {
    557     ExpectDataByte('s');
    558   }
    559   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    560   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    561     ExpectDataByte('m');
    562     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    563   } else {
    564     ExpectDataByte('m');
    565   }
    566   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    567   VerifySizes();
    568 }
    569 
    570 // This test case takes a dictionary containing several instances of the string
    571 // "weasel", and a target string which is identical to the dictionary
    572 // except that all instances of "weasel" have been replaced with the string
    573 // "moon-pie".  It tests that COPY instructions are generated for all
    574 // boilerplate text (that is, the text between the "moon-pie" instances in
    575 // the target) and, if target matching is enabled, that each instance of
    576 // "moon-pie" (except the first one) is encoded using a COPY instruction
    577 // rather than an ADD.
    578 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
    579  protected:
    580   // kCompressibleTestBlockSize:
    581   // The size of the block to create for each letter in the
    582   // dictionary and search string for the "compressible text" test.
    583   // See MakeEachLetterABlock, below.
    584   // If we use kCompressibleTestBlockSize = kBlockSize, then the
    585   // encoder will find one match per unique letter in the HTML text.
    586   // There are too many examples of "<" in the text for the encoder
    587   // to iterate through them all, and some matches are not found.
    588   // If we use kCompressibleTextBlockSize = 1, then the boilerplate
    589   // text between "weasel" strings in the dictionary and "moon-pie"
    590   // strings in the target may not be long enough to be found by
    591   // the encoder's block-hash algorithm.  A good value, that will give
    592   // reproducible results across all block sizes, will be somewhere
    593   // in between these extremes.
    594   static const int kCompressibleTestBlockSize = kBlockSize / 4;
    595   static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
    596 
    597   WeaselsToMoonpiesTest() :
    598       engine_(dictionary_, strlen(dictionary_)),
    599       match_index_(0),
    600       search_dictionary_(dictionary_, strlen(dictionary_)),
    601       copied_moonpie_address_(0) {
    602     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
    603     weasel_positions_[0] = 0;
    604     after_weasel_[0] = 0;
    605     moonpie_positions_[0] = 0;
    606     after_moonpie_[0] = 0;
    607   }
    608 
    609   virtual ~WeaselsToMoonpiesTest() { }
    610 
    611   static void SetUpTestCase() {
    612     MakeEachLetterABlock(dictionary_without_spaces_,
    613                          &dictionary_,
    614                          kCompressibleTestBlockSize,
    615                          false);
    616     MakeEachLetterABlock(target_without_spaces_,
    617                          &target_,
    618                          kCompressibleTestBlockSize,
    619                          false);
    620     MakeEachLetterABlock(weasel_text_without_spaces_,
    621                          &weasel_text_,
    622                          kCompressibleTestBlockSize,
    623                          true);
    624     MakeEachLetterABlock(moonpie_text_without_spaces_,
    625                          &moonpie_text_,
    626                          kCompressibleTestBlockSize,
    627                          true);
    628   }
    629 
    630   static void TearDownTestCase() {
    631     delete[] dictionary_;
    632     delete[] target_;
    633     delete[] weasel_text_;
    634     delete[] moonpie_text_;
    635   }
    636 
    637   // text must be NULL-terminated
    638   void EncodeText(const char* text, bool interleaved, bool target_matching) {
    639     interleaved_ = interleaved;
    640     VCDiffCodeTableWriter coder(interleaved);
    641     coder.Init(engine_.dictionary_size());
    642     engine_.Encode(text,
    643                    strlen(text),
    644                    target_matching,
    645                    &diff_output_string_,
    646                    &coder);
    647     actual_match_counts_ = coder.match_counts();
    648   }
    649 
    650   void Encode(bool interleaved, bool target_matching) {
    651     EncodeText(target_, interleaved, target_matching);
    652     VerifyHeader();
    653   }
    654 
    655   void VerifyHeader() {
    656     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
    657   }
    658 
    659   void ExpectCopyForSize(size_t size, int mode) {
    660     ExpectCopyInstruction(static_cast<int>(size), mode);
    661   }
    662 
    663   void ExpectAddForSize(size_t size) {
    664     ExpectAddInstruction(static_cast<int>(size));
    665   }
    666 
    667   void ExpectAddressVarintForSize(size_t value) {
    668     ExpectAddressVarint(static_cast<int32_t>(value));
    669   }
    670 
    671   void FindNextMoonpie(bool include_trailing_spaces) {
    672     ++match_index_;
    673     SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
    674                                                      AfterLastWeasel()));
    675     if (CurrentWeaselPosition() == string::npos) {
    676       SetCurrentMoonpiePosition(string::npos);
    677     } else {
    678       SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
    679                                         + strlen(weasel_text_)
    680                                         + (include_trailing_spaces ?
    681                                             kTrailingSpaces : 0));
    682       SetCurrentMoonpiePosition(AfterLastMoonpie()
    683                                     + CurrentBoilerplateLength());
    684       SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
    685                                         + strlen(moonpie_text_)
    686                                         + (include_trailing_spaces ?
    687                                             kTrailingSpaces : 0));
    688     }
    689   }
    690   bool NoMoreMoonpies() const {
    691     return CurrentMoonpiePosition() == string::npos;
    692   }
    693   size_t CurrentWeaselPosition() const {
    694     return weasel_positions_[match_index_];
    695   }
    696   size_t LastWeaselPosition() const {
    697     return weasel_positions_[match_index_ - 1];
    698   }
    699   size_t CurrentMoonpiePosition() const {
    700     return moonpie_positions_[match_index_];
    701   }
    702   size_t LastMoonpiePosition() const {
    703     return moonpie_positions_[match_index_ - 1];
    704   }
    705   size_t AfterLastWeasel() const {
    706     CHECK_GE(match_index_, 1);
    707     return after_weasel_[match_index_ - 1];
    708   }
    709   size_t AfterPreviousWeasel() const {
    710     CHECK_GE(match_index_, 2);
    711     return after_weasel_[match_index_ - 2];
    712   }
    713   size_t AfterLastMoonpie() const {
    714     CHECK_GE(match_index_, 1);
    715     return after_moonpie_[match_index_ - 1];
    716   }
    717   size_t AfterPreviousMoonpie() const {
    718     CHECK_GE(match_index_, 2);
    719     return after_moonpie_[match_index_ - 2];
    720   }
    721 
    722   void SetCurrentWeaselPosition(size_t value) {
    723     weasel_positions_[match_index_] = value;
    724   }
    725   void SetCurrentAfterWeaselPosition(size_t value) {
    726     after_weasel_[match_index_] = value;
    727   }
    728   void SetCurrentMoonpiePosition(size_t value) {
    729     moonpie_positions_[match_index_] = value;
    730   }
    731   void SetCurrentAfterMoonpiePosition(size_t value) {
    732     after_moonpie_[match_index_] = value;
    733   }
    734 
    735   // Find the length of the text in between the "weasel" strings in the
    736   // compressible dictionary, which is the same as the text between
    737   // the "moon-pie" strings in the compressible target.
    738   size_t CurrentBoilerplateLength() const {
    739     CHECK_GE(match_index_, 1);
    740     return CurrentWeaselPosition() - AfterLastWeasel();
    741   }
    742   size_t DistanceFromLastWeasel() const {
    743     CHECK_GE(match_index_, 1);
    744     return CurrentWeaselPosition() - LastWeaselPosition();
    745   }
    746   size_t DistanceFromLastMoonpie() const {
    747     CHECK_GE(match_index_, 1);
    748     return CurrentMoonpiePosition() - LastMoonpiePosition();
    749   }
    750   size_t DistanceBetweenLastTwoWeasels() const {
    751     CHECK_GE(match_index_, 2);
    752     return AfterLastWeasel() - AfterPreviousWeasel();
    753   }
    754   size_t DistanceBetweenLastTwoMoonpies() const {
    755     CHECK_GE(match_index_, 2);
    756     return AfterLastMoonpie() - AfterPreviousMoonpie();
    757   }
    758 
    759   int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
    760   int UpdateCopyModeForMoonpie(int copy_mode) const;
    761   int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
    762 
    763   void CopyBoilerplateAndAddMoonpie(int copy_mode);
    764   void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
    765 
    766   static const char dictionary_without_spaces_[];
    767   static const char target_without_spaces_[];
    768   static const char weasel_text_without_spaces_[];
    769   static const char moonpie_text_without_spaces_[];
    770 
    771   static const char* dictionary_;
    772   static const char* target_;
    773   static const char* weasel_text_;
    774   static const char* moonpie_text_;
    775 
    776   const VCDiffEngine engine_;
    777   size_t weasel_positions_[128];
    778   size_t after_weasel_[128];
    779   size_t moonpie_positions_[128];
    780   size_t after_moonpie_[128];
    781   int match_index_;
    782   string search_dictionary_;
    783   size_t copied_moonpie_address_;
    784 };
    785 
    786 // Care is taken in the formulation of the dictionary
    787 // to ensure that the surrounding letters do not match; for example,
    788 // there are not two instances of the string "weasels".  Otherwise,
    789 // the matching behavior would not be as predictable.
    790 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
    791   "<html>\n"
    792   "<head>\n"
    793   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
    794   "http-equiv=\"content-type\">\n"
    795   "<title>All about weasels</title>\n"
    796   "</head>\n"
    797   "<!-- You will notice that the word \"weasel\" may be replaced"
    798   " with something else -->\n"
    799   "<body>\n"
    800   "<h1>All about the weasel: highly compressible HTML text</h1>"
    801   "<ul>\n"
    802   "<li>Don\'t look a gift weasel in its mouth.</li>\n"
    803   "<li>This item makes sure the next occurrence is found.</li>\n"
    804   "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
    805   "</ul>\n"
    806   "<br>\n"
    807   "</body>\n"
    808   "</html>\n";
    809 
    810 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
    811   "<html>\n"
    812   "<head>\n"
    813   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
    814   "http-equiv=\"content-type\">\n"
    815   "<title>All about moon-pies</title>\n"
    816   "</head>\n"
    817   "<!-- You will notice that the word \"moon-pie\" may be replaced"
    818   " with something else -->\n"
    819   "<body>\n"
    820   "<h1>All about the moon-pie: highly compressible HTML text</h1>"
    821   "<ul>\n"
    822   "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
    823   "<li>This item makes sure the next occurrence is found.</li>\n"
    824   "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
    825   "</ul>\n"
    826   "<br>\n"
    827   "</body>\n"
    828   "</html>\n";
    829 
    830 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
    831 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
    832 
    833 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
    834 const char* WeaselsToMoonpiesTest::target_ = NULL;
    835 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
    836 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
    837 
    838 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
    839     int copy_mode) const {
    840   size_t copy_address = 0;
    841   if (default_cache_.IsSelfMode(copy_mode)) {
    842     copy_address = AfterLastWeasel();
    843   } else if (default_cache_.IsNearMode(copy_mode)) {
    844     copy_address = DistanceBetweenLastTwoWeasels();
    845   } else if (default_cache_.IsSameMode(copy_mode)) {
    846     copy_address = AfterLastWeasel() % 256;
    847   }
    848   return static_cast<int32_t>(copy_address);
    849 }
    850 
    851 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
    852   if (copy_mode == default_cache_.FirstSameMode()) {
    853     return default_cache_.FirstSameMode()
    854         + static_cast<int>((copied_moonpie_address_ / 256) % 3);
    855   } else {
    856     return copy_mode;
    857   }
    858 }
    859 
    860 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
    861     int copy_mode) const {
    862   size_t copy_address = 0;
    863   if (default_cache_.IsHereMode(copy_mode)) {
    864     copy_address = DistanceFromLastMoonpie();
    865   } else if (default_cache_.IsNearMode(copy_mode)) {
    866     copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
    867   } else if (default_cache_.IsSameMode(copy_mode)) {
    868     copy_address = copied_moonpie_address_ % 256;
    869   }
    870   return static_cast<int32_t>(copy_address);
    871 }
    872 
    873 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
    874 // in the encoding.
    875 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
    876   EXPECT_FALSE(NoMoreMoonpies());
    877   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
    878   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
    879   ExpectAddInstructionForStringLength(moonpie_text_);
    880   ExpectDataString(moonpie_text_);
    881 }
    882 
    883 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
    884 // in the encoding.  The "moon-pie" text will be copied from the previously
    885 // encoded target.
    886 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
    887     int copy_mode,
    888     int moonpie_copy_mode) {
    889   EXPECT_FALSE(NoMoreMoonpies());
    890   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
    891   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
    892   moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
    893   ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
    894   ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
    895                 moonpie_copy_mode);
    896   copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
    897 }
    898 
    899 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
    900   Encode(/* interleaved = */ true, /* target matching = */ false);
    901   FindNextMoonpie(false);
    902   // Expect all five "weasel"s to be replaced with "moon-pie"s
    903   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
    904   FindNextMoonpie(false);
    905   CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
    906   FindNextMoonpie(false);
    907   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
    908   FindNextMoonpie(false);
    909   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
    910   FindNextMoonpie(false);
    911   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
    912   FindNextMoonpie(false);
    913   EXPECT_TRUE(NoMoreMoonpies());
    914   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
    915                     default_cache_.FirstNearMode());
    916   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
    917   VerifySizes();
    918 }
    919 
    920 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
    921   Encode(/* interleaved = */ true, /* target matching = */ true);
    922   // Expect all five "weasel"s to be replaced with "moon-pie"s.
    923   // Every "moon-pie" after the first one should be copied from the
    924   // previously encoded target text.
    925   FindNextMoonpie(false);
    926   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
    927   FindNextMoonpie(true);
    928   CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
    929   FindNextMoonpie(true);
    930   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
    931                                 default_cache_.FirstSameMode());
    932   FindNextMoonpie(true);
    933   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
    934                                 VCD_HERE_MODE);
    935   FindNextMoonpie(true);
    936   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
    937                                 default_cache_.FirstSameMode());
    938   FindNextMoonpie(true);
    939   EXPECT_TRUE(NoMoreMoonpies());
    940   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
    941                     default_cache_.FirstNearMode() + 3);
    942   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
    943   VerifySizes();
    944 }
    945 
    946 }  //  anonymous namespace
    947 }  //  namespace open-vcdiff
    948