Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <config.h>
     17 #include "vcdiffengine.h"
     18 #include <string.h>  // memset, strlen
     19 #include <algorithm>
     20 #include <string>
     21 #include "addrcache.h"
     22 #include "blockhash.h"
     23 #include "encodetable.h"
     24 #include "google/output_string.h"
     25 #include "rolling_hash.h"
     26 #include "testing.h"
     27 #include "varint_bigendian.h"
     28 #include "vcdiff_defs.h"
     29 
     30 namespace open_vcdiff {
     31 
     32 namespace {
     33 
     34 class VCDiffEngineTestBase : public testing::Test {
     35  protected:
     36   typedef std::string string;
     37 
     38   // Some common definitions and helper functions used in the various tests
     39   // for VCDiffEngine.
     40   static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
     41 
     42   VCDiffEngineTestBase() : interleaved_(false),
     43                            diff_output_string_(&diff_),
     44                            verify_position_(0),
     45                            saved_total_size_position_(0),
     46                            saved_delta_encoding_position_(0),
     47                            saved_section_sizes_position_(0),
     48                            data_bytes_(0),
     49                            instruction_bytes_(0),
     50                            address_bytes_(0) { }
     51 
     52   virtual ~VCDiffEngineTestBase() { }
     53 
     54   virtual void TearDown() {
     55   }
     56 
     57   // Copy string_without_spaces into newly allocated result buffer,
     58   // but pad its contents with space characters so that every character
     59   // in string_without_spaces corresponds to (block_size - 1)
     60   // spaces in the result, followed by that character.
     61   // For example:
     62   // If string_without_spaces begins "The only thing"... and block_size is 4,
     63   // then 3 space characters will be inserted
     64   // between each letter in the result, as follows:
     65   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
     66   // This makes testing simpler, because finding a block_size-byte match
     67   // between the dictionary and target only depends on the
     68   // trailing letter in each block.
     69   // If no_initial_padding is true, then the first letter will not have
     70   // spaces added in front of it.
     71   static void MakeEachLetterABlock(const char* string_without_spaces,
     72                                    const char** result,
     73                                    int block_size,
     74                                    bool no_initial_padding) {
     75     const size_t length_without_spaces = strlen(string_without_spaces);
     76     char* padded_text = new char[(block_size * length_without_spaces) + 1];
     77     memset(padded_text, ' ', block_size * length_without_spaces);
     78     char* padded_text_ptr = padded_text;
     79     if (!no_initial_padding) {
     80       padded_text_ptr += block_size - 1;
     81     }
     82     for (size_t i = 0; i < length_without_spaces; ++i) {
     83       *padded_text_ptr = string_without_spaces[i];
     84       padded_text_ptr += block_size;
     85     }
     86     *(padded_text_ptr - block_size + 1)  = '\0';
     87     *result = padded_text;
     88   }
     89 
     90   // These functions iterate through the decoded output and expect
     91   // simple elements: bytes or variable-length integers.
     92   void ExpectByte(char byte) {
     93     EXPECT_GT(diff_.size(), verify_position_);
     94     EXPECT_EQ(byte, diff_[verify_position_]);
     95     ++verify_position_;
     96   }
     97 
     98   size_t ExpectVarint(int32_t expected_value) {
     99     EXPECT_GT(diff_.size(), verify_position_);
    100     const char* const original_position = &diff_[verify_position_];
    101     const char* new_position = original_position;
    102     const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
    103     int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
    104                                                     &new_position);
    105     EXPECT_LE(0, parsed_value);
    106     size_t parsed_length = new_position - original_position;
    107     EXPECT_EQ(expected_value, parsed_value);
    108     EXPECT_EQ(expected_length, parsed_length);
    109     verify_position_ += parsed_length;
    110     return parsed_length;
    111   }
    112 
    113   size_t ExpectSize(size_t size) {
    114     return ExpectVarint(static_cast<int32_t>(size));
    115   }
    116 
    117   size_t ExpectStringLength(const char* s) {
    118     return ExpectSize(strlen(s));
    119   }
    120 
    121   void SkipVarint() {
    122     EXPECT_GT(diff_.size(), verify_position_);
    123     const char* const original_position = &diff_[verify_position_];
    124     const char* new_position = original_position;
    125     VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
    126     size_t parsed_length = new_position - original_position;
    127     verify_position_ += parsed_length;
    128   }
    129 
    130   void ExpectDataByte(char byte) {
    131     ExpectByte(byte);
    132     if (interleaved_) {
    133       ++instruction_bytes_;
    134     } else {
    135       ++data_bytes_;
    136     }
    137   }
    138 
    139   void ExpectDataString(const char *expected_string) {
    140     const char* ptr = expected_string;
    141     while (*ptr) {
    142       ExpectDataByte(*ptr);
    143       ++ptr;
    144     }
    145   }
    146 
    147   void ExpectDataStringWithBlockSpacing(const char *expected_string,
    148                                         bool trailing_spaces) {
    149     const char* ptr = expected_string;
    150     while (*ptr) {
    151       for (int i = 0; i < (kBlockSize - 1); ++i) {
    152         ExpectDataByte(' ');
    153       }
    154       ExpectDataByte(*ptr);
    155       ++ptr;
    156     }
    157     if (trailing_spaces) {
    158       for (int i = 0; i < (kBlockSize - 1); ++i) {
    159         ExpectDataByte(' ');
    160       }
    161     }
    162   }
    163 
    164   void ExpectInstructionByte(char byte) {
    165     ExpectByte(byte);
    166     ++instruction_bytes_;
    167   }
    168 
    169   void ExpectInstructionVarint(int32_t value) {
    170     instruction_bytes_ += ExpectVarint(value);
    171   }
    172 
    173   void ExpectAddressByte(char byte) {
    174     ExpectByte(byte);
    175     if (interleaved_) {
    176       ++instruction_bytes_;
    177     } else {
    178       ++address_bytes_;
    179     }
    180   }
    181 
    182   void ExpectAddressVarint(int32_t value) {
    183     if (interleaved_) {
    184       instruction_bytes_ += ExpectVarint(value);
    185     } else {
    186       address_bytes_ += ExpectVarint(value);
    187     }
    188   }
    189 
    190   // The following functions leverage the fact that the encoder uses
    191   // the default code table and cache sizes.  They are able to search for
    192   // instructions of a particular size.  The logic for mapping from
    193   // instruction type, mode, and size to opcode value is very different here
    194   // from the logic used in encodetable.{h,cc}, so hopefully
    195   // this version will help validate that the other is correct.
    196   // This version uses conditional statements, while encodetable.h
    197   // looks up values in a mapping table.
    198   void ExpectAddress(int32_t address, int copy_mode) {
    199     if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
    200       ExpectAddressVarint(address);
    201     } else {
    202       ExpectAddressByte(address);
    203     }
    204   }
    205 
    206   void ExpectAddInstruction(int size) {
    207     if (size <= 18) {
    208       ExpectInstructionByte(0x01 + size);
    209     } else {
    210       ExpectInstructionByte(0x01);
    211       ExpectInstructionVarint(size);
    212     }
    213   }
    214 
    215   void ExpectCopyInstruction(int size, int mode) {
    216     if ((size >= 4) && (size <= 16)) {
    217       ExpectInstructionByte(0x10 + (0x10 * mode) + size);
    218     } else {
    219       ExpectInstructionByte(0x13 + (0x10 * mode));
    220       ExpectInstructionVarint(size);
    221     }
    222   }
    223 
    224   bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
    225     if (!default_cache_.IsSameMode(copy_mode) &&
    226         (add_size <= 4) &&
    227         (copy_size >= 4) &&
    228         (copy_size <= 6)) {
    229       ExpectInstructionByte(0x9C +
    230                             (0x0C * copy_mode) +
    231                             (0x03 * add_size) +
    232                             copy_size);
    233       return true;
    234     } else if (default_cache_.IsSameMode(copy_mode) &&
    235                (add_size <= 4) &&
    236                (copy_size == 4)) {
    237       ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
    238       return true;
    239     } else {
    240       ExpectAddInstruction(add_size);
    241       return false;
    242     }
    243   }
    244 
    245   void ExpectAddInstructionForStringLength(const char* s) {
    246     ExpectAddInstruction(static_cast<int>(strlen(s)));
    247   }
    248 
    249   // Call this function before beginning to iterate through the diff string
    250   // using the Expect... functions.
    251   // text must be NULL-terminated.
    252   void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
    253                                               const char* target_text) {
    254     ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
    255     ExpectStringLength(dictionary);
    256     ExpectByte(0x00);  // Source segment position: start of dictionary
    257     saved_total_size_position_ = verify_position_;
    258     SkipVarint();  // Length of the delta encoding
    259     saved_delta_encoding_position_ = verify_position_;
    260     ExpectStringLength(target_text);
    261     ExpectByte(0x00);  // Delta_indicator (no compression)
    262     saved_section_sizes_position_ = verify_position_;
    263     SkipVarint();  // length of data for ADDs and RUNs
    264     SkipVarint();  // length of instructions section
    265     SkipVarint();  // length of addresses for COPYs
    266   }
    267 
    268   // Call this function before beginning to iterating through the entire
    269   // diff string using the Expect... functions.  It makes sure that the
    270   // size totals in the window header match the number of bytes that
    271   // were parsed.
    272   void VerifySizes() {
    273     EXPECT_EQ(verify_position_, diff_.size());
    274     const size_t delta_encoding_size = verify_position_ -
    275                                        saved_delta_encoding_position_;
    276     verify_position_ = saved_total_size_position_;
    277     ExpectSize(delta_encoding_size);
    278     verify_position_ = saved_section_sizes_position_;
    279     ExpectSize(data_bytes_);
    280     ExpectSize(instruction_bytes_);
    281     ExpectSize(address_bytes_);
    282   }
    283 
    284   bool interleaved_;
    285   string diff_;
    286   OutputString<string> diff_output_string_;
    287   size_t verify_position_;
    288   size_t saved_total_size_position_;
    289   size_t saved_delta_encoding_position_;
    290   size_t saved_section_sizes_position_;
    291   size_t data_bytes_;
    292   size_t instruction_bytes_;
    293   size_t address_bytes_;
    294   VCDiffAddressCache default_cache_;  // Used for finding mode values
    295 };
    296 
    297 class VCDiffEngineTest : public VCDiffEngineTestBase {
    298  protected:
    299   VCDiffEngineTest() :
    300       engine_(dictionary_, strlen(dictionary_)) {
    301     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
    302   }
    303 
    304   virtual ~VCDiffEngineTest() { }
    305 
    306 
    307   static void SetUpTestCase() {
    308     MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
    309                          kBlockSize, false);
    310     MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
    311   }
    312 
    313   static void TearDownTestCase() {
    314     delete[] dictionary_;
    315     delete[] target_;
    316   }
    317 
    318   void EncodeNothing(bool interleaved, bool target_matching) {
    319     interleaved_ = interleaved;
    320     VCDiffCodeTableWriter coder(interleaved);
    321     coder.Init(engine_.dictionary_size());
    322     engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
    323     EXPECT_TRUE(diff_.empty());
    324   }
    325 
    326   // text must be NULL-terminated
    327   void EncodeText(const char* text, bool interleaved, bool target_matching) {
    328     interleaved_ = interleaved;
    329     VCDiffCodeTableWriter coder(interleaved);
    330     coder.Init(engine_.dictionary_size());
    331     engine_.Encode(text,
    332                    strlen(text),
    333                    target_matching,
    334                    &diff_output_string_,
    335                    &coder);
    336   }
    337 
    338   void Encode(bool interleaved, bool target_matching) {
    339     EncodeText(target_, interleaved, target_matching);
    340     VerifyHeader();
    341   }
    342 
    343   void VerifyHeader() {
    344     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
    345   }
    346 
    347   static const char dictionary_without_spaces_[];
    348   static const char target_without_spaces_[];
    349 
    350   static const char* dictionary_;
    351   static const char* target_;
    352 
    353   const VCDiffEngine engine_;
    354 };
    355 
    356 #ifdef GTEST_HAS_DEATH_TEST
    357 typedef VCDiffEngineTest VCDiffEngineDeathTest;
    358 #endif  // GTEST_HAS_DEATH_TEST
    359 
    360 const char VCDiffEngineTest::dictionary_without_spaces_[] =
    361     "The only thing we have to fear is fear itself";
    362 
    363 const char VCDiffEngineTest::target_without_spaces_[] =
    364     "What we hear is fearsome";
    365 
    366 const char* VCDiffEngineTest::dictionary_ = NULL;
    367 const char* VCDiffEngineTest::target_ = NULL;
    368 
    369 #ifdef GTEST_HAS_DEATH_TEST
    370 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
    371   EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
    372                      "twice");
    373 }
    374 #endif  // GTEST_HAS_DEATH_TEST
    375 
    376 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
    377   EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
    378 }
    379 
    380 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
    381   EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
    382 }
    383 
    384 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
    385   EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
    386 }
    387 
    388 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
    389   EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
    390 }
    391 
    392 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
    393   const char* small_text = "  ";
    394   EncodeText(small_text,
    395              /* interleaved = */ false,
    396              /* target matching = */ false);
    397   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
    398   // Data for ADDs
    399   ExpectDataString(small_text);
    400   // Instructions and sizes
    401   ExpectAddInstructionForStringLength(small_text);
    402 }
    403 
    404 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
    405   const char* small_text = "  ";
    406   EncodeText(small_text,
    407              /* interleaved = */ true,
    408              /* target matching = */ false);
    409   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
    410   // Interleaved section
    411   ExpectAddInstructionForStringLength(small_text);
    412   ExpectDataString(small_text);
    413 }
    414 
    415 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
    416   Encode(/* interleaved = */ false, /* target matching = */ false);
    417   // Data for ADDs
    418   ExpectDataStringWithBlockSpacing("W", false);
    419   ExpectDataByte('t');
    420   ExpectDataByte('s');
    421   ExpectDataByte('m');
    422   // Instructions and sizes
    423   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    424                                 VCD_SELF_MODE)) {
    425     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    426   }
    427   ExpectAddInstruction(1);
    428   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    429   ExpectCopyInstruction(11 * kBlockSize,
    430                         default_cache_.FirstNearMode());
    431   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    432     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    433   }
    434   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    435     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    436   }
    437   // Addresses for COPY
    438   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    439   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    440   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    441   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    442   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    443   VerifySizes();
    444 }
    445 
    446 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
    447   Encode(/* interleaved = */ true, /* target matching = */ false);
    448   // Interleaved section
    449   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    450                                 VCD_SELF_MODE)) {
    451     ExpectDataStringWithBlockSpacing("W", false);
    452     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    453   } else {
    454     ExpectDataStringWithBlockSpacing("W", false);
    455   }
    456   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    457   ExpectAddInstruction(1);
    458   ExpectDataByte('t');
    459   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    460   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    461   ExpectCopyInstruction(11 * kBlockSize,
    462                         default_cache_.FirstNearMode());
    463   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    464   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    465     ExpectDataByte('s');
    466     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    467   } else {
    468     ExpectDataByte('s');
    469   }
    470   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    471   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    472     ExpectDataByte('m');
    473     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    474   } else {
    475     ExpectDataByte('m');
    476   }
    477   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    478   VerifySizes();
    479 }
    480 
    481 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
    482   Encode(/* interleaved = */ false, /* target matching = */ true);
    483   // Data for ADDs
    484   ExpectDataStringWithBlockSpacing("W", false);
    485   ExpectDataByte('t');
    486   ExpectDataByte('s');
    487   ExpectDataByte('m');
    488   // Instructions and sizes
    489   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    490                                 VCD_SELF_MODE)) {
    491     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    492   }
    493   ExpectAddInstruction(1);
    494   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    495   ExpectCopyInstruction(11 * kBlockSize,
    496                         default_cache_.FirstNearMode());
    497   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    498     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    499   }
    500   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    501     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    502   }
    503   // Addresses for COPY
    504   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    505   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    506   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    507   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    508   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    509   VerifySizes();
    510 }
    511 
    512 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
    513   Encode(/* interleaved = */ true, /* target matching = */ true);
    514   // Interleaved section
    515   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
    516                                 VCD_SELF_MODE)) {
    517     ExpectDataStringWithBlockSpacing("W", false);
    518     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
    519   } else {
    520     ExpectDataStringWithBlockSpacing("W", false);
    521   }
    522   ExpectAddressVarint(18 * kBlockSize);  // "ha"
    523   ExpectAddInstruction(1);
    524   ExpectDataByte('t');
    525   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
    526   ExpectAddressVarint(14 * kBlockSize);  // " we h"
    527   ExpectCopyInstruction(11 * kBlockSize,
    528                         default_cache_.FirstNearMode());
    529   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
    530   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
    531     ExpectDataByte('s');
    532     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
    533   } else {
    534     ExpectDataByte('s');
    535   }
    536   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
    537   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
    538     ExpectDataByte('m');
    539     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
    540   } else {
    541     ExpectDataByte('m');
    542   }
    543   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
    544   VerifySizes();
    545 }
    546 
    547 // This test case takes a dictionary containing several instances of the string
    548 // "weasel", and a target string which is identical to the dictionary
    549 // except that all instances of "weasel" have been replaced with the string
    550 // "moon-pie".  It tests that COPY instructions are generated for all
    551 // boilerplate text (that is, the text between the "moon-pie" instances in
    552 // the target) and, if target matching is enabled, that each instance of
    553 // "moon-pie" (except the first one) is encoded using a COPY instruction
    554 // rather than an ADD.
    555 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
    556  protected:
    557   // kCompressibleTestBlockSize:
    558   // The size of the block to create for each letter in the
    559   // dictionary and search string for the "compressible text" test.
    560   // See MakeEachLetterABlock, below.
    561   // If we use kCompressibleTestBlockSize = kBlockSize, then the
    562   // encoder will find one match per unique letter in the HTML text.
    563   // There are too many examples of "<" in the text for the encoder
    564   // to iterate through them all, and some matches are not found.
    565   // If we use kCompressibleTextBlockSize = 1, then the boilerplate
    566   // text between "weasel" strings in the dictionary and "moon-pie"
    567   // strings in the target may not be long enough to be found by
    568   // the encoder's block-hash algorithm.  A good value, that will give
    569   // reproducible results across all block sizes, will be somewhere
    570   // in between these extremes.
    571   static const int kCompressibleTestBlockSize = kBlockSize / 4;
    572   static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
    573 
    574   WeaselsToMoonpiesTest() :
    575       engine_(dictionary_, strlen(dictionary_)),
    576       match_index_(0),
    577       search_dictionary_(dictionary_, strlen(dictionary_)),
    578       copied_moonpie_address_(0) {
    579     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
    580     weasel_positions_[0] = 0;
    581     after_weasel_[0] = 0;
    582     moonpie_positions_[0] = 0;
    583     after_moonpie_[0] = 0;
    584   }
    585 
    586   virtual ~WeaselsToMoonpiesTest() { }
    587 
    588   static void SetUpTestCase() {
    589     MakeEachLetterABlock(dictionary_without_spaces_,
    590                          &dictionary_,
    591                          kCompressibleTestBlockSize,
    592                          false);
    593     MakeEachLetterABlock(target_without_spaces_,
    594                          &target_,
    595                          kCompressibleTestBlockSize,
    596                          false);
    597     MakeEachLetterABlock(weasel_text_without_spaces_,
    598                          &weasel_text_,
    599                          kCompressibleTestBlockSize,
    600                          true);
    601     MakeEachLetterABlock(moonpie_text_without_spaces_,
    602                          &moonpie_text_,
    603                          kCompressibleTestBlockSize,
    604                          true);
    605   }
    606 
    607   static void TearDownTestCase() {
    608     delete[] dictionary_;
    609     delete[] target_;
    610     delete[] weasel_text_;
    611     delete[] moonpie_text_;
    612   }
    613 
    614   // text must be NULL-terminated
    615   void EncodeText(const char* text, bool interleaved, bool target_matching) {
    616     interleaved_ = interleaved;
    617     VCDiffCodeTableWriter coder(interleaved);
    618     coder.Init(engine_.dictionary_size());
    619     engine_.Encode(text,
    620                    strlen(text),
    621                    target_matching,
    622                    &diff_output_string_,
    623                    &coder);
    624   }
    625 
    626   void Encode(bool interleaved, bool target_matching) {
    627     EncodeText(target_, interleaved, target_matching);
    628     VerifyHeader();
    629   }
    630 
    631   void VerifyHeader() {
    632     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
    633   }
    634 
    635   void ExpectCopyForSize(size_t size, int mode) {
    636     ExpectCopyInstruction(static_cast<int>(size), mode);
    637   }
    638 
    639   void ExpectAddForSize(size_t size) {
    640     ExpectAddInstruction(static_cast<int>(size));
    641   }
    642 
    643   void ExpectAddressVarintForSize(size_t value) {
    644     ExpectAddressVarint(static_cast<int32_t>(value));
    645   }
    646 
    647   void FindNextMoonpie(bool include_trailing_spaces) {
    648     ++match_index_;
    649     SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
    650                                                      AfterLastWeasel()));
    651     if (CurrentWeaselPosition() == string::npos) {
    652       SetCurrentMoonpiePosition(string::npos);
    653     } else {
    654       SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
    655                                         + strlen(weasel_text_)
    656                                         + (include_trailing_spaces ?
    657                                             kTrailingSpaces : 0));
    658       SetCurrentMoonpiePosition(AfterLastMoonpie()
    659                                     + CurrentBoilerplateLength());
    660       SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
    661                                         + strlen(moonpie_text_)
    662                                         + (include_trailing_spaces ?
    663                                             kTrailingSpaces : 0));
    664     }
    665   }
    666   bool NoMoreMoonpies() const {
    667     return CurrentMoonpiePosition() == string::npos;
    668   }
    669   size_t CurrentWeaselPosition() const {
    670     return weasel_positions_[match_index_];
    671   }
    672   size_t LastWeaselPosition() const {
    673     return weasel_positions_[match_index_ - 1];
    674   }
    675   size_t CurrentMoonpiePosition() const {
    676     return moonpie_positions_[match_index_];
    677   }
    678   size_t LastMoonpiePosition() const {
    679     return moonpie_positions_[match_index_ - 1];
    680   }
    681   size_t AfterLastWeasel() const {
    682     CHECK_GE(match_index_, 1);
    683     return after_weasel_[match_index_ - 1];
    684   }
    685   size_t AfterPreviousWeasel() const {
    686     CHECK_GE(match_index_, 2);
    687     return after_weasel_[match_index_ - 2];
    688   }
    689   size_t AfterLastMoonpie() const {
    690     CHECK_GE(match_index_, 1);
    691     return after_moonpie_[match_index_ - 1];
    692   }
    693   size_t AfterPreviousMoonpie() const {
    694     CHECK_GE(match_index_, 2);
    695     return after_moonpie_[match_index_ - 2];
    696   }
    697 
    698   void SetCurrentWeaselPosition(size_t value) {
    699     weasel_positions_[match_index_] = value;
    700   }
    701   void SetCurrentAfterWeaselPosition(size_t value) {
    702     after_weasel_[match_index_] = value;
    703   }
    704   void SetCurrentMoonpiePosition(size_t value) {
    705     moonpie_positions_[match_index_] = value;
    706   }
    707   void SetCurrentAfterMoonpiePosition(size_t value) {
    708     after_moonpie_[match_index_] = value;
    709   }
    710 
    711   // Find the length of the text in between the "weasel" strings in the
    712   // compressible dictionary, which is the same as the text between
    713   // the "moon-pie" strings in the compressible target.
    714   size_t CurrentBoilerplateLength() const {
    715     CHECK_GE(match_index_, 1);
    716     return CurrentWeaselPosition() - AfterLastWeasel();
    717   }
    718   size_t DistanceFromLastWeasel() const {
    719     CHECK_GE(match_index_, 1);
    720     return CurrentWeaselPosition() - LastWeaselPosition();
    721   }
    722   size_t DistanceFromLastMoonpie() const {
    723     CHECK_GE(match_index_, 1);
    724     return CurrentMoonpiePosition() - LastMoonpiePosition();
    725   }
    726   size_t DistanceBetweenLastTwoWeasels() const {
    727     CHECK_GE(match_index_, 2);
    728     return AfterLastWeasel() - AfterPreviousWeasel();
    729   }
    730   size_t DistanceBetweenLastTwoMoonpies() const {
    731     CHECK_GE(match_index_, 2);
    732     return AfterLastMoonpie() - AfterPreviousMoonpie();
    733   }
    734 
    735   int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
    736   int UpdateCopyModeForMoonpie(int copy_mode) const;
    737   int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
    738 
    739   void CopyBoilerplateAndAddMoonpie(int copy_mode);
    740   void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
    741 
    742   static const char dictionary_without_spaces_[];
    743   static const char target_without_spaces_[];
    744   static const char weasel_text_without_spaces_[];
    745   static const char moonpie_text_without_spaces_[];
    746 
    747   static const char* dictionary_;
    748   static const char* target_;
    749   static const char* weasel_text_;
    750   static const char* moonpie_text_;
    751 
    752   const VCDiffEngine engine_;
    753   size_t weasel_positions_[128];
    754   size_t after_weasel_[128];
    755   size_t moonpie_positions_[128];
    756   size_t after_moonpie_[128];
    757   int match_index_;
    758   string search_dictionary_;
    759   size_t copied_moonpie_address_;
    760 };
    761 
    762 // Care is taken in the formulation of the dictionary
    763 // to ensure that the surrounding letters do not match; for example,
    764 // there are not two instances of the string "weasels".  Otherwise,
    765 // the matching behavior would not be as predictable.
    766 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
    767   "<html>\n"
    768   "<head>\n"
    769   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
    770   "http-equiv=\"content-type\">\n"
    771   "<title>All about weasels</title>\n"
    772   "</head>\n"
    773   "<!-- You will notice that the word \"weasel\" may be replaced"
    774   " with something else -->\n"
    775   "<body>\n"
    776   "<h1>All about the weasel: highly compressible HTML text</h1>"
    777   "<ul>\n"
    778   "<li>Don\'t look a gift weasel in its mouth.</li>\n"
    779   "<li>This item makes sure the next occurrence is found.</li>\n"
    780   "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
    781   "</ul>\n"
    782   "<br>\n"
    783   "</body>\n"
    784   "</html>\n";
    785 
    786 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
    787   "<html>\n"
    788   "<head>\n"
    789   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
    790   "http-equiv=\"content-type\">\n"
    791   "<title>All about moon-pies</title>\n"
    792   "</head>\n"
    793   "<!-- You will notice that the word \"moon-pie\" may be replaced"
    794   " with something else -->\n"
    795   "<body>\n"
    796   "<h1>All about the moon-pie: highly compressible HTML text</h1>"
    797   "<ul>\n"
    798   "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
    799   "<li>This item makes sure the next occurrence is found.</li>\n"
    800   "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
    801   "</ul>\n"
    802   "<br>\n"
    803   "</body>\n"
    804   "</html>\n";
    805 
    806 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
    807 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
    808 
    809 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
    810 const char* WeaselsToMoonpiesTest::target_ = NULL;
    811 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
    812 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
    813 
    814 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
    815     int copy_mode) const {
    816   size_t copy_address = 0;
    817   if (default_cache_.IsSelfMode(copy_mode)) {
    818     copy_address = AfterLastWeasel();
    819   } else if (default_cache_.IsNearMode(copy_mode)) {
    820     copy_address = DistanceBetweenLastTwoWeasels();
    821   } else if (default_cache_.IsSameMode(copy_mode)) {
    822     copy_address = AfterLastWeasel() % 256;
    823   }
    824   return static_cast<int32_t>(copy_address);
    825 }
    826 
    827 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
    828   if (copy_mode == default_cache_.FirstSameMode()) {
    829     return default_cache_.FirstSameMode()
    830         + static_cast<int>((copied_moonpie_address_ / 256) % 3);
    831   } else {
    832     return copy_mode;
    833   }
    834 }
    835 
    836 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
    837     int copy_mode) const {
    838   size_t copy_address = 0;
    839   if (default_cache_.IsHereMode(copy_mode)) {
    840     copy_address = DistanceFromLastMoonpie();
    841   } else if (default_cache_.IsNearMode(copy_mode)) {
    842     copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
    843   } else if (default_cache_.IsSameMode(copy_mode)) {
    844     copy_address = copied_moonpie_address_ % 256;
    845   }
    846   return static_cast<int32_t>(copy_address);
    847 }
    848 
    849 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
    850 // in the encoding.
    851 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
    852   EXPECT_FALSE(NoMoreMoonpies());
    853   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
    854   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
    855   ExpectAddInstructionForStringLength(moonpie_text_);
    856   ExpectDataString(moonpie_text_);
    857 }
    858 
    859 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
    860 // in the encoding.  The "moon-pie" text will be copied from the previously
    861 // encoded target.
    862 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
    863     int copy_mode,
    864     int moonpie_copy_mode) {
    865   EXPECT_FALSE(NoMoreMoonpies());
    866   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
    867   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
    868   moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
    869   ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
    870   ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
    871                 moonpie_copy_mode);
    872   copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
    873 }
    874 
    875 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
    876   Encode(/* interleaved = */ true, /* target matching = */ false);
    877   FindNextMoonpie(false);
    878   // Expect all five "weasel"s to be replaced with "moon-pie"s
    879   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
    880   FindNextMoonpie(false);
    881   CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
    882   FindNextMoonpie(false);
    883   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
    884   FindNextMoonpie(false);
    885   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
    886   FindNextMoonpie(false);
    887   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
    888   FindNextMoonpie(false);
    889   EXPECT_TRUE(NoMoreMoonpies());
    890   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
    891                     default_cache_.FirstNearMode());
    892   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
    893   VerifySizes();
    894 }
    895 
    896 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
    897   Encode(/* interleaved = */ true, /* target matching = */ true);
    898   // Expect all five "weasel"s to be replaced with "moon-pie"s.
    899   // Every "moon-pie" after the first one should be copied from the
    900   // previously encoded target text.
    901   FindNextMoonpie(false);
    902   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
    903   FindNextMoonpie(true);
    904   CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
    905   FindNextMoonpie(true);
    906   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
    907                                 default_cache_.FirstSameMode());
    908   FindNextMoonpie(true);
    909   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
    910                                 VCD_HERE_MODE);
    911   FindNextMoonpie(true);
    912   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
    913                                 default_cache_.FirstSameMode());
    914   FindNextMoonpie(true);
    915   EXPECT_TRUE(NoMoreMoonpies());
    916   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
    917                     default_cache_.FirstNearMode() + 3);
    918   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
    919   VerifySizes();
    920 }
    921 
    922 }  //  anonymous namespace
    923 }  //  namespace open-vcdiff
    924