Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <config.h>
     17 #include "google/vcencoder.h"
     18 #include <stdlib.h>  // free, posix_memalign
     19 #include <string.h>  // memcpy
     20 #include <algorithm>
     21 #include <string>
     22 #include <vector>
     23 #include "blockhash.h"
     24 #include "checksum.h"
     25 #include "testing.h"
     26 #include "varint_bigendian.h"
     27 #include "google/vcdecoder.h"
     28 #include "vcdiff_defs.h"
     29 
     30 #ifdef HAVE_EXT_ROPE
     31 #include <ext/rope>
     32 #include "output_string_crope.h"
     33 using __gnu_cxx::crope;
     34 #endif  // HAVE_EXT_ROPE
     35 
     36 #ifdef HAVE_MALLOC_H
     37 #include <malloc.h>
     38 #endif  // HAVE_MALLOC_H
     39 
     40 #ifdef HAVE_SYS_MMAN_H
     41 #define _XOPEN_SOURCE 600  // posix_memalign
     42 #include <sys/mman.h>  // mprotect
     43 #endif  // HAVE_SYS_MMAN_H
     44 
     45 #ifdef HAVE_UNISTD_H
     46 #include <unistd.h>  // getpagesize
     47 #endif  // HAVE_UNISTD_H
     48 
     49 namespace open_vcdiff {
     50 namespace {
     51 
     52 static const size_t kFileHeaderSize = sizeof(DeltaFileHeader);
     53 
     54 // This is to check the maximum possible encoding size
     55 // if using a single ADD instruction, so assume that the
     56 // dictionary size, the length of the ADD data, the size
     57 // of the target window, and the length of the delta window
     58 // are all two-byte Varints, that is, 128 <= length < 4096.
     59 // This figure includes three extra bytes for a zero-sized
     60 // ADD instruction with a two-byte Varint explicit size.
     61 // Any additional COPY & ADD instructions must reduce
     62 // the length of the encoding from this maximum.
     63 static const size_t kWindowHeaderSize = 21;
     64 
     65 class VerifyEncodedBytesTest : public testing::Test {
     66  public:
     67   typedef std::string string;
     68 
     69   VerifyEncodedBytesTest() : delta_index_(0) { }
     70   virtual ~VerifyEncodedBytesTest() { }
     71 
     72   void ExpectByte(unsigned char b) {
     73     EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_]));
     74     ++delta_index_;
     75   }
     76 
     77   void ExpectString(const char* s) {
     78     const size_t size = strlen(s);  // don't include terminating NULL char
     79     EXPECT_EQ(string(s, size),
     80               string(delta_data() + delta_index_, size));
     81     delta_index_ += size;
     82   }
     83 
     84   void ExpectNoMoreBytes() {
     85     EXPECT_EQ(delta_index_, delta_size());
     86   }
     87 
     88   void ExpectSize(size_t size) {
     89     const char* delta_size_pos = &delta_[delta_index_];
     90     EXPECT_EQ(size,
     91               static_cast<size_t>(
     92                   VarintBE<int32_t>::Parse(delta_data() + delta_size(),
     93                                            &delta_size_pos)));
     94     delta_index_ = delta_size_pos - delta_data();
     95   }
     96 
     97   void ExpectChecksum(VCDChecksum checksum) {
     98     const char* delta_checksum_pos = &delta_[delta_index_];
     99     EXPECT_EQ(checksum,
    100               static_cast<VCDChecksum>(
    101                   VarintBE<int64_t>::Parse(delta_data() + delta_size(),
    102                                            &delta_checksum_pos)));
    103     delta_index_ = delta_checksum_pos - delta_data();
    104   }
    105 
    106   const string& delta_as_const() const { return delta_; }
    107   string* delta() { return &delta_; }
    108 
    109   const char* delta_data() const { return delta_as_const().data(); }
    110   size_t delta_size() const { return delta_as_const().size(); }
    111 
    112  private:
    113   string delta_;
    114   size_t delta_index_;
    115 };
    116 
    117 class VCDiffEncoderTest : public VerifyEncodedBytesTest {
    118  protected:
    119   static const char kDictionary[];
    120   static const char kTarget[];
    121 
    122   VCDiffEncoderTest();
    123   virtual ~VCDiffEncoderTest() { }
    124 
    125   void TestWithFixedChunkSize(size_t chunk_size);
    126   void TestWithEncodedChunkVector(size_t chunk_size);
    127 
    128   HashedDictionary hashed_dictionary_;
    129   VCDiffStreamingEncoder encoder_;
    130   VCDiffStreamingDecoder decoder_;
    131   VCDiffEncoder simple_encoder_;
    132   VCDiffDecoder simple_decoder_;
    133 
    134   string result_target_;
    135 };
    136 
    137 const char VCDiffEncoderTest::kDictionary[] =
    138     "\"Just the place for a Snark!\" the Bellman cried,\n"
    139     "As he landed his crew with care;\n"
    140     "Supporting each man on the top of the tide\n"
    141     "By a finger entwined in his hair.\n";
    142 
    143 const char VCDiffEncoderTest::kTarget[] =
    144     "\"Just the place for a Snark! I have said it twice:\n"
    145     "That alone should encourage the crew.\n"
    146     "Just the place for a Snark! I have said it thrice:\n"
    147     "What I tell you three times is true.\"\n";
    148 
    149 VCDiffEncoderTest::VCDiffEncoderTest()
    150     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
    151       encoder_(&hashed_dictionary_,
    152                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
    153                /* look_for_target_matches = */ true),
    154       simple_encoder_(kDictionary, sizeof(kDictionary)) {
    155   EXPECT_TRUE(hashed_dictionary_.Init());
    156 }
    157 
    158 TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) {
    159   EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
    160 }
    161 
    162 TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) {
    163   EXPECT_FALSE(encoder_.FinishEncoding(delta()));
    164 }
    165 
    166 TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) {
    167   HashedDictionary nothing_dictionary("", 0);
    168   EXPECT_TRUE(nothing_dictionary.Init());
    169   VCDiffStreamingEncoder nothing_encoder(&nothing_dictionary,
    170                                          VCD_STANDARD_FORMAT,
    171                                          false);
    172   EXPECT_TRUE(nothing_encoder.StartEncoding(delta()));
    173   EXPECT_TRUE(nothing_encoder.FinishEncoding(delta()));
    174   decoder_.StartDecoding("", 0);
    175   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    176                                    delta_size(),
    177                                    &result_target_));
    178   EXPECT_TRUE(decoder_.FinishDecoding());
    179   EXPECT_TRUE(result_target_.empty());
    180 }
    181 
    182 // A NULL dictionary pointer is legal as long as the dictionary size is 0.
    183 TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) {
    184   HashedDictionary null_dictionary(NULL, 0);
    185   EXPECT_TRUE(null_dictionary.Init());
    186   VCDiffStreamingEncoder null_encoder(&null_dictionary,
    187                                       VCD_STANDARD_FORMAT,
    188                                       false);
    189   EXPECT_TRUE(null_encoder.StartEncoding(delta()));
    190   EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta()));
    191   EXPECT_TRUE(null_encoder.FinishEncoding(delta()));
    192   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    193             delta_size());
    194   decoder_.StartDecoding(NULL, 0);
    195   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    196                                    delta_size(),
    197                                    &result_target_));
    198   EXPECT_TRUE(decoder_.FinishDecoding());
    199   EXPECT_EQ(kTarget, result_target_);
    200 }
    201 
    202 TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) {
    203   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
    204   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    205             delta_size());
    206   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    207                                      sizeof(kDictionary),
    208                                      delta_as_const(),
    209                                      &result_target_));
    210   EXPECT_EQ(kTarget, result_target_);
    211 }
    212 
    213 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) {
    214   simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED);
    215   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
    216   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    217             delta_size());
    218   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    219                                      sizeof(kDictionary),
    220                                      delta_as_const(),
    221                                      &result_target_));
    222   EXPECT_EQ(kTarget, result_target_);
    223 }
    224 
    225 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) {
    226   simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM);
    227   EXPECT_TRUE(simple_encoder_.Encode(kTarget,
    228                                      strlen(kTarget),
    229                                      delta()));
    230   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    231             delta_size());
    232   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    233                                      sizeof(kDictionary),
    234                                      delta_as_const(),
    235                                      &result_target_));
    236   EXPECT_EQ(kTarget, result_target_);
    237 }
    238 
    239 TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) {
    240   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    241   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
    242   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
    243   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    244             delta_size());
    245   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
    246   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    247                                    delta_size(),
    248                                    &result_target_));
    249   EXPECT_TRUE(decoder_.FinishDecoding());
    250   EXPECT_EQ(kTarget, result_target_);
    251 }
    252 
    253 TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) {
    254   string delta_start, delta_encode, delta_finish;
    255   EXPECT_TRUE(encoder_.StartEncoding(&delta_start));
    256   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode));
    257   EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish));
    258   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    259             delta_start.size() + delta_encode.size() + delta_finish.size());
    260   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
    261   EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(),
    262                                    delta_start.size(),
    263                                    &result_target_));
    264   EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(),
    265                                    delta_encode.size(),
    266                                    &result_target_));
    267   EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(),
    268                                    delta_finish.size(),
    269                                    &result_target_));
    270   EXPECT_TRUE(decoder_.FinishDecoding());
    271   EXPECT_EQ(kTarget, result_target_);
    272 }
    273 
    274 #ifdef HAVE_EXT_ROPE
    275 // Test that the crope class can be used in place of a string for encoding
    276 // and decoding.
    277 TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) {
    278   crope delta_crope, result_crope;
    279   EXPECT_TRUE(encoder_.StartEncoding(&delta_crope));
    280   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope));
    281   EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope));
    282   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    283             delta_crope.size());
    284   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
    285   // crope can't guarantee that its characters are contiguous, so the decoding
    286   // has to be done byte-by-byte.
    287   for (crope::const_iterator it = delta_crope.begin();
    288        it != delta_crope.end(); it++) {
    289     const char this_char = *it;
    290     EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope));
    291   }
    292   EXPECT_TRUE(decoder_.FinishDecoding());
    293   crope expected_target(kTarget);
    294   EXPECT_EQ(expected_target, result_crope);
    295 }
    296 #endif  // HAVE_EXT_ROPE
    297 
    298 void VCDiffEncoderTest::TestWithFixedChunkSize(size_t chunk_size) {
    299   delta()->clear();
    300   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    301   for (size_t chunk_start_index = 0;
    302        chunk_start_index < strlen(kTarget);
    303        chunk_start_index += chunk_size) {
    304     size_t this_chunk_size = chunk_size;
    305     const size_t bytes_available = strlen(kTarget) - chunk_start_index;
    306     if (this_chunk_size > bytes_available) {
    307       this_chunk_size = bytes_available;
    308     }
    309     EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
    310                                      this_chunk_size,
    311                                      delta()));
    312   }
    313   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
    314   const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
    315   const size_t size_of_windows =
    316       strlen(kTarget) + (kWindowHeaderSize * num_windows);
    317   EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size());
    318   result_target_.clear();
    319   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
    320   for (size_t chunk_start_index = 0;
    321        chunk_start_index < delta_size();
    322        chunk_start_index += chunk_size) {
    323     size_t this_chunk_size = chunk_size;
    324     const size_t bytes_available = delta_size() - chunk_start_index;
    325     if (this_chunk_size > bytes_available) {
    326       this_chunk_size = bytes_available;
    327     }
    328     EXPECT_TRUE(decoder_.DecodeChunk(delta_data() + chunk_start_index,
    329                                      this_chunk_size,
    330                                      &result_target_));
    331   }
    332   EXPECT_TRUE(decoder_.FinishDecoding());
    333   EXPECT_EQ(kTarget, result_target_);
    334 }
    335 
    336 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) {
    337   // These specific chunk sizes have failed in the past
    338   TestWithFixedChunkSize(6);
    339   TestWithFixedChunkSize(45);
    340   TestWithFixedChunkSize(60);
    341 
    342   // Now loop through all possible chunk sizes
    343   for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
    344     TestWithFixedChunkSize(chunk_size);
    345   }
    346 }
    347 
    348 // If --allow_vcd_target=false is specified, the decoder will throw away some of
    349 // the internally-stored decoded target beyond the current window.  Try
    350 // different numbers of encoded window sizes to make sure that this behavior
    351 // does not affect the results.
    352 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) {
    353   decoder_.SetAllowVcdTarget(false);
    354   // Loop through all possible chunk sizes
    355   for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
    356     TestWithFixedChunkSize(chunk_size);
    357   }
    358 }
    359 
    360 // Splits the text to be encoded into fixed-size chunks.  Encodes each
    361 // chunk and puts it into a vector of strings.  Then decodes each string
    362 // in the vector and appends the result into result_target_.
    363 void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) {
    364   std::vector<string> encoded_chunks;
    365   string this_encoded_chunk;
    366   size_t total_chunk_size = 0;
    367   EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk));
    368   encoded_chunks.push_back(this_encoded_chunk);
    369   total_chunk_size += this_encoded_chunk.size();
    370   for (size_t chunk_start_index = 0;
    371        chunk_start_index < strlen(kTarget);
    372        chunk_start_index += chunk_size) {
    373     size_t this_chunk_size = chunk_size;
    374     const size_t bytes_available = strlen(kTarget) - chunk_start_index;
    375     if (this_chunk_size > bytes_available) {
    376       this_chunk_size = bytes_available;
    377     }
    378     this_encoded_chunk.clear();
    379     EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
    380                                      this_chunk_size,
    381                                      &this_encoded_chunk));
    382     encoded_chunks.push_back(this_encoded_chunk);
    383     total_chunk_size += this_encoded_chunk.size();
    384   }
    385   this_encoded_chunk.clear();
    386   EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk));
    387   encoded_chunks.push_back(this_encoded_chunk);
    388   total_chunk_size += this_encoded_chunk.size();
    389   const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
    390   const size_t size_of_windows =
    391       strlen(kTarget) + (kWindowHeaderSize * num_windows);
    392   EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size);
    393   result_target_.clear();
    394   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
    395   for (std::vector<string>::iterator it = encoded_chunks.begin();
    396        it != encoded_chunks.end(); ++it) {
    397     EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_));
    398   }
    399   EXPECT_TRUE(decoder_.FinishDecoding());
    400   EXPECT_EQ(kTarget, result_target_);
    401 }
    402 
    403 TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) {
    404   // Loop through all possible chunk sizes
    405   for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
    406     TestWithEncodedChunkVector(chunk_size);
    407   }
    408 }
    409 
    410 // Verify that HashedDictionary stores a copy of the dictionary text,
    411 // rather than just storing a pointer to it.  If the dictionary buffer
    412 // is overwritten after creating a HashedDictionary from it, it shouldn't
    413 // affect an encoder that uses that HashedDictionary.
    414 TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) {
    415   string dictionary_copy(kDictionary, sizeof(kDictionary));
    416   HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size());
    417   EXPECT_TRUE(hd_copy.Init());
    418   VCDiffStreamingEncoder copy_encoder(&hd_copy,
    419                                       VCD_FORMAT_INTERLEAVED
    420                                           | VCD_FORMAT_CHECKSUM,
    421                                       /* look_for_target_matches = */ true);
    422   // Produce a reference version of the encoded text.
    423   string delta_before;
    424   EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before));
    425   EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget,
    426                                        strlen(kTarget),
    427                                        &delta_before));
    428   EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before));
    429   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    430             delta_before.size());
    431 
    432   // Overwrite the dictionary text with all 'Q' characters.
    433   dictionary_copy.replace(0,
    434                           dictionary_copy.size(),
    435                           dictionary_copy.size(),
    436                           'Q');
    437   // When the encoder is used on the same target text after overwriting
    438   // the dictionary, it should produce the same encoded output.
    439   string delta_after;
    440   EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after));
    441   EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after));
    442   EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after));
    443   EXPECT_EQ(delta_before, delta_after);
    444 }
    445 
    446 // Binary data test part 1: The dictionary and target data should not
    447 // be treated as NULL-terminated.  An embedded NULL should be handled like
    448 // any other byte of data.
    449 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) {
    450   const char embedded_null_dictionary_text[] =
    451       { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 };
    452   const char embedded_null_target[] =
    453       { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 };
    454   CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
    455   CHECK_EQ(8, sizeof(embedded_null_target));
    456   HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
    457       sizeof(embedded_null_dictionary_text));
    458   EXPECT_TRUE(embedded_null_dictionary.Init());
    459   VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
    460       VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
    461       /* look_for_target_matches = */ true);
    462   EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
    463   EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
    464                                                 sizeof(embedded_null_target),
    465                                                 delta()));
    466   EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
    467   decoder_.StartDecoding(embedded_null_dictionary_text,
    468                          sizeof(embedded_null_dictionary_text));
    469   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    470                                    delta_size(),
    471                                    &result_target_));
    472   EXPECT_TRUE(decoder_.FinishDecoding());
    473   EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
    474   EXPECT_EQ(string(embedded_null_target,
    475                    sizeof(embedded_null_target)),
    476             result_target_);
    477 }
    478 
    479 // Binary data test part 2: An embedded CR or LF should be handled like
    480 // any other byte of data.  No text-processing of the data should occur.
    481 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) {
    482   const char embedded_null_dictionary_text[] =
    483       { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A };
    484   const char embedded_null_target[] =
    485       { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 };
    486   CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
    487   CHECK_EQ(8, sizeof(embedded_null_target));
    488   HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
    489       sizeof(embedded_null_dictionary_text));
    490   EXPECT_TRUE(embedded_null_dictionary.Init());
    491   VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
    492       VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
    493       /* look_for_target_matches = */ true);
    494   EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
    495   EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
    496                                                 sizeof(embedded_null_target),
    497                                                 delta()));
    498   EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
    499   decoder_.StartDecoding(embedded_null_dictionary_text,
    500                          sizeof(embedded_null_dictionary_text));
    501   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    502                                    delta_size(),
    503                                    &result_target_));
    504   EXPECT_TRUE(decoder_.FinishDecoding());
    505   EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
    506   EXPECT_EQ(string(embedded_null_target,
    507                    sizeof(embedded_null_target)),
    508             result_target_);
    509 }
    510 
    511 TEST_F(VCDiffEncoderTest, UsingWideCharacters) {
    512   const wchar_t wchar_dictionary_text[] =
    513       L"\"Just the place for a Snark!\" the Bellman cried,\n"
    514       L"As he landed his crew with care;\n"
    515       L"Supporting each man on the top of the tide\n"
    516       L"By a finger entwined in his hair.\n";
    517 
    518   const wchar_t wchar_target[] =
    519       L"\"Just the place for a Snark! I have said it twice:\n"
    520       L"That alone should encourage the crew.\n"
    521       L"Just the place for a Snark! I have said it thrice:\n"
    522       L"What I tell you three times is true.\"\n";
    523 
    524   HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text,
    525                                     sizeof(wchar_dictionary_text));
    526   EXPECT_TRUE(wchar_dictionary.Init());
    527   VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary,
    528                                        VCD_FORMAT_INTERLEAVED
    529                                            | VCD_FORMAT_CHECKSUM,
    530                                        /* look_for_target_matches = */ false);
    531   EXPECT_TRUE(wchar_encoder.StartEncoding(delta()));
    532   EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target,
    533                                         sizeof(wchar_target),
    534                                         delta()));
    535   EXPECT_TRUE(wchar_encoder.FinishEncoding(delta()));
    536   decoder_.StartDecoding((const char*) wchar_dictionary_text,
    537                          sizeof(wchar_dictionary_text));
    538   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
    539                                    delta_size(),
    540                                    &result_target_));
    541   EXPECT_TRUE(decoder_.FinishDecoding());
    542   const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data();
    543   EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar));
    544   EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar));
    545 }
    546 
    547 #if defined(HAVE_MPROTECT) && \
    548    (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN))
    549 // Bug 1220602: Make sure the encoder doesn't read past the end of the input
    550 // buffer.
    551 TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) {
    552   const size_t target_size = strlen(kTarget);
    553 
    554   // Allocate two memory pages.
    555   const int page_size = getpagesize();
    556   void* two_pages = NULL;
    557 #ifdef HAVE_POSIX_MEMALIGN
    558   posix_memalign(&two_pages, page_size, 2 * page_size);
    559 #else  // !HAVE_POSIX_MEMALIGN
    560   two_pages = memalign(page_size, 2 * page_size);
    561 #endif  // HAVE_POSIX_MEMALIGN
    562   char* const first_page = reinterpret_cast<char*>(two_pages);
    563   char* const second_page = first_page + page_size;
    564 
    565   // Place the target string at the end of the first page.
    566   char* const target_with_guard = second_page - target_size;
    567   memcpy(target_with_guard, kTarget, target_size);
    568 
    569   // Make the second page unreadable.
    570   mprotect(second_page, page_size, PROT_NONE);
    571 
    572   // Now perform the encode operation, which will cause a segmentation fault
    573   // if it reads past the end of the buffer.
    574   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    575   EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
    576   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
    577 
    578   // Undo the mprotect.
    579   mprotect(second_page, page_size, PROT_READ|PROT_WRITE);
    580   free(two_pages);
    581 }
    582 
    583 TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) {
    584   const size_t target_size = strlen(kTarget);
    585 
    586   // Allocate two memory pages.
    587   const int page_size = getpagesize();
    588   void* two_pages = NULL;
    589 #ifdef HAVE_POSIX_MEMALIGN
    590   posix_memalign(&two_pages, page_size, 2 * page_size);
    591 #else  // !HAVE_POSIX_MEMALIGN
    592   two_pages = memalign(page_size, 2 * page_size);
    593 #endif  // HAVE_POSIX_MEMALIGN
    594   char* const first_page = reinterpret_cast<char*>(two_pages);
    595   char* const second_page = first_page + page_size;
    596 
    597   // Make the first page unreadable.
    598   mprotect(first_page, page_size, PROT_NONE);
    599 
    600   // Place the target string at the beginning of the second page.
    601   char* const target_with_guard = second_page;
    602   memcpy(target_with_guard, kTarget, target_size);
    603 
    604   // Now perform the encode operation, which will cause a segmentation fault
    605   // if it reads past the beginning of the buffer.
    606   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    607   EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
    608   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
    609 
    610   // Undo the mprotect.
    611   mprotect(first_page, page_size, PROT_READ|PROT_WRITE);
    612   free(two_pages);
    613 }
    614 #endif  // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN)
    615 
    616 class VCDiffMatchCountTest : public VerifyEncodedBytesTest {
    617  protected:
    618   virtual ~VCDiffMatchCountTest() { }
    619 
    620   void ExpectMatch(size_t match_size) {
    621     if (match_size >= expected_match_counts_.size()) {
    622       // Be generous to avoid resizing again
    623       expected_match_counts_.resize(match_size * 2, 0);
    624     }
    625     ++expected_match_counts_[match_size];
    626   }
    627 
    628   void VerifyMatchCounts() {
    629     EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
    630                            expected_match_counts_.end(),
    631                            actual_match_counts_.begin()));
    632   }
    633 
    634   std::vector<int> expected_match_counts_;
    635   std::vector<int> actual_match_counts_;
    636 };
    637 
    638 class VCDiffHTML1Test : public VCDiffMatchCountTest {
    639  protected:
    640   static const char kDictionary[];
    641   static const char kTarget[];
    642   static const char kRedundantTarget[];
    643 
    644   VCDiffHTML1Test();
    645   virtual ~VCDiffHTML1Test() { }
    646 
    647   void SimpleEncode();
    648   void StreamingEncode();
    649 
    650   HashedDictionary hashed_dictionary_;
    651   VCDiffStreamingEncoder encoder_;
    652   VCDiffStreamingDecoder decoder_;
    653   VCDiffEncoder simple_encoder_;
    654   VCDiffDecoder simple_decoder_;
    655 
    656   string result_target_;
    657 };
    658 
    659 const char VCDiffHTML1Test::kDictionary[] =
    660     "<html><font color=red>This part from the dict</font><br>";
    661 
    662 const char VCDiffHTML1Test::kTarget[] =
    663     "<html><font color=red>This part from the dict</font><br>\n"
    664     "And this part is not...</html>";
    665 
    666 const char VCDiffHTML1Test::kRedundantTarget[] =
    667     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    668     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    669     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    670     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";  // 256
    671 
    672 VCDiffHTML1Test::VCDiffHTML1Test()
    673     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
    674       encoder_(&hashed_dictionary_,
    675                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
    676                /* look_for_target_matches = */ true),
    677       simple_encoder_(kDictionary, sizeof(kDictionary)) {
    678   EXPECT_TRUE(hashed_dictionary_.Init());
    679 }
    680 
    681 void VCDiffHTML1Test::SimpleEncode() {
    682   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
    683   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    684             delta_size());
    685   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    686                                      sizeof(kDictionary),
    687                                      delta_as_const(),
    688                                      &result_target_));
    689   EXPECT_EQ(kTarget, result_target_);
    690 }
    691 
    692 void VCDiffHTML1Test::StreamingEncode() {
    693   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    694   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
    695   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
    696 }
    697 
    698 TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) {
    699   SimpleEncode();
    700   // These values do not depend on the block size used for encoding
    701   ExpectByte(0xD6);  // 'V' | 0x80
    702   ExpectByte(0xC3);  // 'C' | 0x80
    703   ExpectByte(0xC4);  // 'D' | 0x80
    704   ExpectByte(0x00);  // Simple encoder never uses interleaved format
    705   ExpectByte(0x00);  // Hdr_Indicator
    706   ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
    707   ExpectByte(sizeof(kDictionary));  // Dictionary length
    708   ExpectByte(0x00);  // Source segment position: start of dictionary
    709   if (BlockHash::kBlockSize < 16) {
    710     // A medium block size will catch the "his part " match.
    711     ExpectByte(0x22);  // Length of the delta encoding
    712     ExpectSize(strlen(kTarget));  // Size of the target window
    713     ExpectByte(0x00);  // Delta_indicator (no compression)
    714     ExpectByte(0x16);  // Length of the data section
    715     ExpectByte(0x05);  // Length of the instructions section
    716     ExpectByte(0x02);  // Length of the address section
    717     // Data section
    718     ExpectString("\nAnd t");      // Data for 1st ADD
    719     ExpectString("is not...</html>");  // Data for 2nd ADD
    720     // Instructions section
    721     ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
    722     ExpectByte(0x38);  // COPY size (56)
    723     ExpectByte(0x07);  // ADD size 6
    724     ExpectByte(0x19);  // COPY size 9 mode VCD_SELF
    725     ExpectByte(0x11);  // ADD size 16
    726     // Address section
    727     ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
    728     ExpectByte(0x17);  // COPY address (23) mode VCD_SELF
    729   } else if (BlockHash::kBlockSize <= 56) {
    730     // Any block size up to 56 will catch the matching prefix string.
    731     ExpectByte(0x29);  // Length of the delta encoding
    732     ExpectSize(strlen(kTarget));  // Size of the target window
    733     ExpectByte(0x00);  // Delta_indicator (no compression)
    734     ExpectByte(0x1F);  // Length of the data section
    735     ExpectByte(0x04);  // Length of the instructions section
    736     ExpectByte(0x01);  // Length of the address section
    737     ExpectString("\nAnd this part is not...</html>");  // Data for ADD
    738     // Instructions section
    739     ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
    740     ExpectByte(0x38);  // COPY size (56)
    741     ExpectByte(0x01);  // ADD size 0
    742     ExpectByte(0x1F);  // Size of ADD (31)
    743     // Address section
    744     ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
    745   } else {
    746     // The matching string is 56 characters long, and the block size is
    747     // 64 or greater, so no match should be found.
    748     ExpectSize(strlen(kTarget) + 7);  // Delta encoding len
    749     ExpectSize(strlen(kTarget));  // Size of the target window
    750     ExpectByte(0x00);  // Delta_indicator (no compression)
    751     ExpectSize(strlen(kTarget));  // Length of the data section
    752     ExpectByte(0x02);  // Length of the instructions section
    753     ExpectByte(0x00);  // Length of the address section
    754     // Data section
    755     ExpectString(kTarget);
    756     ExpectByte(0x01);  // ADD size 0
    757     ExpectSize(strlen(kTarget));
    758   }
    759   ExpectNoMoreBytes();
    760 }
    761 
    762 TEST_F(VCDiffHTML1Test, MatchCounts) {
    763   StreamingEncode();
    764   encoder_.GetMatchCounts(&actual_match_counts_);
    765   if (BlockHash::kBlockSize < 16) {
    766     // A medium block size will catch the "his part " match.
    767     ExpectMatch(56);
    768     ExpectMatch(9);
    769   } else if (BlockHash::kBlockSize <= 56) {
    770     // Any block size up to 56 will catch the matching prefix string.
    771     ExpectMatch(56);
    772   }
    773   VerifyMatchCounts();
    774 }
    775 
    776 TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) {
    777   EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
    778                                      strlen(kRedundantTarget),
    779                                      delta()));
    780   EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
    781             delta_size());
    782   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    783                                      sizeof(kDictionary),
    784                                      delta_as_const(),
    785                                      &result_target_));
    786   EXPECT_EQ(kRedundantTarget, result_target_);
    787   // These values do not depend on the block size used for encoding
    788   ExpectByte(0xD6);  // 'V' | 0x80
    789   ExpectByte(0xC3);  // 'C' | 0x80
    790   ExpectByte(0xC4);  // 'D' | 0x80
    791   ExpectByte(0x00);  // Simple encoder never uses interleaved format
    792   ExpectByte(0x00);  // Hdr_Indicator
    793   ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
    794   ExpectByte(sizeof(kDictionary));  // Dictionary length
    795   ExpectByte(0x00);  // Source segment position: start of dictionary
    796   ExpectByte(0x0C);  // Length of the delta encoding
    797   ExpectSize(strlen(kRedundantTarget));  // Size of the target window
    798   ExpectByte(0x00);  // Delta_indicator (no compression)
    799   ExpectByte(0x01);  // Length of the data section
    800   ExpectByte(0x04);  // Length of the instructions section
    801   ExpectByte(0x01);  // Length of the address section
    802   // Data section
    803   ExpectString("A");      // Data for ADD
    804   // Instructions section
    805   ExpectByte(0x02);  // ADD size 1
    806   ExpectByte(0x23);  // COPY size 0 mode VCD_HERE
    807   ExpectSize(strlen(kRedundantTarget) - 1);  // COPY size 255
    808   // Address section
    809   ExpectByte(0x01);  // COPY address (1) mode VCD_HERE
    810   ExpectNoMoreBytes();
    811 }
    812 
    813 TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) {
    814   simple_encoder_.SetTargetMatching(false);
    815   EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
    816                                      strlen(kRedundantTarget),
    817                                      delta()));
    818   EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
    819             delta_size());
    820   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    821                                      sizeof(kDictionary),
    822                                      delta_as_const(),
    823                                      &result_target_));
    824   EXPECT_EQ(kRedundantTarget, result_target_);
    825   // These values do not depend on the block size used for encoding
    826   ExpectByte(0xD6);  // 'V' | 0x80
    827   ExpectByte(0xC3);  // 'C' | 0x80
    828   ExpectByte(0xC4);  // 'D' | 0x80
    829   ExpectByte(0x00);  // Simple encoder never uses interleaved format
    830   ExpectByte(0x00);  // Hdr_Indicator
    831   ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
    832   ExpectByte(sizeof(kDictionary));  // Dictionary length
    833   ExpectByte(0x00);  // Source segment position: start of dictionary
    834   ExpectSize(strlen(kRedundantTarget) + 0x0A);  // Length of the delta encoding
    835   ExpectSize(strlen(kRedundantTarget));  // Size of the target window
    836   ExpectByte(0x00);  // Delta_indicator (no compression)
    837   ExpectSize(strlen(kRedundantTarget));  // Length of the data section
    838   ExpectByte(0x03);  // Length of the instructions section
    839   ExpectByte(0x00);  // Length of the address section
    840   // Data section
    841   ExpectString(kRedundantTarget);      // Data for ADD
    842   // Instructions section
    843   ExpectByte(0x01);  // ADD size 0
    844   ExpectSize(strlen(kRedundantTarget));  // ADD size
    845   // Address section empty
    846   ExpectNoMoreBytes();
    847 }
    848 
    849 #ifdef GTEST_HAS_DEATH_TEST
    850 typedef VCDiffHTML1Test VCDiffHTML1DeathTest;
    851 
    852 TEST_F(VCDiffHTML1DeathTest, NullMatchCounts) {
    853   EXPECT_DEBUG_DEATH(encoder_.GetMatchCounts(NULL), "GetMatchCounts");
    854 }
    855 #endif  // GTEST_HAS_DEATH_TEST
    856 
    857 class VCDiffHTML2Test : public VCDiffMatchCountTest {
    858  protected:
    859   static const char kDictionary[];
    860   static const char kTarget[];
    861 
    862   VCDiffHTML2Test();
    863   virtual ~VCDiffHTML2Test() { }
    864 
    865   void SimpleEncode();
    866   void StreamingEncode();
    867 
    868   HashedDictionary hashed_dictionary_;
    869   VCDiffStreamingEncoder encoder_;
    870   VCDiffStreamingDecoder decoder_;
    871   VCDiffEncoder simple_encoder_;
    872   VCDiffDecoder simple_decoder_;
    873 
    874   string result_target_;
    875 };
    876 
    877 const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test";
    878 
    879 const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n";
    880 
    881 VCDiffHTML2Test::VCDiffHTML2Test()
    882     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
    883       encoder_(&hashed_dictionary_,
    884                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
    885                /* look_for_target_matches = */ true),
    886       simple_encoder_(kDictionary, sizeof(kDictionary)) {
    887   EXPECT_TRUE(hashed_dictionary_.Init());
    888 }
    889 
    890 void VCDiffHTML2Test::SimpleEncode() {
    891   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
    892   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    893             delta_size());
    894   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    895                                      sizeof(kDictionary),
    896                                      delta_as_const(),
    897                                      &result_target_));
    898   EXPECT_EQ(kTarget, result_target_);
    899 }
    900 
    901 void VCDiffHTML2Test::StreamingEncode() {
    902   EXPECT_TRUE(encoder_.StartEncoding(delta()));
    903   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
    904   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
    905             delta_size());
    906   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
    907                                      sizeof(kDictionary),
    908                                      delta_as_const(),
    909                                      &result_target_));
    910   EXPECT_EQ(kTarget, result_target_);
    911 }
    912 
    913 TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) {
    914   SimpleEncode();
    915   // These values do not depend on the block size used for encoding
    916   ExpectByte(0xD6);  // 'V' | 0x80
    917   ExpectByte(0xC3);  // 'C' | 0x80
    918   ExpectByte(0xC4);  // 'D' | 0x80
    919   ExpectByte(0x00);  // Simple encoder never uses interleaved format
    920   ExpectByte(0x00);  // Hdr_Indicator
    921   ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
    922   ExpectByte(sizeof(kDictionary));  // Dictionary length
    923   ExpectByte(0x00);  // Source segment position: start of dictionary
    924   if (BlockHash::kBlockSize <= 8) {
    925     ExpectByte(12);  // Length of the delta encoding
    926     ExpectSize(strlen(kTarget));  // Size of the target window
    927     ExpectByte(0x00);  // Delta_indicator (no compression)
    928     ExpectByte(0x04);  // Length of the data section
    929     ExpectByte(0x02);  // Length of the instructions section
    930     ExpectByte(0x01);  // Length of the address section
    931     ExpectByte('!');
    932     ExpectByte('!');
    933     ExpectByte('!');
    934     ExpectByte('\n');
    935     ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
    936     ExpectByte(0x05);  // ADD size 4
    937     ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
    938   } else {
    939     // Larger block sizes will not catch any matches.
    940     ExpectSize(strlen(kTarget) + 7);  // Delta encoding len
    941     ExpectSize(strlen(kTarget));  // Size of the target window
    942     ExpectByte(0x00);  // Delta_indicator (no compression)
    943     ExpectSize(strlen(kTarget));  // Length of the data section
    944     ExpectByte(0x02);  // Length of the instructions section
    945     ExpectByte(0x00);  // Length of the address section
    946     // Data section
    947     ExpectString(kTarget);
    948     ExpectByte(0x01);  // ADD size 0
    949     ExpectSize(strlen(kTarget));
    950   }
    951   ExpectNoMoreBytes();
    952 }
    953 
    954 TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) {
    955   StreamingEncode();
    956   const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget));
    957   CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum));
    958   // These values do not depend on the block size used for encoding
    959   ExpectByte(0xD6);  // 'V' | 0x80
    960   ExpectByte(0xC3);  // 'C' | 0x80
    961   ExpectByte(0xC4);  // 'D' | 0x80
    962   ExpectByte('S');  // Format extensions
    963   ExpectByte(0x00);  // Hdr_Indicator
    964   ExpectByte(VCD_SOURCE | VCD_CHECKSUM);  // Win_Indicator
    965   ExpectByte(sizeof(kDictionary));  // Dictionary length
    966   ExpectByte(0x00);  // Source segment position: start of dictionary
    967   if (BlockHash::kBlockSize <= 8) {
    968     ExpectByte(17);  // Length of the delta encoding
    969     ExpectSize(strlen(kTarget));  // Size of the target window
    970     ExpectByte(0x00);  // Delta_indicator (no compression)
    971     ExpectByte(0x00);  // Length of the data section
    972     ExpectByte(0x07);  // Length of the instructions section
    973     ExpectByte(0x00);  // Length of the address section
    974     ExpectChecksum(html2_checksum);
    975     ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
    976     ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
    977     ExpectByte(0x05);  // ADD size 4
    978     ExpectByte('!');
    979     ExpectByte('!');
    980     ExpectByte('!');
    981     ExpectByte('\n');
    982   } else {
    983     // Larger block sizes will not catch any matches.
    984     ExpectSize(strlen(kTarget) + 12);  // Delta encoding len
    985     ExpectSize(strlen(kTarget));  // Size of the target window
    986     ExpectByte(0x00);  // Delta_indicator (no compression)
    987     ExpectByte(0x00);  // Length of the data section
    988     ExpectSize(0x02 + strlen(kTarget));  // Interleaved
    989     ExpectByte(0x00);  // Length of the address section
    990     ExpectChecksum(html2_checksum);
    991     // Data section
    992     ExpectByte(0x01);  // ADD size 0
    993     ExpectSize(strlen(kTarget));
    994     ExpectString(kTarget);
    995   }
    996   ExpectNoMoreBytes();
    997 }
    998 
    999 TEST_F(VCDiffHTML2Test, MatchCounts) {
   1000   StreamingEncode();
   1001   encoder_.GetMatchCounts(&actual_match_counts_);
   1002   if (BlockHash::kBlockSize <= 8) {
   1003     ExpectMatch(14);
   1004   }
   1005   VerifyMatchCounts();
   1006 }
   1007 
   1008 }  // anonymous namespace
   1009 }  // namespace open_vcdiff
   1010