1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <config.h> 17 #include "google/vcencoder.h" 18 #include <stdlib.h> // free, posix_memalign 19 #include <string.h> // memcpy 20 #include <algorithm> 21 #include <string> 22 #include <vector> 23 #include "blockhash.h" 24 #include "checksum.h" 25 #include "testing.h" 26 #include "varint_bigendian.h" 27 #include "google/vcdecoder.h" 28 #include "vcdiff_defs.h" 29 30 #ifdef HAVE_EXT_ROPE 31 #include <ext/rope> 32 #include "output_string_crope.h" 33 using __gnu_cxx::crope; 34 #endif // HAVE_EXT_ROPE 35 36 #ifdef HAVE_MALLOC_H 37 #include <malloc.h> 38 #endif // HAVE_MALLOC_H 39 40 #ifdef HAVE_SYS_MMAN_H 41 #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600 42 #undef _XOPEN_SOURCE 43 #define _XOPEN_SOURCE 600 // posix_memalign 44 #endif 45 #include <sys/mman.h> // mprotect 46 #endif // HAVE_SYS_MMAN_H 47 48 #ifdef HAVE_UNISTD_H 49 #include <unistd.h> // getpagesize 50 #endif // HAVE_UNISTD_H 51 52 namespace open_vcdiff { 53 namespace { 54 55 static const size_t kFileHeaderSize = sizeof(DeltaFileHeader); 56 57 // This is to check the maximum possible encoding size 58 // if using a single ADD instruction, so assume that the 59 // dictionary size, the length of the ADD data, the size 60 // of the target window, and the length of the delta window 61 // are all two-byte Varints, that is, 128 <= length < 4096. 62 // This figure includes three extra bytes for a zero-sized 63 // ADD instruction with a two-byte Varint explicit size. 64 // Any additional COPY & ADD instructions must reduce 65 // the length of the encoding from this maximum. 66 static const size_t kWindowHeaderSize = 21; 67 68 class VerifyEncodedBytesTest : public testing::Test { 69 public: 70 typedef std::string string; 71 72 VerifyEncodedBytesTest() : delta_index_(0) { } 73 virtual ~VerifyEncodedBytesTest() { } 74 75 void ExpectByte(unsigned char b) { 76 EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_])); 77 ++delta_index_; 78 } 79 80 void ExpectString(const char* s) { 81 const size_t size = strlen(s); // don't include terminating NULL char 82 EXPECT_EQ(s, string(delta_data() + delta_index_, size)); 83 delta_index_ += size; 84 } 85 86 void ExpectNoMoreBytes() { 87 EXPECT_EQ(delta_index_, delta_size()); 88 } 89 90 void ExpectSize(size_t size) { 91 const char* delta_size_pos = &delta_[delta_index_]; 92 EXPECT_EQ(size, 93 static_cast<size_t>( 94 VarintBE<int32_t>::Parse(delta_data() + delta_size(), 95 &delta_size_pos))); 96 delta_index_ = delta_size_pos - delta_data(); 97 } 98 99 void ExpectChecksum(VCDChecksum checksum) { 100 const char* delta_checksum_pos = &delta_[delta_index_]; 101 EXPECT_EQ(checksum, 102 static_cast<VCDChecksum>( 103 VarintBE<int64_t>::Parse(delta_data() + delta_size(), 104 &delta_checksum_pos))); 105 delta_index_ = delta_checksum_pos - delta_data(); 106 } 107 108 const string& delta_as_const() const { return delta_; } 109 string* delta() { return &delta_; } 110 111 const char* delta_data() const { return delta_as_const().data(); } 112 size_t delta_size() const { return delta_as_const().size(); } 113 114 private: 115 string delta_; 116 size_t delta_index_; 117 }; 118 119 class VCDiffEncoderTest : public VerifyEncodedBytesTest { 120 protected: 121 static const char kDictionary[]; 122 static const char kTarget[]; 123 static const char kJSONDiff[]; 124 125 VCDiffEncoderTest(); 126 virtual ~VCDiffEncoderTest() { } 127 128 void TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, 129 VCDiffStreamingDecoder *decoder, 130 size_t chunk_size); 131 void TestWithEncodedChunkVector(size_t chunk_size); 132 133 HashedDictionary hashed_dictionary_; 134 VCDiffStreamingEncoder encoder_; 135 VCDiffStreamingDecoder decoder_; 136 VCDiffEncoder simple_encoder_; 137 VCDiffDecoder simple_decoder_; 138 VCDiffStreamingEncoder json_encoder_; 139 140 string result_target_; 141 }; 142 143 const char VCDiffEncoderTest::kDictionary[] = 144 "\"Just the place for a Snark!\" the Bellman cried,\n" 145 "As he landed his crew with care;\n" 146 "Supporting each man on the top of the tide\n" 147 "By a finger entwined in his hair.\n"; 148 149 const char VCDiffEncoderTest::kTarget[] = 150 "\"Just the place for a Snark! I have said it twice:\n" 151 "That alone should encourage the crew.\n" 152 "Just the place for a Snark! I have said it thrice:\n" 153 "What I tell you three times is true.\"\n"; 154 155 const char VCDiffEncoderTest::kJSONDiff[] = 156 "[\"\\\"Just the place for a Snark! I have said it twice:\\n" 157 "That alone should encourage the crew.\\n\"," 158 "161,44," 159 "\"hrice:\\nWhat I tell you three times is true.\\\"\\n\",]"; 160 161 VCDiffEncoderTest::VCDiffEncoderTest() 162 : hashed_dictionary_(kDictionary, sizeof(kDictionary)), 163 encoder_(&hashed_dictionary_, 164 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, 165 /* look_for_target_matches = */ true), 166 simple_encoder_(kDictionary, sizeof(kDictionary)), 167 json_encoder_(&hashed_dictionary_, 168 VCD_FORMAT_JSON, 169 /* look_for_target_matches = */ true) { 170 EXPECT_TRUE(hashed_dictionary_.Init()); 171 } 172 173 TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) { 174 EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); 175 } 176 177 TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) { 178 EXPECT_FALSE(encoder_.FinishEncoding(delta())); 179 } 180 181 TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) { 182 HashedDictionary nothing_dictionary("", 0); 183 EXPECT_TRUE(nothing_dictionary.Init()); 184 VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, 185 VCD_STANDARD_FORMAT, 186 false); 187 EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); 188 EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); 189 decoder_.StartDecoding("", 0); 190 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 191 delta_size(), 192 &result_target_)); 193 EXPECT_TRUE(decoder_.FinishDecoding()); 194 EXPECT_TRUE(result_target_.empty()); 195 } 196 197 TEST_F(VCDiffEncoderTest, EncodeNothingJSON) { 198 HashedDictionary nothing_dictionary("", 0); 199 EXPECT_TRUE(nothing_dictionary.Init()); 200 VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, 201 VCD_FORMAT_JSON, 202 false); 203 EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); 204 EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); 205 EXPECT_EQ("", delta_as_const()); 206 } 207 208 // A NULL dictionary pointer is legal as long as the dictionary size is 0. 209 TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) { 210 HashedDictionary null_dictionary(NULL, 0); 211 EXPECT_TRUE(null_dictionary.Init()); 212 VCDiffStreamingEncoder null_encoder(&null_dictionary, 213 VCD_STANDARD_FORMAT, 214 false); 215 EXPECT_TRUE(null_encoder.StartEncoding(delta())); 216 EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta())); 217 EXPECT_TRUE(null_encoder.FinishEncoding(delta())); 218 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 219 delta_size()); 220 decoder_.StartDecoding(NULL, 0); 221 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 222 delta_size(), 223 &result_target_)); 224 EXPECT_TRUE(decoder_.FinishDecoding()); 225 EXPECT_EQ(kTarget, result_target_); 226 } 227 228 TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) { 229 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); 230 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 231 delta_size()); 232 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 233 sizeof(kDictionary), 234 delta_as_const(), 235 &result_target_)); 236 EXPECT_EQ(kTarget, result_target_); 237 } 238 239 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) { 240 simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED); 241 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); 242 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 243 delta_size()); 244 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 245 sizeof(kDictionary), 246 delta_as_const(), 247 &result_target_)); 248 EXPECT_EQ(kTarget, result_target_); 249 } 250 251 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) { 252 simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM); 253 EXPECT_TRUE(simple_encoder_.Encode(kTarget, 254 strlen(kTarget), 255 delta())); 256 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 257 delta_size()); 258 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 259 sizeof(kDictionary), 260 delta_as_const(), 261 &result_target_)); 262 EXPECT_EQ(kTarget, result_target_); 263 } 264 265 TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) { 266 EXPECT_TRUE(encoder_.StartEncoding(delta())); 267 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); 268 EXPECT_TRUE(encoder_.FinishEncoding(delta())); 269 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 270 delta_size()); 271 decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); 272 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 273 delta_size(), 274 &result_target_)); 275 EXPECT_TRUE(decoder_.FinishDecoding()); 276 EXPECT_EQ(kTarget, result_target_); 277 } 278 279 TEST_F(VCDiffEncoderTest, EncodeSimpleJSON) { 280 EXPECT_TRUE(json_encoder_.StartEncoding(delta())); 281 EXPECT_TRUE(json_encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); 282 EXPECT_TRUE(json_encoder_.FinishEncoding(delta())); 283 EXPECT_EQ(kJSONDiff, delta_as_const()); 284 } 285 286 TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) { 287 string delta_start, delta_encode, delta_finish; 288 EXPECT_TRUE(encoder_.StartEncoding(&delta_start)); 289 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode)); 290 EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish)); 291 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 292 delta_start.size() + delta_encode.size() + delta_finish.size()); 293 decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); 294 EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(), 295 delta_start.size(), 296 &result_target_)); 297 EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(), 298 delta_encode.size(), 299 &result_target_)); 300 EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(), 301 delta_finish.size(), 302 &result_target_)); 303 EXPECT_TRUE(decoder_.FinishDecoding()); 304 EXPECT_EQ(kTarget, result_target_); 305 } 306 307 #ifdef HAVE_EXT_ROPE 308 // Test that the crope class can be used in place of a string for encoding 309 // and decoding. 310 TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) { 311 crope delta_crope, result_crope; 312 EXPECT_TRUE(encoder_.StartEncoding(&delta_crope)); 313 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope)); 314 EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope)); 315 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 316 delta_crope.size()); 317 decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); 318 // crope can't guarantee that its characters are contiguous, so the decoding 319 // has to be done byte-by-byte. 320 for (crope::const_iterator it = delta_crope.begin(); 321 it != delta_crope.end(); it++) { 322 const char this_char = *it; 323 EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope)); 324 } 325 EXPECT_TRUE(decoder_.FinishDecoding()); 326 crope expected_target(kTarget); 327 EXPECT_EQ(expected_target, result_crope); 328 } 329 #endif // HAVE_EXT_ROPE 330 331 // Test the encoding and decoding with a fixed chunk size. 332 // If decoder is null, only test the encoding. 333 void VCDiffEncoderTest::TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, 334 VCDiffStreamingDecoder *decoder, 335 size_t chunk_size) { 336 delta()->clear(); 337 EXPECT_TRUE(encoder->StartEncoding(delta())); 338 for (size_t chunk_start_index = 0; 339 chunk_start_index < strlen(kTarget); 340 chunk_start_index += chunk_size) { 341 size_t this_chunk_size = chunk_size; 342 const size_t bytes_available = strlen(kTarget) - chunk_start_index; 343 if (this_chunk_size > bytes_available) { 344 this_chunk_size = bytes_available; 345 } 346 EXPECT_TRUE(encoder->EncodeChunk(&kTarget[chunk_start_index], 347 this_chunk_size, 348 delta())); 349 } 350 EXPECT_TRUE(encoder->FinishEncoding(delta())); 351 const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; 352 const size_t size_of_windows = 353 strlen(kTarget) + (kWindowHeaderSize * num_windows); 354 EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size()); 355 result_target_.clear(); 356 357 if (!decoder) return; 358 359 decoder->StartDecoding(kDictionary, sizeof(kDictionary)); 360 for (size_t chunk_start_index = 0; 361 chunk_start_index < delta_size(); 362 chunk_start_index += chunk_size) { 363 size_t this_chunk_size = chunk_size; 364 const size_t bytes_available = delta_size() - chunk_start_index; 365 if (this_chunk_size > bytes_available) { 366 this_chunk_size = bytes_available; 367 } 368 EXPECT_TRUE(decoder->DecodeChunk(delta_data() + chunk_start_index, 369 this_chunk_size, 370 &result_target_)); 371 } 372 EXPECT_TRUE(decoder->FinishDecoding()); 373 EXPECT_EQ(kTarget, result_target_); 374 } 375 376 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) { 377 // These specific chunk sizes have failed in the past 378 TestWithFixedChunkSize(&encoder_, &decoder_, 6); 379 TestWithFixedChunkSize(&encoder_, &decoder_, 45); 380 TestWithFixedChunkSize(&encoder_, &decoder_, 60); 381 382 // Now loop through all possible chunk sizes 383 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { 384 TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); 385 } 386 } 387 388 TEST_F(VCDiffEncoderTest, EncodeFixedChunkSizesJSON) { 389 // There is no JSON decoder; these diffs are created by hand. 390 TestWithFixedChunkSize(&json_encoder_, NULL, 6); 391 EXPECT_EQ("[\"\\\"Just \",\"the pl\",\"ace fo\",\"r a Sn\",\"ark! I\"," 392 "\" have \",\"said i\",\"t twic\",\"e:\\nTha\",\"t alon\"," 393 "\"e shou\",\"ld enc\",\"ourage\",\" the c\",\"rew.\\nJ\"," 394 "\"ust th\",\"e plac\",\"e for \",\"a Snar\",\"k! I h\"," 395 "\"ave sa\",\"id it \",\"thrice\",\":\\nWhat\",\" I tel\"," 396 "\"l you \",\"three \",\"times \",\"is tru\",\"e.\\\"\\n\",]", 397 delta_as_const()); 398 TestWithFixedChunkSize(&json_encoder_, NULL, 45); 399 EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it t\"," 400 "\"wice:\\nThat alone should encourage the crew.\\nJ\"," 401 "\"ust the place for a Snark! I have said it thr\",\"ice:\\n" 402 "What I tell you three times is true.\\\"\\n\",]", 403 delta_as_const()); 404 TestWithFixedChunkSize(&json_encoder_, NULL, 60); 405 EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it twice:\\n" 406 "That alon\",\"e should encourage the crew.\\n" 407 "Just the place for a Snark! I h\",\"ave said it thrice:\\n" 408 "What I tell you three times is true.\\\"\\n\",]", 409 delta_as_const()); 410 } 411 412 413 // If --allow_vcd_target=false is specified, the decoder will throw away some of 414 // the internally-stored decoded target beyond the current window. Try 415 // different numbers of encoded window sizes to make sure that this behavior 416 // does not affect the results. 417 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) { 418 decoder_.SetAllowVcdTarget(false); 419 // Loop through all possible chunk sizes 420 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { 421 TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); 422 } 423 } 424 425 // Splits the text to be encoded into fixed-size chunks. Encodes each 426 // chunk and puts it into a vector of strings. Then decodes each string 427 // in the vector and appends the result into result_target_. 428 void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) { 429 std::vector<string> encoded_chunks; 430 string this_encoded_chunk; 431 size_t total_chunk_size = 0; 432 EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk)); 433 encoded_chunks.push_back(this_encoded_chunk); 434 total_chunk_size += this_encoded_chunk.size(); 435 for (size_t chunk_start_index = 0; 436 chunk_start_index < strlen(kTarget); 437 chunk_start_index += chunk_size) { 438 size_t this_chunk_size = chunk_size; 439 const size_t bytes_available = strlen(kTarget) - chunk_start_index; 440 if (this_chunk_size > bytes_available) { 441 this_chunk_size = bytes_available; 442 } 443 this_encoded_chunk.clear(); 444 EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index], 445 this_chunk_size, 446 &this_encoded_chunk)); 447 encoded_chunks.push_back(this_encoded_chunk); 448 total_chunk_size += this_encoded_chunk.size(); 449 } 450 this_encoded_chunk.clear(); 451 EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk)); 452 encoded_chunks.push_back(this_encoded_chunk); 453 total_chunk_size += this_encoded_chunk.size(); 454 const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; 455 const size_t size_of_windows = 456 strlen(kTarget) + (kWindowHeaderSize * num_windows); 457 EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size); 458 result_target_.clear(); 459 decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); 460 for (std::vector<string>::iterator it = encoded_chunks.begin(); 461 it != encoded_chunks.end(); ++it) { 462 EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_)); 463 } 464 EXPECT_TRUE(decoder_.FinishDecoding()); 465 EXPECT_EQ(kTarget, result_target_); 466 } 467 468 TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) { 469 // Loop through all possible chunk sizes 470 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { 471 TestWithEncodedChunkVector(chunk_size); 472 } 473 } 474 475 // Verify that HashedDictionary stores a copy of the dictionary text, 476 // rather than just storing a pointer to it. If the dictionary buffer 477 // is overwritten after creating a HashedDictionary from it, it shouldn't 478 // affect an encoder that uses that HashedDictionary. 479 TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) { 480 string dictionary_copy(kDictionary, sizeof(kDictionary)); 481 HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size()); 482 EXPECT_TRUE(hd_copy.Init()); 483 VCDiffStreamingEncoder copy_encoder(&hd_copy, 484 VCD_FORMAT_INTERLEAVED 485 | VCD_FORMAT_CHECKSUM, 486 /* look_for_target_matches = */ true); 487 // Produce a reference version of the encoded text. 488 string delta_before; 489 EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before)); 490 EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, 491 strlen(kTarget), 492 &delta_before)); 493 EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before)); 494 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 495 delta_before.size()); 496 497 // Overwrite the dictionary text with all 'Q' characters. 498 dictionary_copy.replace(0, 499 dictionary_copy.size(), 500 dictionary_copy.size(), 501 'Q'); 502 // When the encoder is used on the same target text after overwriting 503 // the dictionary, it should produce the same encoded output. 504 string delta_after; 505 EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after)); 506 EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after)); 507 EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after)); 508 EXPECT_EQ(delta_before, delta_after); 509 } 510 511 // Binary data test part 1: The dictionary and target data should not 512 // be treated as NULL-terminated. An embedded NULL should be handled like 513 // any other byte of data. 514 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) { 515 const char embedded_null_dictionary_text[] = 516 { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 }; 517 const char embedded_null_target[] = 518 { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 }; 519 CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); 520 CHECK_EQ(8, sizeof(embedded_null_target)); 521 HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, 522 sizeof(embedded_null_dictionary_text)); 523 EXPECT_TRUE(embedded_null_dictionary.Init()); 524 VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, 525 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, 526 /* look_for_target_matches = */ true); 527 EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); 528 EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, 529 sizeof(embedded_null_target), 530 delta())); 531 EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); 532 decoder_.StartDecoding(embedded_null_dictionary_text, 533 sizeof(embedded_null_dictionary_text)); 534 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 535 delta_size(), 536 &result_target_)); 537 EXPECT_TRUE(decoder_.FinishDecoding()); 538 EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); 539 EXPECT_EQ(string(embedded_null_target, 540 sizeof(embedded_null_target)), 541 result_target_); 542 } 543 544 // Binary data test part 2: An embedded CR or LF should be handled like 545 // any other byte of data. No text-processing of the data should occur. 546 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) { 547 const char embedded_null_dictionary_text[] = 548 { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A }; 549 const char embedded_null_target[] = 550 { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 }; 551 CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); 552 CHECK_EQ(8, sizeof(embedded_null_target)); 553 HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, 554 sizeof(embedded_null_dictionary_text)); 555 EXPECT_TRUE(embedded_null_dictionary.Init()); 556 VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, 557 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, 558 /* look_for_target_matches = */ true); 559 EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); 560 EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, 561 sizeof(embedded_null_target), 562 delta())); 563 EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); 564 decoder_.StartDecoding(embedded_null_dictionary_text, 565 sizeof(embedded_null_dictionary_text)); 566 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 567 delta_size(), 568 &result_target_)); 569 EXPECT_TRUE(decoder_.FinishDecoding()); 570 EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); 571 EXPECT_EQ(string(embedded_null_target, 572 sizeof(embedded_null_target)), 573 result_target_); 574 } 575 576 TEST_F(VCDiffEncoderTest, UsingWideCharacters) { 577 const wchar_t wchar_dictionary_text[] = 578 L"\"Just the place for a Snark!\" the Bellman cried,\n" 579 L"As he landed his crew with care;\n" 580 L"Supporting each man on the top of the tide\n" 581 L"By a finger entwined in his hair.\n"; 582 583 const wchar_t wchar_target[] = 584 L"\"Just the place for a Snark! I have said it twice:\n" 585 L"That alone should encourage the crew.\n" 586 L"Just the place for a Snark! I have said it thrice:\n" 587 L"What I tell you three times is true.\"\n"; 588 589 HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text, 590 sizeof(wchar_dictionary_text)); 591 EXPECT_TRUE(wchar_dictionary.Init()); 592 VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary, 593 VCD_FORMAT_INTERLEAVED 594 | VCD_FORMAT_CHECKSUM, 595 /* look_for_target_matches = */ false); 596 EXPECT_TRUE(wchar_encoder.StartEncoding(delta())); 597 EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target, 598 sizeof(wchar_target), 599 delta())); 600 EXPECT_TRUE(wchar_encoder.FinishEncoding(delta())); 601 decoder_.StartDecoding((const char*) wchar_dictionary_text, 602 sizeof(wchar_dictionary_text)); 603 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), 604 delta_size(), 605 &result_target_)); 606 EXPECT_TRUE(decoder_.FinishDecoding()); 607 const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data(); 608 EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar)); 609 EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar)); 610 } 611 612 #if defined(HAVE_MPROTECT) && \ 613 (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)) 614 // Bug 1220602: Make sure the encoder doesn't read past the end of the input 615 // buffer. 616 TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) { 617 const size_t target_size = strlen(kTarget); 618 619 // Allocate two memory pages. 620 const int page_size = getpagesize(); 621 void* two_pages = NULL; 622 #ifdef HAVE_POSIX_MEMALIGN 623 posix_memalign(&two_pages, page_size, 2 * page_size); 624 #else // !HAVE_POSIX_MEMALIGN 625 two_pages = memalign(page_size, 2 * page_size); 626 #endif // HAVE_POSIX_MEMALIGN 627 char* const first_page = reinterpret_cast<char*>(two_pages); 628 char* const second_page = first_page + page_size; 629 630 // Place the target string at the end of the first page. 631 char* const target_with_guard = second_page - target_size; 632 memcpy(target_with_guard, kTarget, target_size); 633 634 // Make the second page unreadable. 635 mprotect(second_page, page_size, PROT_NONE); 636 637 // Now perform the encode operation, which will cause a segmentation fault 638 // if it reads past the end of the buffer. 639 EXPECT_TRUE(encoder_.StartEncoding(delta())); 640 EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); 641 EXPECT_TRUE(encoder_.FinishEncoding(delta())); 642 643 // Undo the mprotect. 644 mprotect(second_page, page_size, PROT_READ|PROT_WRITE); 645 free(two_pages); 646 } 647 648 TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) { 649 const size_t target_size = strlen(kTarget); 650 651 // Allocate two memory pages. 652 const int page_size = getpagesize(); 653 void* two_pages = NULL; 654 #ifdef HAVE_POSIX_MEMALIGN 655 posix_memalign(&two_pages, page_size, 2 * page_size); 656 #else // !HAVE_POSIX_MEMALIGN 657 two_pages = memalign(page_size, 2 * page_size); 658 #endif // HAVE_POSIX_MEMALIGN 659 char* const first_page = reinterpret_cast<char*>(two_pages); 660 char* const second_page = first_page + page_size; 661 662 // Make the first page unreadable. 663 mprotect(first_page, page_size, PROT_NONE); 664 665 // Place the target string at the beginning of the second page. 666 char* const target_with_guard = second_page; 667 memcpy(target_with_guard, kTarget, target_size); 668 669 // Now perform the encode operation, which will cause a segmentation fault 670 // if it reads past the beginning of the buffer. 671 EXPECT_TRUE(encoder_.StartEncoding(delta())); 672 EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); 673 EXPECT_TRUE(encoder_.FinishEncoding(delta())); 674 675 // Undo the mprotect. 676 mprotect(first_page, page_size, PROT_READ|PROT_WRITE); 677 free(two_pages); 678 } 679 #endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN) 680 681 class VCDiffHTML1Test : public VerifyEncodedBytesTest { 682 protected: 683 static const char kDictionary[]; 684 static const char kTarget[]; 685 static const char kRedundantTarget[]; 686 687 VCDiffHTML1Test(); 688 virtual ~VCDiffHTML1Test() { } 689 690 void SimpleEncode(); 691 void StreamingEncode(); 692 693 HashedDictionary hashed_dictionary_; 694 VCDiffStreamingEncoder encoder_; 695 VCDiffStreamingDecoder decoder_; 696 VCDiffEncoder simple_encoder_; 697 VCDiffDecoder simple_decoder_; 698 699 string result_target_; 700 }; 701 702 const char VCDiffHTML1Test::kDictionary[] = 703 "<html><font color=red>This part from the dict</font><br>"; 704 705 const char VCDiffHTML1Test::kTarget[] = 706 "<html><font color=red>This part from the dict</font><br>\n" 707 "And this part is not...</html>"; 708 709 const char VCDiffHTML1Test::kRedundantTarget[] = 710 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" 711 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" 712 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" 713 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; // 256 714 715 VCDiffHTML1Test::VCDiffHTML1Test() 716 : hashed_dictionary_(kDictionary, sizeof(kDictionary)), 717 encoder_(&hashed_dictionary_, 718 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, 719 /* look_for_target_matches = */ true), 720 simple_encoder_(kDictionary, sizeof(kDictionary)) { 721 EXPECT_TRUE(hashed_dictionary_.Init()); 722 } 723 724 void VCDiffHTML1Test::SimpleEncode() { 725 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); 726 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 727 delta_size()); 728 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 729 sizeof(kDictionary), 730 delta_as_const(), 731 &result_target_)); 732 EXPECT_EQ(kTarget, result_target_); 733 } 734 735 void VCDiffHTML1Test::StreamingEncode() { 736 EXPECT_TRUE(encoder_.StartEncoding(delta())); 737 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); 738 EXPECT_TRUE(encoder_.FinishEncoding(delta())); 739 } 740 741 TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) { 742 SimpleEncode(); 743 // These values do not depend on the block size used for encoding 744 ExpectByte(0xD6); // 'V' | 0x80 745 ExpectByte(0xC3); // 'C' | 0x80 746 ExpectByte(0xC4); // 'D' | 0x80 747 ExpectByte(0x00); // Simple encoder never uses interleaved format 748 ExpectByte(0x00); // Hdr_Indicator 749 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) 750 ExpectByte(sizeof(kDictionary)); // Dictionary length 751 ExpectByte(0x00); // Source segment position: start of dictionary 752 if (BlockHash::kBlockSize < 16) { 753 // A medium block size will catch the "his part " match. 754 ExpectByte(0x22); // Length of the delta encoding 755 ExpectSize(strlen(kTarget)); // Size of the target window 756 ExpectByte(0x00); // Delta_indicator (no compression) 757 ExpectByte(0x16); // Length of the data section 758 ExpectByte(0x05); // Length of the instructions section 759 ExpectByte(0x02); // Length of the address section 760 // Data section 761 ExpectString("\nAnd t"); // Data for 1st ADD 762 ExpectString("is not...</html>"); // Data for 2nd ADD 763 // Instructions section 764 ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) 765 ExpectByte(0x38); // COPY size (56) 766 ExpectByte(0x07); // ADD size 6 767 ExpectByte(0x19); // COPY size 9 mode VCD_SELF 768 ExpectByte(0x11); // ADD size 16 769 // Address section 770 ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) 771 ExpectByte(0x17); // COPY address (23) mode VCD_SELF 772 } else if (BlockHash::kBlockSize <= 56) { 773 // Any block size up to 56 will catch the matching prefix string. 774 ExpectByte(0x29); // Length of the delta encoding 775 ExpectSize(strlen(kTarget)); // Size of the target window 776 ExpectByte(0x00); // Delta_indicator (no compression) 777 ExpectByte(0x1F); // Length of the data section 778 ExpectByte(0x04); // Length of the instructions section 779 ExpectByte(0x01); // Length of the address section 780 ExpectString("\nAnd this part is not...</html>"); // Data for ADD 781 // Instructions section 782 ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) 783 ExpectByte(0x38); // COPY size (56) 784 ExpectByte(0x01); // ADD size 0 785 ExpectByte(0x1F); // Size of ADD (31) 786 // Address section 787 ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) 788 } else { 789 // The matching string is 56 characters long, and the block size is 790 // 64 or greater, so no match should be found. 791 ExpectSize(strlen(kTarget) + 7); // Delta encoding len 792 ExpectSize(strlen(kTarget)); // Size of the target window 793 ExpectByte(0x00); // Delta_indicator (no compression) 794 ExpectSize(strlen(kTarget)); // Length of the data section 795 ExpectByte(0x02); // Length of the instructions section 796 ExpectByte(0x00); // Length of the address section 797 // Data section 798 ExpectString(kTarget); 799 ExpectByte(0x01); // ADD size 0 800 ExpectSize(strlen(kTarget)); 801 } 802 ExpectNoMoreBytes(); 803 } 804 805 TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) { 806 EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, 807 strlen(kRedundantTarget), 808 delta())); 809 EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, 810 delta_size()); 811 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 812 sizeof(kDictionary), 813 delta_as_const(), 814 &result_target_)); 815 EXPECT_EQ(kRedundantTarget, result_target_); 816 // These values do not depend on the block size used for encoding 817 ExpectByte(0xD6); // 'V' | 0x80 818 ExpectByte(0xC3); // 'C' | 0x80 819 ExpectByte(0xC4); // 'D' | 0x80 820 ExpectByte(0x00); // Simple encoder never uses interleaved format 821 ExpectByte(0x00); // Hdr_Indicator 822 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) 823 ExpectByte(sizeof(kDictionary)); // Dictionary length 824 ExpectByte(0x00); // Source segment position: start of dictionary 825 ExpectByte(0x0C); // Length of the delta encoding 826 ExpectSize(strlen(kRedundantTarget)); // Size of the target window 827 ExpectByte(0x00); // Delta_indicator (no compression) 828 ExpectByte(0x01); // Length of the data section 829 ExpectByte(0x04); // Length of the instructions section 830 ExpectByte(0x01); // Length of the address section 831 // Data section 832 ExpectString("A"); // Data for ADD 833 // Instructions section 834 ExpectByte(0x02); // ADD size 1 835 ExpectByte(0x23); // COPY size 0 mode VCD_HERE 836 ExpectSize(strlen(kRedundantTarget) - 1); // COPY size 255 837 // Address section 838 ExpectByte(0x01); // COPY address (1) mode VCD_HERE 839 ExpectNoMoreBytes(); 840 } 841 842 TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) { 843 simple_encoder_.SetTargetMatching(false); 844 EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, 845 strlen(kRedundantTarget), 846 delta())); 847 EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, 848 delta_size()); 849 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 850 sizeof(kDictionary), 851 delta_as_const(), 852 &result_target_)); 853 EXPECT_EQ(kRedundantTarget, result_target_); 854 // These values do not depend on the block size used for encoding 855 ExpectByte(0xD6); // 'V' | 0x80 856 ExpectByte(0xC3); // 'C' | 0x80 857 ExpectByte(0xC4); // 'D' | 0x80 858 ExpectByte(0x00); // Simple encoder never uses interleaved format 859 ExpectByte(0x00); // Hdr_Indicator 860 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) 861 ExpectByte(sizeof(kDictionary)); // Dictionary length 862 ExpectByte(0x00); // Source segment position: start of dictionary 863 ExpectSize(strlen(kRedundantTarget) + 0x0A); // Length of the delta encoding 864 ExpectSize(strlen(kRedundantTarget)); // Size of the target window 865 ExpectByte(0x00); // Delta_indicator (no compression) 866 ExpectSize(strlen(kRedundantTarget)); // Length of the data section 867 ExpectByte(0x03); // Length of the instructions section 868 ExpectByte(0x00); // Length of the address section 869 // Data section 870 ExpectString(kRedundantTarget); // Data for ADD 871 // Instructions section 872 ExpectByte(0x01); // ADD size 0 873 ExpectSize(strlen(kRedundantTarget)); // ADD size 874 // Address section empty 875 ExpectNoMoreBytes(); 876 } 877 878 class VCDiffHTML2Test : public VerifyEncodedBytesTest { 879 protected: 880 static const char kDictionary[]; 881 static const char kTarget[]; 882 883 VCDiffHTML2Test(); 884 virtual ~VCDiffHTML2Test() { } 885 886 void SimpleEncode(); 887 void StreamingEncode(); 888 889 HashedDictionary hashed_dictionary_; 890 VCDiffStreamingEncoder encoder_; 891 VCDiffStreamingDecoder decoder_; 892 VCDiffEncoder simple_encoder_; 893 VCDiffDecoder simple_decoder_; 894 895 string result_target_; 896 }; 897 898 const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test"; 899 900 const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n"; 901 902 VCDiffHTML2Test::VCDiffHTML2Test() 903 : hashed_dictionary_(kDictionary, sizeof(kDictionary)), 904 encoder_(&hashed_dictionary_, 905 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, 906 /* look_for_target_matches = */ true), 907 simple_encoder_(kDictionary, sizeof(kDictionary)) { 908 EXPECT_TRUE(hashed_dictionary_.Init()); 909 } 910 911 void VCDiffHTML2Test::SimpleEncode() { 912 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); 913 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 914 delta_size()); 915 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 916 sizeof(kDictionary), 917 delta_as_const(), 918 &result_target_)); 919 EXPECT_EQ(kTarget, result_target_); 920 } 921 922 void VCDiffHTML2Test::StreamingEncode() { 923 EXPECT_TRUE(encoder_.StartEncoding(delta())); 924 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); 925 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, 926 delta_size()); 927 EXPECT_TRUE(simple_decoder_.Decode(kDictionary, 928 sizeof(kDictionary), 929 delta_as_const(), 930 &result_target_)); 931 EXPECT_EQ(kTarget, result_target_); 932 } 933 934 TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) { 935 SimpleEncode(); 936 // These values do not depend on the block size used for encoding 937 ExpectByte(0xD6); // 'V' | 0x80 938 ExpectByte(0xC3); // 'C' | 0x80 939 ExpectByte(0xC4); // 'D' | 0x80 940 ExpectByte(0x00); // Simple encoder never uses interleaved format 941 ExpectByte(0x00); // Hdr_Indicator 942 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) 943 ExpectByte(sizeof(kDictionary)); // Dictionary length 944 ExpectByte(0x00); // Source segment position: start of dictionary 945 if (BlockHash::kBlockSize <= 8) { 946 ExpectByte(12); // Length of the delta encoding 947 ExpectSize(strlen(kTarget)); // Size of the target window 948 ExpectByte(0x00); // Delta_indicator (no compression) 949 ExpectByte(0x04); // Length of the data section 950 ExpectByte(0x02); // Length of the instructions section 951 ExpectByte(0x01); // Length of the address section 952 ExpectByte('!'); 953 ExpectByte('!'); 954 ExpectByte('!'); 955 ExpectByte('\n'); 956 ExpectByte(0x1E); // COPY size 14 mode VCD_SELF 957 ExpectByte(0x05); // ADD size 4 958 ExpectByte(0x03); // COPY address (3) mode VCD_SELF 959 } else { 960 // Larger block sizes will not catch any matches. 961 ExpectSize(strlen(kTarget) + 7); // Delta encoding len 962 ExpectSize(strlen(kTarget)); // Size of the target window 963 ExpectByte(0x00); // Delta_indicator (no compression) 964 ExpectSize(strlen(kTarget)); // Length of the data section 965 ExpectByte(0x02); // Length of the instructions section 966 ExpectByte(0x00); // Length of the address section 967 // Data section 968 ExpectString(kTarget); 969 ExpectByte(0x01); // ADD size 0 970 ExpectSize(strlen(kTarget)); 971 } 972 ExpectNoMoreBytes(); 973 } 974 975 TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) { 976 StreamingEncode(); 977 const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget)); 978 CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum)); 979 // These values do not depend on the block size used for encoding 980 ExpectByte(0xD6); // 'V' | 0x80 981 ExpectByte(0xC3); // 'C' | 0x80 982 ExpectByte(0xC4); // 'D' | 0x80 983 ExpectByte('S'); // Format extensions 984 ExpectByte(0x00); // Hdr_Indicator 985 ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator 986 ExpectByte(sizeof(kDictionary)); // Dictionary length 987 ExpectByte(0x00); // Source segment position: start of dictionary 988 if (BlockHash::kBlockSize <= 8) { 989 ExpectByte(17); // Length of the delta encoding 990 ExpectSize(strlen(kTarget)); // Size of the target window 991 ExpectByte(0x00); // Delta_indicator (no compression) 992 ExpectByte(0x00); // Length of the data section 993 ExpectByte(0x07); // Length of the instructions section 994 ExpectByte(0x00); // Length of the address section 995 ExpectChecksum(html2_checksum); 996 ExpectByte(0x1E); // COPY size 14 mode VCD_SELF 997 ExpectByte(0x03); // COPY address (3) mode VCD_SELF 998 ExpectByte(0x05); // ADD size 4 999 ExpectByte('!'); 1000 ExpectByte('!'); 1001 ExpectByte('!'); 1002 ExpectByte('\n'); 1003 } else { 1004 // Larger block sizes will not catch any matches. 1005 ExpectSize(strlen(kTarget) + 12); // Delta encoding len 1006 ExpectSize(strlen(kTarget)); // Size of the target window 1007 ExpectByte(0x00); // Delta_indicator (no compression) 1008 ExpectByte(0x00); // Length of the data section 1009 ExpectSize(0x02 + strlen(kTarget)); // Interleaved 1010 ExpectByte(0x00); // Length of the address section 1011 ExpectChecksum(html2_checksum); 1012 // Data section 1013 ExpectByte(0x01); // ADD size 0 1014 ExpectSize(strlen(kTarget)); 1015 ExpectString(kTarget); 1016 } 1017 ExpectNoMoreBytes(); 1018 } 1019 1020 } // anonymous namespace 1021 } // namespace open_vcdiff 1022