1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <config.h> 17 #include "vcdiffengine.h" 18 #include <string.h> // memset, strlen 19 #include <algorithm> 20 #include <string> 21 #include <vector> 22 #include "addrcache.h" 23 #include "blockhash.h" 24 #include "encodetable.h" 25 #include "google/output_string.h" 26 #include "rolling_hash.h" 27 #include "testing.h" 28 #include "varint_bigendian.h" 29 #include "vcdiff_defs.h" 30 31 namespace open_vcdiff { 32 33 namespace { 34 35 class VCDiffEngineTestBase : public testing::Test { 36 protected: 37 typedef std::string string; 38 39 // Some common definitions and helper functions used in the various tests 40 // for VCDiffEngine. 41 static const int kBlockSize = VCDiffEngine::kMinimumMatchSize; 42 43 VCDiffEngineTestBase() : interleaved_(false), 44 diff_output_string_(&diff_), 45 verify_position_(0), 46 saved_total_size_position_(0), 47 saved_delta_encoding_position_(0), 48 saved_section_sizes_position_(0), 49 data_bytes_(0), 50 instruction_bytes_(0), 51 address_bytes_(0) { } 52 53 virtual ~VCDiffEngineTestBase() { } 54 55 virtual void TearDown() { 56 VerifyMatchCounts(); 57 } 58 59 // Copy string_without_spaces into newly allocated result buffer, 60 // but pad its contents with space characters so that every character 61 // in string_without_spaces corresponds to (block_size - 1) 62 // spaces in the result, followed by that character. 63 // For example: 64 // If string_without_spaces begins "The only thing"... and block_size is 4, 65 // then 3 space characters will be inserted 66 // between each letter in the result, as follows: 67 // " T h e o n l y t h i n g"... 68 // This makes testing simpler, because finding a block_size-byte match 69 // between the dictionary and target only depends on the 70 // trailing letter in each block. 71 // If no_initial_padding is true, then the first letter will not have 72 // spaces added in front of it. 73 static void MakeEachLetterABlock(const char* string_without_spaces, 74 const char** result, 75 int block_size, 76 bool no_initial_padding) { 77 const size_t length_without_spaces = strlen(string_without_spaces); 78 char* padded_text = new char[(block_size * length_without_spaces) + 1]; 79 memset(padded_text, ' ', block_size * length_without_spaces); 80 char* padded_text_ptr = padded_text; 81 if (!no_initial_padding) { 82 padded_text_ptr += block_size - 1; 83 } 84 for (size_t i = 0; i < length_without_spaces; ++i) { 85 *padded_text_ptr = string_without_spaces[i]; 86 padded_text_ptr += block_size; 87 } 88 *(padded_text_ptr - block_size + 1) = '\0'; 89 *result = padded_text; 90 } 91 92 // These functions iterate through the decoded output and expect 93 // simple elements: bytes or variable-length integers. 94 void ExpectByte(char byte) { 95 EXPECT_GT(diff_.size(), verify_position_); 96 EXPECT_EQ(byte, diff_[verify_position_]); 97 ++verify_position_; 98 } 99 100 size_t ExpectVarint(int32_t expected_value) { 101 EXPECT_GT(diff_.size(), verify_position_); 102 const char* const original_position = &diff_[verify_position_]; 103 const char* new_position = original_position; 104 const size_t expected_length = VarintBE<int32_t>::Length(expected_value); 105 int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), 106 &new_position); 107 EXPECT_LE(0, parsed_value); 108 size_t parsed_length = new_position - original_position; 109 EXPECT_EQ(expected_value, parsed_value); 110 EXPECT_EQ(expected_length, parsed_length); 111 verify_position_ += parsed_length; 112 return parsed_length; 113 } 114 115 size_t ExpectSize(size_t size) { 116 return ExpectVarint(static_cast<int32_t>(size)); 117 } 118 119 size_t ExpectStringLength(const char* s) { 120 return ExpectSize(strlen(s)); 121 } 122 123 void SkipVarint() { 124 EXPECT_GT(diff_.size(), verify_position_); 125 const char* const original_position = &diff_[verify_position_]; 126 const char* new_position = original_position; 127 VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); 128 size_t parsed_length = new_position - original_position; 129 verify_position_ += parsed_length; 130 } 131 132 void ExpectDataByte(char byte) { 133 ExpectByte(byte); 134 if (interleaved_) { 135 ++instruction_bytes_; 136 } else { 137 ++data_bytes_; 138 } 139 } 140 141 void ExpectDataString(const char *expected_string) { 142 const char* ptr = expected_string; 143 while (*ptr) { 144 ExpectDataByte(*ptr); 145 ++ptr; 146 } 147 } 148 149 void ExpectDataStringWithBlockSpacing(const char *expected_string, 150 bool trailing_spaces) { 151 const char* ptr = expected_string; 152 while (*ptr) { 153 for (int i = 0; i < (kBlockSize - 1); ++i) { 154 ExpectDataByte(' '); 155 } 156 ExpectDataByte(*ptr); 157 ++ptr; 158 } 159 if (trailing_spaces) { 160 for (int i = 0; i < (kBlockSize - 1); ++i) { 161 ExpectDataByte(' '); 162 } 163 } 164 } 165 166 void ExpectInstructionByte(char byte) { 167 ExpectByte(byte); 168 ++instruction_bytes_; 169 } 170 171 void ExpectInstructionVarint(int32_t value) { 172 instruction_bytes_ += ExpectVarint(value); 173 } 174 175 void ExpectAddressByte(char byte) { 176 ExpectByte(byte); 177 if (interleaved_) { 178 ++instruction_bytes_; 179 } else { 180 ++address_bytes_; 181 } 182 } 183 184 void ExpectAddressVarint(int32_t value) { 185 if (interleaved_) { 186 instruction_bytes_ += ExpectVarint(value); 187 } else { 188 address_bytes_ += ExpectVarint(value); 189 } 190 } 191 192 // The following functions leverage the fact that the encoder uses 193 // the default code table and cache sizes. They are able to search for 194 // instructions of a particular size. The logic for mapping from 195 // instruction type, mode, and size to opcode value is very different here 196 // from the logic used in encodetable.{h,cc}, so hopefully 197 // this version will help validate that the other is correct. 198 // This version uses conditional statements, while encodetable.h 199 // looks up values in a mapping table. 200 void ExpectAddress(int32_t address, int copy_mode) { 201 if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) { 202 ExpectAddressVarint(address); 203 } else { 204 ExpectAddressByte(address); 205 } 206 } 207 208 void ExpectAddInstruction(int size) { 209 if (size <= 18) { 210 ExpectInstructionByte(0x01 + size); 211 } else { 212 ExpectInstructionByte(0x01); 213 ExpectInstructionVarint(size); 214 } 215 } 216 217 void ExpectCopyInstruction(int size, int mode) { 218 if ((size >= 4) && (size <= 16)) { 219 ExpectInstructionByte(0x10 + (0x10 * mode) + size); 220 } else { 221 ExpectInstructionByte(0x13 + (0x10 * mode)); 222 ExpectInstructionVarint(size); 223 } 224 ExpectMatch(size); 225 } 226 227 bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { 228 if (!default_cache_.IsSameMode(copy_mode) && 229 (add_size <= 4) && 230 (copy_size >= 4) && 231 (copy_size <= 6)) { 232 ExpectInstructionByte(0x9C + 233 (0x0C * copy_mode) + 234 (0x03 * add_size) + 235 copy_size); 236 ExpectMatch(copy_size); 237 return true; 238 } else if (default_cache_.IsSameMode(copy_mode) && 239 (add_size <= 4) && 240 (copy_size == 4)) { 241 ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); 242 ExpectMatch(copy_size); 243 return true; 244 } else { 245 ExpectAddInstruction(add_size); 246 return false; 247 } 248 } 249 250 void ExpectAddInstructionForStringLength(const char* s) { 251 ExpectAddInstruction(static_cast<int>(strlen(s))); 252 } 253 254 // Call this function before beginning to iterate through the diff string 255 // using the Expect... functions. 256 // text must be NULL-terminated. 257 void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, 258 const char* target_text) { 259 ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) 260 ExpectStringLength(dictionary); 261 ExpectByte(0x00); // Source segment position: start of dictionary 262 saved_total_size_position_ = verify_position_; 263 SkipVarint(); // Length of the delta encoding 264 saved_delta_encoding_position_ = verify_position_; 265 ExpectStringLength(target_text); 266 ExpectByte(0x00); // Delta_indicator (no compression) 267 saved_section_sizes_position_ = verify_position_; 268 SkipVarint(); // length of data for ADDs and RUNs 269 SkipVarint(); // length of instructions section 270 SkipVarint(); // length of addresses for COPYs 271 } 272 273 // Call this function before beginning to iterating through the entire 274 // diff string using the Expect... functions. It makes sure that the 275 // size totals in the window header match the number of bytes that 276 // were parsed. 277 void VerifySizes() { 278 EXPECT_EQ(verify_position_, diff_.size()); 279 const size_t delta_encoding_size = verify_position_ - 280 saved_delta_encoding_position_; 281 verify_position_ = saved_total_size_position_; 282 ExpectSize(delta_encoding_size); 283 verify_position_ = saved_section_sizes_position_; 284 ExpectSize(data_bytes_); 285 ExpectSize(instruction_bytes_); 286 ExpectSize(address_bytes_); 287 } 288 289 void ExpectMatch(size_t match_size) { 290 if (match_size >= expected_match_counts_.size()) { 291 // Be generous to avoid resizing again 292 expected_match_counts_.resize(match_size * 2, 0); 293 } 294 ++expected_match_counts_[match_size]; 295 } 296 297 void VerifyMatchCounts() { 298 EXPECT_TRUE(std::equal(expected_match_counts_.begin(), 299 expected_match_counts_.end(), 300 actual_match_counts_.begin())); 301 } 302 303 bool interleaved_; 304 string diff_; 305 OutputString<string> diff_output_string_; 306 size_t verify_position_; 307 size_t saved_total_size_position_; 308 size_t saved_delta_encoding_position_; 309 size_t saved_section_sizes_position_; 310 size_t data_bytes_; 311 size_t instruction_bytes_; 312 size_t address_bytes_; 313 VCDiffAddressCache default_cache_; // Used for finding mode values 314 std::vector<int> expected_match_counts_; 315 std::vector<int> actual_match_counts_; 316 }; 317 318 class VCDiffEngineTest : public VCDiffEngineTestBase { 319 protected: 320 VCDiffEngineTest() : 321 engine_(dictionary_, strlen(dictionary_)) { 322 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); 323 } 324 325 virtual ~VCDiffEngineTest() { } 326 327 328 static void SetUpTestCase() { 329 MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, 330 kBlockSize, false); 331 MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); 332 } 333 334 static void TearDownTestCase() { 335 delete[] dictionary_; 336 delete[] target_; 337 } 338 339 void EncodeNothing(bool interleaved, bool target_matching) { 340 interleaved_ = interleaved; 341 VCDiffCodeTableWriter coder(interleaved); 342 coder.Init(engine_.dictionary_size()); 343 engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); 344 EXPECT_TRUE(diff_.empty()); 345 actual_match_counts_ = coder.match_counts(); 346 } 347 348 // text must be NULL-terminated 349 void EncodeText(const char* text, bool interleaved, bool target_matching) { 350 interleaved_ = interleaved; 351 VCDiffCodeTableWriter coder(interleaved); 352 coder.Init(engine_.dictionary_size()); 353 engine_.Encode(text, 354 strlen(text), 355 target_matching, 356 &diff_output_string_, 357 &coder); 358 actual_match_counts_ = coder.match_counts(); 359 } 360 361 void Encode(bool interleaved, bool target_matching) { 362 EncodeText(target_, interleaved, target_matching); 363 VerifyHeader(); 364 } 365 366 void VerifyHeader() { 367 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); 368 } 369 370 static const char dictionary_without_spaces_[]; 371 static const char target_without_spaces_[]; 372 373 static const char* dictionary_; 374 static const char* target_; 375 376 const VCDiffEngine engine_; 377 }; 378 379 #ifdef GTEST_HAS_DEATH_TEST 380 typedef VCDiffEngineTest VCDiffEngineDeathTest; 381 #endif // GTEST_HAS_DEATH_TEST 382 383 const char VCDiffEngineTest::dictionary_without_spaces_[] = 384 "The only thing we have to fear is fear itself"; 385 386 const char VCDiffEngineTest::target_without_spaces_[] = 387 "What we hear is fearsome"; 388 389 const char* VCDiffEngineTest::dictionary_ = NULL; 390 const char* VCDiffEngineTest::target_ = NULL; 391 392 #ifdef GTEST_HAS_DEATH_TEST 393 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { 394 EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), 395 "twice"); 396 } 397 #endif // GTEST_HAS_DEATH_TEST 398 399 TEST_F(VCDiffEngineTest, EngineEncodeNothing) { 400 EncodeNothing(/* interleaved = */ false, /* target matching = */ false); 401 } 402 403 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { 404 EncodeNothing(/* interleaved = */ true, /* target matching = */ false); 405 } 406 407 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { 408 EncodeNothing(/* interleaved = */ false, /* target matching = */ true); 409 } 410 411 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { 412 EncodeNothing(/* interleaved = */ true, /* target matching = */ true); 413 } 414 415 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { 416 const char* small_text = " "; 417 EncodeText(small_text, 418 /* interleaved = */ false, 419 /* target matching = */ false); 420 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); 421 // Data for ADDs 422 ExpectDataString(small_text); 423 // Instructions and sizes 424 ExpectAddInstructionForStringLength(small_text); 425 } 426 427 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { 428 const char* small_text = " "; 429 EncodeText(small_text, 430 /* interleaved = */ true, 431 /* target matching = */ false); 432 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); 433 // Interleaved section 434 ExpectAddInstructionForStringLength(small_text); 435 ExpectDataString(small_text); 436 } 437 438 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { 439 Encode(/* interleaved = */ false, /* target matching = */ false); 440 // Data for ADDs 441 ExpectDataStringWithBlockSpacing("W", false); 442 ExpectDataByte('t'); 443 ExpectDataByte('s'); 444 ExpectDataByte('m'); 445 // Instructions and sizes 446 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 447 VCD_SELF_MODE)) { 448 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 449 } 450 ExpectAddInstruction(1); 451 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 452 ExpectCopyInstruction(11 * kBlockSize, 453 default_cache_.FirstNearMode()); 454 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 455 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 456 } 457 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 458 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 459 } 460 // Addresses for COPY 461 ExpectAddressVarint(18 * kBlockSize); // "ha" 462 ExpectAddressVarint(14 * kBlockSize); // " we h" 463 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 464 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 465 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 466 VerifySizes(); 467 } 468 469 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { 470 Encode(/* interleaved = */ true, /* target matching = */ false); 471 // Interleaved section 472 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 473 VCD_SELF_MODE)) { 474 ExpectDataStringWithBlockSpacing("W", false); 475 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 476 } else { 477 ExpectDataStringWithBlockSpacing("W", false); 478 } 479 ExpectAddressVarint(18 * kBlockSize); // "ha" 480 ExpectAddInstruction(1); 481 ExpectDataByte('t'); 482 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 483 ExpectAddressVarint(14 * kBlockSize); // " we h" 484 ExpectCopyInstruction(11 * kBlockSize, 485 default_cache_.FirstNearMode()); 486 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 487 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 488 ExpectDataByte('s'); 489 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 490 } else { 491 ExpectDataByte('s'); 492 } 493 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 494 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 495 ExpectDataByte('m'); 496 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 497 } else { 498 ExpectDataByte('m'); 499 } 500 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 501 VerifySizes(); 502 } 503 504 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { 505 Encode(/* interleaved = */ false, /* target matching = */ true); 506 // Data for ADDs 507 ExpectDataStringWithBlockSpacing("W", false); 508 ExpectDataByte('t'); 509 ExpectDataByte('s'); 510 ExpectDataByte('m'); 511 // Instructions and sizes 512 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 513 VCD_SELF_MODE)) { 514 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 515 } 516 ExpectAddInstruction(1); 517 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 518 ExpectCopyInstruction(11 * kBlockSize, 519 default_cache_.FirstNearMode()); 520 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 521 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 522 } 523 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 524 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 525 } 526 // Addresses for COPY 527 ExpectAddressVarint(18 * kBlockSize); // "ha" 528 ExpectAddressVarint(14 * kBlockSize); // " we h" 529 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 530 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 531 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 532 VerifySizes(); 533 } 534 535 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { 536 Encode(/* interleaved = */ true, /* target matching = */ false); 537 // Interleaved section 538 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 539 VCD_SELF_MODE)) { 540 ExpectDataStringWithBlockSpacing("W", false); 541 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 542 } else { 543 ExpectDataStringWithBlockSpacing("W", false); 544 } 545 ExpectAddressVarint(18 * kBlockSize); // "ha" 546 ExpectAddInstruction(1); 547 ExpectDataByte('t'); 548 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 549 ExpectAddressVarint(14 * kBlockSize); // " we h" 550 ExpectCopyInstruction(11 * kBlockSize, 551 default_cache_.FirstNearMode()); 552 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 553 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 554 ExpectDataByte('s'); 555 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 556 } else { 557 ExpectDataByte('s'); 558 } 559 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 560 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 561 ExpectDataByte('m'); 562 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 563 } else { 564 ExpectDataByte('m'); 565 } 566 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 567 VerifySizes(); 568 } 569 570 // This test case takes a dictionary containing several instances of the string 571 // "weasel", and a target string which is identical to the dictionary 572 // except that all instances of "weasel" have been replaced with the string 573 // "moon-pie". It tests that COPY instructions are generated for all 574 // boilerplate text (that is, the text between the "moon-pie" instances in 575 // the target) and, if target matching is enabled, that each instance of 576 // "moon-pie" (except the first one) is encoded using a COPY instruction 577 // rather than an ADD. 578 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { 579 protected: 580 // kCompressibleTestBlockSize: 581 // The size of the block to create for each letter in the 582 // dictionary and search string for the "compressible text" test. 583 // See MakeEachLetterABlock, below. 584 // If we use kCompressibleTestBlockSize = kBlockSize, then the 585 // encoder will find one match per unique letter in the HTML text. 586 // There are too many examples of "<" in the text for the encoder 587 // to iterate through them all, and some matches are not found. 588 // If we use kCompressibleTextBlockSize = 1, then the boilerplate 589 // text between "weasel" strings in the dictionary and "moon-pie" 590 // strings in the target may not be long enough to be found by 591 // the encoder's block-hash algorithm. A good value, that will give 592 // reproducible results across all block sizes, will be somewhere 593 // in between these extremes. 594 static const int kCompressibleTestBlockSize = kBlockSize / 4; 595 static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; 596 597 WeaselsToMoonpiesTest() : 598 engine_(dictionary_, strlen(dictionary_)), 599 match_index_(0), 600 search_dictionary_(dictionary_, strlen(dictionary_)), 601 copied_moonpie_address_(0) { 602 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); 603 weasel_positions_[0] = 0; 604 after_weasel_[0] = 0; 605 moonpie_positions_[0] = 0; 606 after_moonpie_[0] = 0; 607 } 608 609 virtual ~WeaselsToMoonpiesTest() { } 610 611 static void SetUpTestCase() { 612 MakeEachLetterABlock(dictionary_without_spaces_, 613 &dictionary_, 614 kCompressibleTestBlockSize, 615 false); 616 MakeEachLetterABlock(target_without_spaces_, 617 &target_, 618 kCompressibleTestBlockSize, 619 false); 620 MakeEachLetterABlock(weasel_text_without_spaces_, 621 &weasel_text_, 622 kCompressibleTestBlockSize, 623 true); 624 MakeEachLetterABlock(moonpie_text_without_spaces_, 625 &moonpie_text_, 626 kCompressibleTestBlockSize, 627 true); 628 } 629 630 static void TearDownTestCase() { 631 delete[] dictionary_; 632 delete[] target_; 633 delete[] weasel_text_; 634 delete[] moonpie_text_; 635 } 636 637 // text must be NULL-terminated 638 void EncodeText(const char* text, bool interleaved, bool target_matching) { 639 interleaved_ = interleaved; 640 VCDiffCodeTableWriter coder(interleaved); 641 coder.Init(engine_.dictionary_size()); 642 engine_.Encode(text, 643 strlen(text), 644 target_matching, 645 &diff_output_string_, 646 &coder); 647 actual_match_counts_ = coder.match_counts(); 648 } 649 650 void Encode(bool interleaved, bool target_matching) { 651 EncodeText(target_, interleaved, target_matching); 652 VerifyHeader(); 653 } 654 655 void VerifyHeader() { 656 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); 657 } 658 659 void ExpectCopyForSize(size_t size, int mode) { 660 ExpectCopyInstruction(static_cast<int>(size), mode); 661 } 662 663 void ExpectAddForSize(size_t size) { 664 ExpectAddInstruction(static_cast<int>(size)); 665 } 666 667 void ExpectAddressVarintForSize(size_t value) { 668 ExpectAddressVarint(static_cast<int32_t>(value)); 669 } 670 671 void FindNextMoonpie(bool include_trailing_spaces) { 672 ++match_index_; 673 SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, 674 AfterLastWeasel())); 675 if (CurrentWeaselPosition() == string::npos) { 676 SetCurrentMoonpiePosition(string::npos); 677 } else { 678 SetCurrentAfterWeaselPosition(CurrentWeaselPosition() 679 + strlen(weasel_text_) 680 + (include_trailing_spaces ? 681 kTrailingSpaces : 0)); 682 SetCurrentMoonpiePosition(AfterLastMoonpie() 683 + CurrentBoilerplateLength()); 684 SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() 685 + strlen(moonpie_text_) 686 + (include_trailing_spaces ? 687 kTrailingSpaces : 0)); 688 } 689 } 690 bool NoMoreMoonpies() const { 691 return CurrentMoonpiePosition() == string::npos; 692 } 693 size_t CurrentWeaselPosition() const { 694 return weasel_positions_[match_index_]; 695 } 696 size_t LastWeaselPosition() const { 697 return weasel_positions_[match_index_ - 1]; 698 } 699 size_t CurrentMoonpiePosition() const { 700 return moonpie_positions_[match_index_]; 701 } 702 size_t LastMoonpiePosition() const { 703 return moonpie_positions_[match_index_ - 1]; 704 } 705 size_t AfterLastWeasel() const { 706 CHECK_GE(match_index_, 1); 707 return after_weasel_[match_index_ - 1]; 708 } 709 size_t AfterPreviousWeasel() const { 710 CHECK_GE(match_index_, 2); 711 return after_weasel_[match_index_ - 2]; 712 } 713 size_t AfterLastMoonpie() const { 714 CHECK_GE(match_index_, 1); 715 return after_moonpie_[match_index_ - 1]; 716 } 717 size_t AfterPreviousMoonpie() const { 718 CHECK_GE(match_index_, 2); 719 return after_moonpie_[match_index_ - 2]; 720 } 721 722 void SetCurrentWeaselPosition(size_t value) { 723 weasel_positions_[match_index_] = value; 724 } 725 void SetCurrentAfterWeaselPosition(size_t value) { 726 after_weasel_[match_index_] = value; 727 } 728 void SetCurrentMoonpiePosition(size_t value) { 729 moonpie_positions_[match_index_] = value; 730 } 731 void SetCurrentAfterMoonpiePosition(size_t value) { 732 after_moonpie_[match_index_] = value; 733 } 734 735 // Find the length of the text in between the "weasel" strings in the 736 // compressible dictionary, which is the same as the text between 737 // the "moon-pie" strings in the compressible target. 738 size_t CurrentBoilerplateLength() const { 739 CHECK_GE(match_index_, 1); 740 return CurrentWeaselPosition() - AfterLastWeasel(); 741 } 742 size_t DistanceFromLastWeasel() const { 743 CHECK_GE(match_index_, 1); 744 return CurrentWeaselPosition() - LastWeaselPosition(); 745 } 746 size_t DistanceFromLastMoonpie() const { 747 CHECK_GE(match_index_, 1); 748 return CurrentMoonpiePosition() - LastMoonpiePosition(); 749 } 750 size_t DistanceBetweenLastTwoWeasels() const { 751 CHECK_GE(match_index_, 2); 752 return AfterLastWeasel() - AfterPreviousWeasel(); 753 } 754 size_t DistanceBetweenLastTwoMoonpies() const { 755 CHECK_GE(match_index_, 2); 756 return AfterLastMoonpie() - AfterPreviousMoonpie(); 757 } 758 759 int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; 760 int UpdateCopyModeForMoonpie(int copy_mode) const; 761 int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; 762 763 void CopyBoilerplateAndAddMoonpie(int copy_mode); 764 void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); 765 766 static const char dictionary_without_spaces_[]; 767 static const char target_without_spaces_[]; 768 static const char weasel_text_without_spaces_[]; 769 static const char moonpie_text_without_spaces_[]; 770 771 static const char* dictionary_; 772 static const char* target_; 773 static const char* weasel_text_; 774 static const char* moonpie_text_; 775 776 const VCDiffEngine engine_; 777 size_t weasel_positions_[128]; 778 size_t after_weasel_[128]; 779 size_t moonpie_positions_[128]; 780 size_t after_moonpie_[128]; 781 int match_index_; 782 string search_dictionary_; 783 size_t copied_moonpie_address_; 784 }; 785 786 // Care is taken in the formulation of the dictionary 787 // to ensure that the surrounding letters do not match; for example, 788 // there are not two instances of the string "weasels". Otherwise, 789 // the matching behavior would not be as predictable. 790 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = 791 "<html>\n" 792 "<head>\n" 793 "<meta content=\"text/html; charset=ISO-8859-1\"\n" 794 "http-equiv=\"content-type\">\n" 795 "<title>All about weasels</title>\n" 796 "</head>\n" 797 "<!-- You will notice that the word \"weasel\" may be replaced" 798 " with something else -->\n" 799 "<body>\n" 800 "<h1>All about the weasel: highly compressible HTML text</h1>" 801 "<ul>\n" 802 "<li>Don\'t look a gift weasel in its mouth.</li>\n" 803 "<li>This item makes sure the next occurrence is found.</li>\n" 804 "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" 805 "</ul>\n" 806 "<br>\n" 807 "</body>\n" 808 "</html>\n"; 809 810 const char WeaselsToMoonpiesTest::target_without_spaces_[] = 811 "<html>\n" 812 "<head>\n" 813 "<meta content=\"text/html; charset=ISO-8859-1\"\n" 814 "http-equiv=\"content-type\">\n" 815 "<title>All about moon-pies</title>\n" 816 "</head>\n" 817 "<!-- You will notice that the word \"moon-pie\" may be replaced" 818 " with something else -->\n" 819 "<body>\n" 820 "<h1>All about the moon-pie: highly compressible HTML text</h1>" 821 "<ul>\n" 822 "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" 823 "<li>This item makes sure the next occurrence is found.</li>\n" 824 "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" 825 "</ul>\n" 826 "<br>\n" 827 "</body>\n" 828 "</html>\n"; 829 830 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; 831 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; 832 833 const char* WeaselsToMoonpiesTest::dictionary_ = NULL; 834 const char* WeaselsToMoonpiesTest::target_ = NULL; 835 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; 836 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; 837 838 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( 839 int copy_mode) const { 840 size_t copy_address = 0; 841 if (default_cache_.IsSelfMode(copy_mode)) { 842 copy_address = AfterLastWeasel(); 843 } else if (default_cache_.IsNearMode(copy_mode)) { 844 copy_address = DistanceBetweenLastTwoWeasels(); 845 } else if (default_cache_.IsSameMode(copy_mode)) { 846 copy_address = AfterLastWeasel() % 256; 847 } 848 return static_cast<int32_t>(copy_address); 849 } 850 851 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { 852 if (copy_mode == default_cache_.FirstSameMode()) { 853 return default_cache_.FirstSameMode() 854 + static_cast<int>((copied_moonpie_address_ / 256) % 3); 855 } else { 856 return copy_mode; 857 } 858 } 859 860 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( 861 int copy_mode) const { 862 size_t copy_address = 0; 863 if (default_cache_.IsHereMode(copy_mode)) { 864 copy_address = DistanceFromLastMoonpie(); 865 } else if (default_cache_.IsNearMode(copy_mode)) { 866 copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; 867 } else if (default_cache_.IsSameMode(copy_mode)) { 868 copy_address = copied_moonpie_address_ % 256; 869 } 870 return static_cast<int32_t>(copy_address); 871 } 872 873 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" 874 // in the encoding. 875 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { 876 EXPECT_FALSE(NoMoreMoonpies()); 877 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); 878 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); 879 ExpectAddInstructionForStringLength(moonpie_text_); 880 ExpectDataString(moonpie_text_); 881 } 882 883 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" 884 // in the encoding. The "moon-pie" text will be copied from the previously 885 // encoded target. 886 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie( 887 int copy_mode, 888 int moonpie_copy_mode) { 889 EXPECT_FALSE(NoMoreMoonpies()); 890 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); 891 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); 892 moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode); 893 ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode); 894 ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode), 895 moonpie_copy_mode); 896 copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition(); 897 } 898 899 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) { 900 Encode(/* interleaved = */ true, /* target matching = */ false); 901 FindNextMoonpie(false); 902 // Expect all five "weasel"s to be replaced with "moon-pie"s 903 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); 904 FindNextMoonpie(false); 905 CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE); 906 FindNextMoonpie(false); 907 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1); 908 FindNextMoonpie(false); 909 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2); 910 FindNextMoonpie(false); 911 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3); 912 FindNextMoonpie(false); 913 EXPECT_TRUE(NoMoreMoonpies()); 914 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), 915 default_cache_.FirstNearMode()); 916 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); 917 VerifySizes(); 918 } 919 920 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) { 921 Encode(/* interleaved = */ true, /* target matching = */ true); 922 // Expect all five "weasel"s to be replaced with "moon-pie"s. 923 // Every "moon-pie" after the first one should be copied from the 924 // previously encoded target text. 925 FindNextMoonpie(false); 926 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); 927 FindNextMoonpie(true); 928 CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE); 929 FindNextMoonpie(true); 930 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, 931 default_cache_.FirstSameMode()); 932 FindNextMoonpie(true); 933 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3, 934 VCD_HERE_MODE); 935 FindNextMoonpie(true); 936 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, 937 default_cache_.FirstSameMode()); 938 FindNextMoonpie(true); 939 EXPECT_TRUE(NoMoreMoonpies()); 940 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), 941 default_cache_.FirstNearMode() + 3); 942 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); 943 VerifySizes(); 944 } 945 946 } // anonymous namespace 947 } // namespace open-vcdiff 948