1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <config.h> 17 #include "vcdiffengine.h" 18 #include <string.h> // memset, strlen 19 #include <algorithm> 20 #include <string> 21 #include "addrcache.h" 22 #include "blockhash.h" 23 #include "encodetable.h" 24 #include "google/output_string.h" 25 #include "rolling_hash.h" 26 #include "testing.h" 27 #include "varint_bigendian.h" 28 #include "vcdiff_defs.h" 29 30 namespace open_vcdiff { 31 32 namespace { 33 34 class VCDiffEngineTestBase : public testing::Test { 35 protected: 36 typedef std::string string; 37 38 // Some common definitions and helper functions used in the various tests 39 // for VCDiffEngine. 40 static const int kBlockSize = VCDiffEngine::kMinimumMatchSize; 41 42 VCDiffEngineTestBase() : interleaved_(false), 43 diff_output_string_(&diff_), 44 verify_position_(0), 45 saved_total_size_position_(0), 46 saved_delta_encoding_position_(0), 47 saved_section_sizes_position_(0), 48 data_bytes_(0), 49 instruction_bytes_(0), 50 address_bytes_(0) { } 51 52 virtual ~VCDiffEngineTestBase() { } 53 54 virtual void TearDown() { 55 } 56 57 // Copy string_without_spaces into newly allocated result buffer, 58 // but pad its contents with space characters so that every character 59 // in string_without_spaces corresponds to (block_size - 1) 60 // spaces in the result, followed by that character. 61 // For example: 62 // If string_without_spaces begins "The only thing"... and block_size is 4, 63 // then 3 space characters will be inserted 64 // between each letter in the result, as follows: 65 // " T h e o n l y t h i n g"... 66 // This makes testing simpler, because finding a block_size-byte match 67 // between the dictionary and target only depends on the 68 // trailing letter in each block. 69 // If no_initial_padding is true, then the first letter will not have 70 // spaces added in front of it. 71 static void MakeEachLetterABlock(const char* string_without_spaces, 72 const char** result, 73 int block_size, 74 bool no_initial_padding) { 75 const size_t length_without_spaces = strlen(string_without_spaces); 76 char* padded_text = new char[(block_size * length_without_spaces) + 1]; 77 memset(padded_text, ' ', block_size * length_without_spaces); 78 char* padded_text_ptr = padded_text; 79 if (!no_initial_padding) { 80 padded_text_ptr += block_size - 1; 81 } 82 for (size_t i = 0; i < length_without_spaces; ++i) { 83 *padded_text_ptr = string_without_spaces[i]; 84 padded_text_ptr += block_size; 85 } 86 *(padded_text_ptr - block_size + 1) = '\0'; 87 *result = padded_text; 88 } 89 90 // These functions iterate through the decoded output and expect 91 // simple elements: bytes or variable-length integers. 92 void ExpectByte(char byte) { 93 EXPECT_GT(diff_.size(), verify_position_); 94 EXPECT_EQ(byte, diff_[verify_position_]); 95 ++verify_position_; 96 } 97 98 size_t ExpectVarint(int32_t expected_value) { 99 EXPECT_GT(diff_.size(), verify_position_); 100 const char* const original_position = &diff_[verify_position_]; 101 const char* new_position = original_position; 102 const size_t expected_length = VarintBE<int32_t>::Length(expected_value); 103 int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), 104 &new_position); 105 EXPECT_LE(0, parsed_value); 106 size_t parsed_length = new_position - original_position; 107 EXPECT_EQ(expected_value, parsed_value); 108 EXPECT_EQ(expected_length, parsed_length); 109 verify_position_ += parsed_length; 110 return parsed_length; 111 } 112 113 size_t ExpectSize(size_t size) { 114 return ExpectVarint(static_cast<int32_t>(size)); 115 } 116 117 size_t ExpectStringLength(const char* s) { 118 return ExpectSize(strlen(s)); 119 } 120 121 void SkipVarint() { 122 EXPECT_GT(diff_.size(), verify_position_); 123 const char* const original_position = &diff_[verify_position_]; 124 const char* new_position = original_position; 125 VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); 126 size_t parsed_length = new_position - original_position; 127 verify_position_ += parsed_length; 128 } 129 130 void ExpectDataByte(char byte) { 131 ExpectByte(byte); 132 if (interleaved_) { 133 ++instruction_bytes_; 134 } else { 135 ++data_bytes_; 136 } 137 } 138 139 void ExpectDataString(const char *expected_string) { 140 const char* ptr = expected_string; 141 while (*ptr) { 142 ExpectDataByte(*ptr); 143 ++ptr; 144 } 145 } 146 147 void ExpectDataStringWithBlockSpacing(const char *expected_string, 148 bool trailing_spaces) { 149 const char* ptr = expected_string; 150 while (*ptr) { 151 for (int i = 0; i < (kBlockSize - 1); ++i) { 152 ExpectDataByte(' '); 153 } 154 ExpectDataByte(*ptr); 155 ++ptr; 156 } 157 if (trailing_spaces) { 158 for (int i = 0; i < (kBlockSize - 1); ++i) { 159 ExpectDataByte(' '); 160 } 161 } 162 } 163 164 void ExpectInstructionByte(char byte) { 165 ExpectByte(byte); 166 ++instruction_bytes_; 167 } 168 169 void ExpectInstructionVarint(int32_t value) { 170 instruction_bytes_ += ExpectVarint(value); 171 } 172 173 void ExpectAddressByte(char byte) { 174 ExpectByte(byte); 175 if (interleaved_) { 176 ++instruction_bytes_; 177 } else { 178 ++address_bytes_; 179 } 180 } 181 182 void ExpectAddressVarint(int32_t value) { 183 if (interleaved_) { 184 instruction_bytes_ += ExpectVarint(value); 185 } else { 186 address_bytes_ += ExpectVarint(value); 187 } 188 } 189 190 // The following functions leverage the fact that the encoder uses 191 // the default code table and cache sizes. They are able to search for 192 // instructions of a particular size. The logic for mapping from 193 // instruction type, mode, and size to opcode value is very different here 194 // from the logic used in encodetable.{h,cc}, so hopefully 195 // this version will help validate that the other is correct. 196 // This version uses conditional statements, while encodetable.h 197 // looks up values in a mapping table. 198 void ExpectAddress(int32_t address, int copy_mode) { 199 if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) { 200 ExpectAddressVarint(address); 201 } else { 202 ExpectAddressByte(address); 203 } 204 } 205 206 void ExpectAddInstruction(int size) { 207 if (size <= 18) { 208 ExpectInstructionByte(0x01 + size); 209 } else { 210 ExpectInstructionByte(0x01); 211 ExpectInstructionVarint(size); 212 } 213 } 214 215 void ExpectCopyInstruction(int size, int mode) { 216 if ((size >= 4) && (size <= 16)) { 217 ExpectInstructionByte(0x10 + (0x10 * mode) + size); 218 } else { 219 ExpectInstructionByte(0x13 + (0x10 * mode)); 220 ExpectInstructionVarint(size); 221 } 222 } 223 224 bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { 225 if (!default_cache_.IsSameMode(copy_mode) && 226 (add_size <= 4) && 227 (copy_size >= 4) && 228 (copy_size <= 6)) { 229 ExpectInstructionByte(0x9C + 230 (0x0C * copy_mode) + 231 (0x03 * add_size) + 232 copy_size); 233 return true; 234 } else if (default_cache_.IsSameMode(copy_mode) && 235 (add_size <= 4) && 236 (copy_size == 4)) { 237 ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); 238 return true; 239 } else { 240 ExpectAddInstruction(add_size); 241 return false; 242 } 243 } 244 245 void ExpectAddInstructionForStringLength(const char* s) { 246 ExpectAddInstruction(static_cast<int>(strlen(s))); 247 } 248 249 // Call this function before beginning to iterate through the diff string 250 // using the Expect... functions. 251 // text must be NULL-terminated. 252 void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, 253 const char* target_text) { 254 ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) 255 ExpectStringLength(dictionary); 256 ExpectByte(0x00); // Source segment position: start of dictionary 257 saved_total_size_position_ = verify_position_; 258 SkipVarint(); // Length of the delta encoding 259 saved_delta_encoding_position_ = verify_position_; 260 ExpectStringLength(target_text); 261 ExpectByte(0x00); // Delta_indicator (no compression) 262 saved_section_sizes_position_ = verify_position_; 263 SkipVarint(); // length of data for ADDs and RUNs 264 SkipVarint(); // length of instructions section 265 SkipVarint(); // length of addresses for COPYs 266 } 267 268 // Call this function before beginning to iterating through the entire 269 // diff string using the Expect... functions. It makes sure that the 270 // size totals in the window header match the number of bytes that 271 // were parsed. 272 void VerifySizes() { 273 EXPECT_EQ(verify_position_, diff_.size()); 274 const size_t delta_encoding_size = verify_position_ - 275 saved_delta_encoding_position_; 276 verify_position_ = saved_total_size_position_; 277 ExpectSize(delta_encoding_size); 278 verify_position_ = saved_section_sizes_position_; 279 ExpectSize(data_bytes_); 280 ExpectSize(instruction_bytes_); 281 ExpectSize(address_bytes_); 282 } 283 284 bool interleaved_; 285 string diff_; 286 OutputString<string> diff_output_string_; 287 size_t verify_position_; 288 size_t saved_total_size_position_; 289 size_t saved_delta_encoding_position_; 290 size_t saved_section_sizes_position_; 291 size_t data_bytes_; 292 size_t instruction_bytes_; 293 size_t address_bytes_; 294 VCDiffAddressCache default_cache_; // Used for finding mode values 295 }; 296 297 class VCDiffEngineTest : public VCDiffEngineTestBase { 298 protected: 299 VCDiffEngineTest() : 300 engine_(dictionary_, strlen(dictionary_)) { 301 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); 302 } 303 304 virtual ~VCDiffEngineTest() { } 305 306 307 static void SetUpTestCase() { 308 MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, 309 kBlockSize, false); 310 MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); 311 } 312 313 static void TearDownTestCase() { 314 delete[] dictionary_; 315 delete[] target_; 316 } 317 318 void EncodeNothing(bool interleaved, bool target_matching) { 319 interleaved_ = interleaved; 320 VCDiffCodeTableWriter coder(interleaved); 321 coder.Init(engine_.dictionary_size()); 322 engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); 323 EXPECT_TRUE(diff_.empty()); 324 } 325 326 // text must be NULL-terminated 327 void EncodeText(const char* text, bool interleaved, bool target_matching) { 328 interleaved_ = interleaved; 329 VCDiffCodeTableWriter coder(interleaved); 330 coder.Init(engine_.dictionary_size()); 331 engine_.Encode(text, 332 strlen(text), 333 target_matching, 334 &diff_output_string_, 335 &coder); 336 } 337 338 void Encode(bool interleaved, bool target_matching) { 339 EncodeText(target_, interleaved, target_matching); 340 VerifyHeader(); 341 } 342 343 void VerifyHeader() { 344 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); 345 } 346 347 static const char dictionary_without_spaces_[]; 348 static const char target_without_spaces_[]; 349 350 static const char* dictionary_; 351 static const char* target_; 352 353 const VCDiffEngine engine_; 354 }; 355 356 #ifdef GTEST_HAS_DEATH_TEST 357 typedef VCDiffEngineTest VCDiffEngineDeathTest; 358 #endif // GTEST_HAS_DEATH_TEST 359 360 const char VCDiffEngineTest::dictionary_without_spaces_[] = 361 "The only thing we have to fear is fear itself"; 362 363 const char VCDiffEngineTest::target_without_spaces_[] = 364 "What we hear is fearsome"; 365 366 const char* VCDiffEngineTest::dictionary_ = NULL; 367 const char* VCDiffEngineTest::target_ = NULL; 368 369 #ifdef GTEST_HAS_DEATH_TEST 370 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { 371 EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), 372 "twice"); 373 } 374 #endif // GTEST_HAS_DEATH_TEST 375 376 TEST_F(VCDiffEngineTest, EngineEncodeNothing) { 377 EncodeNothing(/* interleaved = */ false, /* target matching = */ false); 378 } 379 380 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { 381 EncodeNothing(/* interleaved = */ true, /* target matching = */ false); 382 } 383 384 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { 385 EncodeNothing(/* interleaved = */ false, /* target matching = */ true); 386 } 387 388 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { 389 EncodeNothing(/* interleaved = */ true, /* target matching = */ true); 390 } 391 392 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { 393 const char* small_text = " "; 394 EncodeText(small_text, 395 /* interleaved = */ false, 396 /* target matching = */ false); 397 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); 398 // Data for ADDs 399 ExpectDataString(small_text); 400 // Instructions and sizes 401 ExpectAddInstructionForStringLength(small_text); 402 } 403 404 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { 405 const char* small_text = " "; 406 EncodeText(small_text, 407 /* interleaved = */ true, 408 /* target matching = */ false); 409 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); 410 // Interleaved section 411 ExpectAddInstructionForStringLength(small_text); 412 ExpectDataString(small_text); 413 } 414 415 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { 416 Encode(/* interleaved = */ false, /* target matching = */ false); 417 // Data for ADDs 418 ExpectDataStringWithBlockSpacing("W", false); 419 ExpectDataByte('t'); 420 ExpectDataByte('s'); 421 ExpectDataByte('m'); 422 // Instructions and sizes 423 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 424 VCD_SELF_MODE)) { 425 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 426 } 427 ExpectAddInstruction(1); 428 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 429 ExpectCopyInstruction(11 * kBlockSize, 430 default_cache_.FirstNearMode()); 431 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 432 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 433 } 434 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 435 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 436 } 437 // Addresses for COPY 438 ExpectAddressVarint(18 * kBlockSize); // "ha" 439 ExpectAddressVarint(14 * kBlockSize); // " we h" 440 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 441 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 442 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 443 VerifySizes(); 444 } 445 446 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { 447 Encode(/* interleaved = */ true, /* target matching = */ false); 448 // Interleaved section 449 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 450 VCD_SELF_MODE)) { 451 ExpectDataStringWithBlockSpacing("W", false); 452 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 453 } else { 454 ExpectDataStringWithBlockSpacing("W", false); 455 } 456 ExpectAddressVarint(18 * kBlockSize); // "ha" 457 ExpectAddInstruction(1); 458 ExpectDataByte('t'); 459 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 460 ExpectAddressVarint(14 * kBlockSize); // " we h" 461 ExpectCopyInstruction(11 * kBlockSize, 462 default_cache_.FirstNearMode()); 463 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 464 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 465 ExpectDataByte('s'); 466 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 467 } else { 468 ExpectDataByte('s'); 469 } 470 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 471 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 472 ExpectDataByte('m'); 473 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 474 } else { 475 ExpectDataByte('m'); 476 } 477 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 478 VerifySizes(); 479 } 480 481 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { 482 Encode(/* interleaved = */ false, /* target matching = */ true); 483 // Data for ADDs 484 ExpectDataStringWithBlockSpacing("W", false); 485 ExpectDataByte('t'); 486 ExpectDataByte('s'); 487 ExpectDataByte('m'); 488 // Instructions and sizes 489 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 490 VCD_SELF_MODE)) { 491 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 492 } 493 ExpectAddInstruction(1); 494 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 495 ExpectCopyInstruction(11 * kBlockSize, 496 default_cache_.FirstNearMode()); 497 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 498 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 499 } 500 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 501 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 502 } 503 // Addresses for COPY 504 ExpectAddressVarint(18 * kBlockSize); // "ha" 505 ExpectAddressVarint(14 * kBlockSize); // " we h" 506 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 507 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 508 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 509 VerifySizes(); 510 } 511 512 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { 513 Encode(/* interleaved = */ true, /* target matching = */ true); 514 // Interleaved section 515 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, 516 VCD_SELF_MODE)) { 517 ExpectDataStringWithBlockSpacing("W", false); 518 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); 519 } else { 520 ExpectDataStringWithBlockSpacing("W", false); 521 } 522 ExpectAddressVarint(18 * kBlockSize); // "ha" 523 ExpectAddInstruction(1); 524 ExpectDataByte('t'); 525 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); 526 ExpectAddressVarint(14 * kBlockSize); // " we h" 527 ExpectCopyInstruction(11 * kBlockSize, 528 default_cache_.FirstNearMode()); 529 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" 530 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { 531 ExpectDataByte('s'); 532 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); 533 } else { 534 ExpectDataByte('s'); 535 } 536 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" 537 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { 538 ExpectDataByte('m'); 539 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); 540 } else { 541 ExpectDataByte('m'); 542 } 543 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" 544 VerifySizes(); 545 } 546 547 // This test case takes a dictionary containing several instances of the string 548 // "weasel", and a target string which is identical to the dictionary 549 // except that all instances of "weasel" have been replaced with the string 550 // "moon-pie". It tests that COPY instructions are generated for all 551 // boilerplate text (that is, the text between the "moon-pie" instances in 552 // the target) and, if target matching is enabled, that each instance of 553 // "moon-pie" (except the first one) is encoded using a COPY instruction 554 // rather than an ADD. 555 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { 556 protected: 557 // kCompressibleTestBlockSize: 558 // The size of the block to create for each letter in the 559 // dictionary and search string for the "compressible text" test. 560 // See MakeEachLetterABlock, below. 561 // If we use kCompressibleTestBlockSize = kBlockSize, then the 562 // encoder will find one match per unique letter in the HTML text. 563 // There are too many examples of "<" in the text for the encoder 564 // to iterate through them all, and some matches are not found. 565 // If we use kCompressibleTextBlockSize = 1, then the boilerplate 566 // text between "weasel" strings in the dictionary and "moon-pie" 567 // strings in the target may not be long enough to be found by 568 // the encoder's block-hash algorithm. A good value, that will give 569 // reproducible results across all block sizes, will be somewhere 570 // in between these extremes. 571 static const int kCompressibleTestBlockSize = kBlockSize / 4; 572 static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; 573 574 WeaselsToMoonpiesTest() : 575 engine_(dictionary_, strlen(dictionary_)), 576 match_index_(0), 577 search_dictionary_(dictionary_, strlen(dictionary_)), 578 copied_moonpie_address_(0) { 579 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); 580 weasel_positions_[0] = 0; 581 after_weasel_[0] = 0; 582 moonpie_positions_[0] = 0; 583 after_moonpie_[0] = 0; 584 } 585 586 virtual ~WeaselsToMoonpiesTest() { } 587 588 static void SetUpTestCase() { 589 MakeEachLetterABlock(dictionary_without_spaces_, 590 &dictionary_, 591 kCompressibleTestBlockSize, 592 false); 593 MakeEachLetterABlock(target_without_spaces_, 594 &target_, 595 kCompressibleTestBlockSize, 596 false); 597 MakeEachLetterABlock(weasel_text_without_spaces_, 598 &weasel_text_, 599 kCompressibleTestBlockSize, 600 true); 601 MakeEachLetterABlock(moonpie_text_without_spaces_, 602 &moonpie_text_, 603 kCompressibleTestBlockSize, 604 true); 605 } 606 607 static void TearDownTestCase() { 608 delete[] dictionary_; 609 delete[] target_; 610 delete[] weasel_text_; 611 delete[] moonpie_text_; 612 } 613 614 // text must be NULL-terminated 615 void EncodeText(const char* text, bool interleaved, bool target_matching) { 616 interleaved_ = interleaved; 617 VCDiffCodeTableWriter coder(interleaved); 618 coder.Init(engine_.dictionary_size()); 619 engine_.Encode(text, 620 strlen(text), 621 target_matching, 622 &diff_output_string_, 623 &coder); 624 } 625 626 void Encode(bool interleaved, bool target_matching) { 627 EncodeText(target_, interleaved, target_matching); 628 VerifyHeader(); 629 } 630 631 void VerifyHeader() { 632 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); 633 } 634 635 void ExpectCopyForSize(size_t size, int mode) { 636 ExpectCopyInstruction(static_cast<int>(size), mode); 637 } 638 639 void ExpectAddForSize(size_t size) { 640 ExpectAddInstruction(static_cast<int>(size)); 641 } 642 643 void ExpectAddressVarintForSize(size_t value) { 644 ExpectAddressVarint(static_cast<int32_t>(value)); 645 } 646 647 void FindNextMoonpie(bool include_trailing_spaces) { 648 ++match_index_; 649 SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, 650 AfterLastWeasel())); 651 if (CurrentWeaselPosition() == string::npos) { 652 SetCurrentMoonpiePosition(string::npos); 653 } else { 654 SetCurrentAfterWeaselPosition(CurrentWeaselPosition() 655 + strlen(weasel_text_) 656 + (include_trailing_spaces ? 657 kTrailingSpaces : 0)); 658 SetCurrentMoonpiePosition(AfterLastMoonpie() 659 + CurrentBoilerplateLength()); 660 SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() 661 + strlen(moonpie_text_) 662 + (include_trailing_spaces ? 663 kTrailingSpaces : 0)); 664 } 665 } 666 bool NoMoreMoonpies() const { 667 return CurrentMoonpiePosition() == string::npos; 668 } 669 size_t CurrentWeaselPosition() const { 670 return weasel_positions_[match_index_]; 671 } 672 size_t LastWeaselPosition() const { 673 return weasel_positions_[match_index_ - 1]; 674 } 675 size_t CurrentMoonpiePosition() const { 676 return moonpie_positions_[match_index_]; 677 } 678 size_t LastMoonpiePosition() const { 679 return moonpie_positions_[match_index_ - 1]; 680 } 681 size_t AfterLastWeasel() const { 682 CHECK_GE(match_index_, 1); 683 return after_weasel_[match_index_ - 1]; 684 } 685 size_t AfterPreviousWeasel() const { 686 CHECK_GE(match_index_, 2); 687 return after_weasel_[match_index_ - 2]; 688 } 689 size_t AfterLastMoonpie() const { 690 CHECK_GE(match_index_, 1); 691 return after_moonpie_[match_index_ - 1]; 692 } 693 size_t AfterPreviousMoonpie() const { 694 CHECK_GE(match_index_, 2); 695 return after_moonpie_[match_index_ - 2]; 696 } 697 698 void SetCurrentWeaselPosition(size_t value) { 699 weasel_positions_[match_index_] = value; 700 } 701 void SetCurrentAfterWeaselPosition(size_t value) { 702 after_weasel_[match_index_] = value; 703 } 704 void SetCurrentMoonpiePosition(size_t value) { 705 moonpie_positions_[match_index_] = value; 706 } 707 void SetCurrentAfterMoonpiePosition(size_t value) { 708 after_moonpie_[match_index_] = value; 709 } 710 711 // Find the length of the text in between the "weasel" strings in the 712 // compressible dictionary, which is the same as the text between 713 // the "moon-pie" strings in the compressible target. 714 size_t CurrentBoilerplateLength() const { 715 CHECK_GE(match_index_, 1); 716 return CurrentWeaselPosition() - AfterLastWeasel(); 717 } 718 size_t DistanceFromLastWeasel() const { 719 CHECK_GE(match_index_, 1); 720 return CurrentWeaselPosition() - LastWeaselPosition(); 721 } 722 size_t DistanceFromLastMoonpie() const { 723 CHECK_GE(match_index_, 1); 724 return CurrentMoonpiePosition() - LastMoonpiePosition(); 725 } 726 size_t DistanceBetweenLastTwoWeasels() const { 727 CHECK_GE(match_index_, 2); 728 return AfterLastWeasel() - AfterPreviousWeasel(); 729 } 730 size_t DistanceBetweenLastTwoMoonpies() const { 731 CHECK_GE(match_index_, 2); 732 return AfterLastMoonpie() - AfterPreviousMoonpie(); 733 } 734 735 int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; 736 int UpdateCopyModeForMoonpie(int copy_mode) const; 737 int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; 738 739 void CopyBoilerplateAndAddMoonpie(int copy_mode); 740 void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); 741 742 static const char dictionary_without_spaces_[]; 743 static const char target_without_spaces_[]; 744 static const char weasel_text_without_spaces_[]; 745 static const char moonpie_text_without_spaces_[]; 746 747 static const char* dictionary_; 748 static const char* target_; 749 static const char* weasel_text_; 750 static const char* moonpie_text_; 751 752 const VCDiffEngine engine_; 753 size_t weasel_positions_[128]; 754 size_t after_weasel_[128]; 755 size_t moonpie_positions_[128]; 756 size_t after_moonpie_[128]; 757 int match_index_; 758 string search_dictionary_; 759 size_t copied_moonpie_address_; 760 }; 761 762 // Care is taken in the formulation of the dictionary 763 // to ensure that the surrounding letters do not match; for example, 764 // there are not two instances of the string "weasels". Otherwise, 765 // the matching behavior would not be as predictable. 766 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = 767 "<html>\n" 768 "<head>\n" 769 "<meta content=\"text/html; charset=ISO-8859-1\"\n" 770 "http-equiv=\"content-type\">\n" 771 "<title>All about weasels</title>\n" 772 "</head>\n" 773 "<!-- You will notice that the word \"weasel\" may be replaced" 774 " with something else -->\n" 775 "<body>\n" 776 "<h1>All about the weasel: highly compressible HTML text</h1>" 777 "<ul>\n" 778 "<li>Don\'t look a gift weasel in its mouth.</li>\n" 779 "<li>This item makes sure the next occurrence is found.</li>\n" 780 "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" 781 "</ul>\n" 782 "<br>\n" 783 "</body>\n" 784 "</html>\n"; 785 786 const char WeaselsToMoonpiesTest::target_without_spaces_[] = 787 "<html>\n" 788 "<head>\n" 789 "<meta content=\"text/html; charset=ISO-8859-1\"\n" 790 "http-equiv=\"content-type\">\n" 791 "<title>All about moon-pies</title>\n" 792 "</head>\n" 793 "<!-- You will notice that the word \"moon-pie\" may be replaced" 794 " with something else -->\n" 795 "<body>\n" 796 "<h1>All about the moon-pie: highly compressible HTML text</h1>" 797 "<ul>\n" 798 "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" 799 "<li>This item makes sure the next occurrence is found.</li>\n" 800 "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" 801 "</ul>\n" 802 "<br>\n" 803 "</body>\n" 804 "</html>\n"; 805 806 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; 807 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; 808 809 const char* WeaselsToMoonpiesTest::dictionary_ = NULL; 810 const char* WeaselsToMoonpiesTest::target_ = NULL; 811 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; 812 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; 813 814 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( 815 int copy_mode) const { 816 size_t copy_address = 0; 817 if (default_cache_.IsSelfMode(copy_mode)) { 818 copy_address = AfterLastWeasel(); 819 } else if (default_cache_.IsNearMode(copy_mode)) { 820 copy_address = DistanceBetweenLastTwoWeasels(); 821 } else if (default_cache_.IsSameMode(copy_mode)) { 822 copy_address = AfterLastWeasel() % 256; 823 } 824 return static_cast<int32_t>(copy_address); 825 } 826 827 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { 828 if (copy_mode == default_cache_.FirstSameMode()) { 829 return default_cache_.FirstSameMode() 830 + static_cast<int>((copied_moonpie_address_ / 256) % 3); 831 } else { 832 return copy_mode; 833 } 834 } 835 836 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( 837 int copy_mode) const { 838 size_t copy_address = 0; 839 if (default_cache_.IsHereMode(copy_mode)) { 840 copy_address = DistanceFromLastMoonpie(); 841 } else if (default_cache_.IsNearMode(copy_mode)) { 842 copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; 843 } else if (default_cache_.IsSameMode(copy_mode)) { 844 copy_address = copied_moonpie_address_ % 256; 845 } 846 return static_cast<int32_t>(copy_address); 847 } 848 849 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" 850 // in the encoding. 851 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { 852 EXPECT_FALSE(NoMoreMoonpies()); 853 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); 854 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); 855 ExpectAddInstructionForStringLength(moonpie_text_); 856 ExpectDataString(moonpie_text_); 857 } 858 859 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" 860 // in the encoding. The "moon-pie" text will be copied from the previously 861 // encoded target. 862 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie( 863 int copy_mode, 864 int moonpie_copy_mode) { 865 EXPECT_FALSE(NoMoreMoonpies()); 866 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); 867 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); 868 moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode); 869 ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode); 870 ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode), 871 moonpie_copy_mode); 872 copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition(); 873 } 874 875 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) { 876 Encode(/* interleaved = */ true, /* target matching = */ false); 877 FindNextMoonpie(false); 878 // Expect all five "weasel"s to be replaced with "moon-pie"s 879 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); 880 FindNextMoonpie(false); 881 CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE); 882 FindNextMoonpie(false); 883 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1); 884 FindNextMoonpie(false); 885 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2); 886 FindNextMoonpie(false); 887 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3); 888 FindNextMoonpie(false); 889 EXPECT_TRUE(NoMoreMoonpies()); 890 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), 891 default_cache_.FirstNearMode()); 892 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); 893 VerifySizes(); 894 } 895 896 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) { 897 Encode(/* interleaved = */ true, /* target matching = */ true); 898 // Expect all five "weasel"s to be replaced with "moon-pie"s. 899 // Every "moon-pie" after the first one should be copied from the 900 // previously encoded target text. 901 FindNextMoonpie(false); 902 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); 903 FindNextMoonpie(true); 904 CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE); 905 FindNextMoonpie(true); 906 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, 907 default_cache_.FirstSameMode()); 908 FindNextMoonpie(true); 909 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3, 910 VCD_HERE_MODE); 911 FindNextMoonpie(true); 912 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, 913 default_cache_.FirstSameMode()); 914 FindNextMoonpie(true); 915 EXPECT_TRUE(NoMoreMoonpies()); 916 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), 917 default_cache_.FirstNearMode() + 3); 918 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); 919 VerifySizes(); 920 } 921 922 } // anonymous namespace 923 } // namespace open-vcdiff 924