// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement an Encoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
29 30 #include <config.h> 31 #include "google/vcencoder.h" 32 #include <vector> 33 #include "checksum.h" 34 #include "encodetable.h" 35 #include "logging.h" 36 #include "google/output_string.h" 37 #include "vcdiffengine.h" 38 39 namespace open_vcdiff { 40 41 HashedDictionary::HashedDictionary(const char* dictionary_contents, 42 size_t dictionary_size) 43 : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { } 44 45 HashedDictionary::~HashedDictionary() { delete engine_; } 46 47 bool HashedDictionary::Init() { 48 return const_cast<VCDiffEngine*>(engine_)->Init(); 49 } 50 51 class VCDiffStreamingEncoderImpl { 52 public: 53 VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary, 54 VCDiffFormatExtensionFlags format_extensions, 55 bool look_for_target_matches); 56 57 // These functions are identical to their counterparts 58 // in VCDiffStreamingEncoder. 59 bool StartEncoding(OutputStringInterface* out); 60 61 bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out); 62 63 bool FinishEncoding(OutputStringInterface* out); 64 65 const std::vector<int>& match_counts() const { 66 return coder_.match_counts(); 67 } 68 69 private: 70 // Write the header (as defined in section 4.1 of the RFC) to *output. 71 // This includes information that can be gathered 72 // before the first chunk of input is available. 73 void WriteHeader(OutputStringInterface* output) const; 74 75 const VCDiffEngine* engine_; 76 77 // This implementation of the encoder uses the default 78 // code table. A VCDiffCodeTableWriter could also be constructed 79 // using a custom code table. 80 VCDiffCodeTableWriter coder_; 81 82 const VCDiffFormatExtensionFlags format_extensions_; 83 84 // Determines whether to look for matches within the previously encoded 85 // target data, or just within the source (dictionary) data. Please see 86 // vcencoder.h for a full explanation of this parameter. 
87 const bool look_for_target_matches_; 88 89 // This state variable is used to ensure that StartEncoding(), EncodeChunk(), 90 // and FinishEncoding() are called in the correct order. It will be true 91 // if StartEncoding() has been called, followed by zero or more calls to 92 // EncodeChunk(), but FinishEncoding() has not yet been called. It will 93 // be false initially, and also after FinishEncoding() has been called. 94 bool encode_chunk_allowed_; 95 96 // Making these private avoids implicit copy constructor & assignment operator 97 VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT 98 void operator=(const VCDiffStreamingEncoderImpl&); 99 }; 100 101 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl( 102 const HashedDictionary* dictionary, 103 VCDiffFormatExtensionFlags format_extensions, 104 bool look_for_target_matches) 105 : engine_(dictionary->engine()), 106 coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0), 107 format_extensions_(format_extensions), 108 look_for_target_matches_(look_for_target_matches), 109 encode_chunk_allowed_(false) { } 110 111 inline void VCDiffStreamingEncoderImpl::WriteHeader( 112 OutputStringInterface* output) const { 113 DeltaFileHeader header_data = { 114 0xD6, // Header1: "V" | 0x80 115 0xC3, // Header2: "C" | 0x80 116 0xC4, // Header3: "D" | 0x80 117 0x00, // Header4: Draft standard format 118 0x00 }; // Hdr_Indicator: 119 // No compression, no custom code table 120 if (format_extensions_ != VCD_STANDARD_FORMAT) { 121 header_data.header4 = 'S'; // Header4: VCDIFF/SDCH, extensions used 122 } 123 output->append(reinterpret_cast<const char*>(&header_data), 124 sizeof(header_data)); 125 // If custom cache table sizes or a custom code table were used 126 // for encoding, here is where they would be appended to *output. 127 // This implementation of the encoder does not use those features, 128 // although the decoder can understand and interpret them. 
129 } 130 131 inline bool VCDiffStreamingEncoderImpl::StartEncoding( 132 OutputStringInterface* out) { 133 if (!coder_.Init(engine_->dictionary_size())) { 134 LOG(DFATAL) << "Internal error: " 135 "Initialization of code table writer failed" << LOG_ENDL; 136 return false; 137 } 138 WriteHeader(out); 139 encode_chunk_allowed_ = true; 140 return true; 141 } 142 143 inline bool VCDiffStreamingEncoderImpl::EncodeChunk( 144 const char* data, 145 size_t len, 146 OutputStringInterface* out) { 147 if (!encode_chunk_allowed_) { 148 LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL; 149 return false; 150 } 151 if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) { 152 coder_.AddChecksum(ComputeAdler32(data, len)); 153 } 154 engine_->Encode(data, len, look_for_target_matches_, out, &coder_); 155 return true; 156 } 157 158 inline bool VCDiffStreamingEncoderImpl::FinishEncoding( 159 OutputStringInterface* /*out*/) { 160 if (!encode_chunk_allowed_) { 161 LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL; 162 return false; 163 } 164 encode_chunk_allowed_ = false; 165 // There should not be any need to output more data 166 // since EncodeChunk() encodes a complete target window 167 // and there is no end-of-delta-file marker. 
168 return true; 169 } 170 171 VCDiffStreamingEncoder::VCDiffStreamingEncoder( 172 const HashedDictionary* dictionary, 173 VCDiffFormatExtensionFlags format_extensions, 174 bool look_for_target_matches) 175 : impl_(new VCDiffStreamingEncoderImpl(dictionary, 176 format_extensions, 177 look_for_target_matches)) { } 178 179 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; } 180 181 bool VCDiffStreamingEncoder::StartEncodingToInterface( 182 OutputStringInterface* out) { 183 return impl_->StartEncoding(out); 184 } 185 186 bool VCDiffStreamingEncoder::EncodeChunkToInterface( 187 const char* data, 188 size_t len, 189 OutputStringInterface* out) { 190 return impl_->EncodeChunk(data, len, out); 191 } 192 193 bool VCDiffStreamingEncoder::FinishEncodingToInterface( 194 OutputStringInterface* out) { 195 return impl_->FinishEncoding(out); 196 } 197 198 void VCDiffStreamingEncoder::GetMatchCounts( 199 std::vector<int>* match_counts) const { 200 if (!match_counts) { 201 LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL; 202 return; 203 } 204 *match_counts = impl_->match_counts(); 205 } 206 207 bool VCDiffEncoder::EncodeToInterface(const char* target_data, 208 size_t target_len, 209 OutputStringInterface* out) { 210 out->clear(); 211 if (!encoder_) { 212 if (!dictionary_.Init()) { 213 LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL; 214 return false; 215 } 216 encoder_ = new VCDiffStreamingEncoder(&dictionary_, 217 flags_, 218 look_for_target_matches_); 219 } 220 if (!encoder_->StartEncodingToInterface(out)) { 221 return false; 222 } 223 if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) { 224 return false; 225 } 226 return encoder_->FinishEncodingToInterface(out); 227 } 228 229 } // namespace open_vcdiff 230