// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Classes to implement an Encoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
29 30 #include <config.h> 31 #include <memory> // auto_ptr 32 #include "checksum.h" 33 #include "encodetable.h" 34 #include "google/output_string.h" 35 #include "google/vcencoder.h" 36 #include "jsonwriter.h" 37 #include "logging.h" 38 #include "vcdiffengine.h" 39 40 namespace open_vcdiff { 41 42 HashedDictionary::HashedDictionary(const char* dictionary_contents, 43 size_t dictionary_size) 44 : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { } 45 46 HashedDictionary::~HashedDictionary() { delete engine_; } 47 48 bool HashedDictionary::Init() { 49 return const_cast<VCDiffEngine*>(engine_)->Init(); 50 } 51 52 class VCDiffStreamingEncoderImpl { 53 public: 54 VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary, 55 VCDiffFormatExtensionFlags format_extensions, 56 bool look_for_target_matches); 57 58 // These functions are identical to their counterparts 59 // in VCDiffStreamingEncoder. 60 bool StartEncoding(OutputStringInterface* out); 61 62 bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out); 63 64 bool FinishEncoding(OutputStringInterface* out); 65 66 private: 67 const VCDiffEngine* engine_; 68 69 std::auto_ptr<CodeTableWriterInterface> coder_; 70 71 const VCDiffFormatExtensionFlags format_extensions_; 72 73 // Determines whether to look for matches within the previously encoded 74 // target data, or just within the source (dictionary) data. Please see 75 // vcencoder.h for a full explanation of this parameter. 76 const bool look_for_target_matches_; 77 78 // This state variable is used to ensure that StartEncoding(), EncodeChunk(), 79 // and FinishEncoding() are called in the correct order. It will be true 80 // if StartEncoding() has been called, followed by zero or more calls to 81 // EncodeChunk(), but FinishEncoding() has not yet been called. It will 82 // be false initially, and also after FinishEncoding() has been called. 
83 bool encode_chunk_allowed_; 84 85 // Making these private avoids implicit copy constructor & assignment operator 86 VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT 87 void operator=(const VCDiffStreamingEncoderImpl&); 88 }; 89 90 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl( 91 const HashedDictionary* dictionary, 92 VCDiffFormatExtensionFlags format_extensions, 93 bool look_for_target_matches) 94 : engine_(dictionary->engine()), 95 format_extensions_(format_extensions), 96 look_for_target_matches_(look_for_target_matches), 97 encode_chunk_allowed_(false) { 98 if (format_extensions & VCD_FORMAT_JSON) { 99 coder_.reset(new JSONCodeTableWriter()); 100 } else { 101 // This implementation of the encoder uses the default 102 // code table. A VCDiffCodeTableWriter could also be constructed 103 // using a custom code table. 104 coder_.reset(new VCDiffCodeTableWriter( 105 (format_extensions & VCD_FORMAT_INTERLEAVED) != 0)); 106 } 107 } 108 109 inline bool VCDiffStreamingEncoderImpl::StartEncoding( 110 OutputStringInterface* out) { 111 if (!coder_->Init(engine_->dictionary_size())) { 112 VCD_DFATAL << "Internal error: " 113 "Initialization of code table writer failed" << VCD_ENDL; 114 return false; 115 } 116 coder_->WriteHeader(out, format_extensions_); 117 encode_chunk_allowed_ = true; 118 return true; 119 } 120 121 inline bool VCDiffStreamingEncoderImpl::EncodeChunk( 122 const char* data, 123 size_t len, 124 OutputStringInterface* out) { 125 if (!encode_chunk_allowed_) { 126 VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL; 127 return false; 128 } 129 if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) { 130 coder_->AddChecksum(ComputeAdler32(data, len)); 131 } 132 engine_->Encode(data, len, look_for_target_matches_, out, coder_.get()); 133 return true; 134 } 135 136 inline bool VCDiffStreamingEncoderImpl::FinishEncoding( 137 OutputStringInterface* out) { 138 if (!encode_chunk_allowed_) { 139 VCD_ERROR << 
"FinishEncoding called before StartEncoding" << VCD_ENDL; 140 return false; 141 } 142 encode_chunk_allowed_ = false; 143 coder_->FinishEncoding(out); 144 return true; 145 } 146 147 VCDiffStreamingEncoder::VCDiffStreamingEncoder( 148 const HashedDictionary* dictionary, 149 VCDiffFormatExtensionFlags format_extensions, 150 bool look_for_target_matches) 151 : impl_(new VCDiffStreamingEncoderImpl(dictionary, 152 format_extensions, 153 look_for_target_matches)) { } 154 155 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; } 156 157 bool VCDiffStreamingEncoder::StartEncodingToInterface( 158 OutputStringInterface* out) { 159 return impl_->StartEncoding(out); 160 } 161 162 bool VCDiffStreamingEncoder::EncodeChunkToInterface( 163 const char* data, 164 size_t len, 165 OutputStringInterface* out) { 166 return impl_->EncodeChunk(data, len, out); 167 } 168 169 bool VCDiffStreamingEncoder::FinishEncodingToInterface( 170 OutputStringInterface* out) { 171 return impl_->FinishEncoding(out); 172 } 173 174 bool VCDiffEncoder::EncodeToInterface(const char* target_data, 175 size_t target_len, 176 OutputStringInterface* out) { 177 out->clear(); 178 if (!encoder_) { 179 if (!dictionary_.Init()) { 180 VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL; 181 return false; 182 } 183 encoder_ = new VCDiffStreamingEncoder(&dictionary_, 184 flags_, 185 look_for_target_matches_); 186 } 187 if (!encoder_->StartEncodingToInterface(out)) { 188 return false; 189 } 190 if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) { 191 return false; 192 } 193 return encoder_->FinishEncodingToInterface(out); 194 } 195 196 } // namespace open_vcdiff 197