Home | History | Annotate | Download | only in src
      1 // Copyright 2007 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // Classes to implement an Encoder for the format described in
     17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
     18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
     19 //
     20 // The RFC describes the possibility of using a secondary compressor
     21 // to further reduce the size of each section of the VCDIFF output.
     22 // That feature is not supported in this implementation of the encoder
     23 // and decoder.
     24 // No secondary compressor types have been publicly registered with
     25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
     26 // in the more than five years since the registry was created, so there
     27 // is no standard set of compressor IDs which would be generated by other
     28 // encoders or accepted by other decoders.
     29 
     30 #include <config.h>
     31 #include "google/vcencoder.h"
     32 #include <vector>
     33 #include "checksum.h"
     34 #include "encodetable.h"
     35 #include "logging.h"
     36 #include "google/output_string.h"
     37 #include "vcdiffengine.h"
     38 
     39 namespace open_vcdiff {
     40 
     41 HashedDictionary::HashedDictionary(const char* dictionary_contents,
     42                                    size_t dictionary_size)
     43     : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
     44 
     45 HashedDictionary::~HashedDictionary() { delete engine_; }
     46 
     47 bool HashedDictionary::Init() {
     48   return const_cast<VCDiffEngine*>(engine_)->Init();
     49 }
     50 
     51 class VCDiffStreamingEncoderImpl {
     52  public:
     53   VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
     54                              VCDiffFormatExtensionFlags format_extensions,
     55                              bool look_for_target_matches);
     56 
     57   // These functions are identical to their counterparts
     58   // in VCDiffStreamingEncoder.
     59   bool StartEncoding(OutputStringInterface* out);
     60 
     61   bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
     62 
     63   bool FinishEncoding(OutputStringInterface* out);
     64 
     65   const std::vector<int>& match_counts() const {
     66     return coder_.match_counts();
     67   }
     68 
     69  private:
     70   // Write the header (as defined in section 4.1 of the RFC) to *output.
     71   // This includes information that can be gathered
     72   // before the first chunk of input is available.
     73   void WriteHeader(OutputStringInterface* output) const;
     74 
     75   const VCDiffEngine* engine_;
     76 
     77   // This implementation of the encoder uses the default
     78   // code table.  A VCDiffCodeTableWriter could also be constructed
     79   // using a custom code table.
     80   VCDiffCodeTableWriter coder_;
     81 
     82   const VCDiffFormatExtensionFlags format_extensions_;
     83 
     84   // Determines whether to look for matches within the previously encoded
     85   // target data, or just within the source (dictionary) data.  Please see
     86   // vcencoder.h for a full explanation of this parameter.
     87   const bool look_for_target_matches_;
     88 
     89   // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
     90   // and FinishEncoding() are called in the correct order.  It will be true
     91   // if StartEncoding() has been called, followed by zero or more calls to
     92   // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
     93   // be false initially, and also after FinishEncoding() has been called.
     94   bool encode_chunk_allowed_;
     95 
     96   // Making these private avoids implicit copy constructor & assignment operator
     97   VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
     98   void operator=(const VCDiffStreamingEncoderImpl&);
     99 };
    100 
    101 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
    102     const HashedDictionary* dictionary,
    103     VCDiffFormatExtensionFlags format_extensions,
    104     bool look_for_target_matches)
    105     : engine_(dictionary->engine()),
    106       coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0),
    107       format_extensions_(format_extensions),
    108       look_for_target_matches_(look_for_target_matches),
    109       encode_chunk_allowed_(false) { }
    110 
    111 inline void VCDiffStreamingEncoderImpl::WriteHeader(
    112     OutputStringInterface* output) const {
    113   DeltaFileHeader header_data = {
    114     0xD6,  // Header1: "V" | 0x80
    115     0xC3,  // Header2: "C" | 0x80
    116     0xC4,  // Header3: "D" | 0x80
    117     0x00,  // Header4: Draft standard format
    118     0x00 };  // Hdr_Indicator:
    119              // No compression, no custom code table
    120   if (format_extensions_ != VCD_STANDARD_FORMAT) {
    121     header_data.header4 = 'S';  // Header4: VCDIFF/SDCH, extensions used
    122   }
    123   output->append(reinterpret_cast<const char*>(&header_data),
    124                  sizeof(header_data));
    125   // If custom cache table sizes or a custom code table were used
    126   // for encoding, here is where they would be appended to *output.
    127   // This implementation of the encoder does not use those features,
    128   // although the decoder can understand and interpret them.
    129 }
    130 
    131 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
    132     OutputStringInterface* out) {
    133   if (!coder_.Init(engine_->dictionary_size())) {
    134     LOG(DFATAL) << "Internal error: "
    135                    "Initialization of code table writer failed" << LOG_ENDL;
    136     return false;
    137   }
    138   WriteHeader(out);
    139   encode_chunk_allowed_ = true;
    140   return true;
    141 }
    142 
    143 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
    144     const char* data,
    145     size_t len,
    146     OutputStringInterface* out) {
    147   if (!encode_chunk_allowed_) {
    148     LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
    149     return false;
    150   }
    151   if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
    152     coder_.AddChecksum(ComputeAdler32(data, len));
    153   }
    154   engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
    155   return true;
    156 }
    157 
    158 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
    159     OutputStringInterface* /*out*/) {
    160   if (!encode_chunk_allowed_) {
    161     LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
    162     return false;
    163   }
    164   encode_chunk_allowed_ = false;
    165   // There should not be any need to output more data
    166   // since EncodeChunk() encodes a complete target window
    167   // and there is no end-of-delta-file marker.
    168   return true;
    169 }
    170 
    171 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    172     const HashedDictionary* dictionary,
    173     VCDiffFormatExtensionFlags format_extensions,
    174     bool look_for_target_matches)
    175     : impl_(new VCDiffStreamingEncoderImpl(dictionary,
    176                                            format_extensions,
    177                                            look_for_target_matches)) { }
    178 
    179 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
    180 
    181 bool VCDiffStreamingEncoder::StartEncodingToInterface(
    182     OutputStringInterface* out) {
    183   return impl_->StartEncoding(out);
    184 }
    185 
    186 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
    187     const char* data,
    188     size_t len,
    189     OutputStringInterface* out) {
    190   return impl_->EncodeChunk(data, len, out);
    191 }
    192 
    193 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
    194     OutputStringInterface* out) {
    195   return impl_->FinishEncoding(out);
    196 }
    197 
    198 void VCDiffStreamingEncoder::GetMatchCounts(
    199     std::vector<int>* match_counts) const {
    200   if (!match_counts) {
    201     LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
    202     return;
    203   }
    204   *match_counts = impl_->match_counts();
    205 }
    206 
    207 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
    208                                       size_t target_len,
    209                                       OutputStringInterface* out) {
    210   out->clear();
    211   if (!encoder_) {
    212     if (!dictionary_.Init()) {
    213       LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
    214       return false;
    215     }
    216     encoder_ = new VCDiffStreamingEncoder(&dictionary_,
    217                                           flags_,
    218                                           look_for_target_matches_);
    219   }
    220   if (!encoder_->StartEncodingToInterface(out)) {
    221     return false;
    222   }
    223   if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
    224     return false;
    225   }
    226   return encoder_->FinishEncodingToInterface(out);
    227 }
    228 
    229 }  // namespace open_vcdiff
    230