Home | History | Annotate | Download | only in src
      1 // Copyright 2007 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // Classes to implement an Encoder for the format described in
     17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
     18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
     19 //
     20 // The RFC describes the possibility of using a secondary compressor
     21 // to further reduce the size of each section of the VCDIFF output.
     22 // That feature is not supported in this implementation of the encoder
     23 // and decoder.
     24 // No secondary compressor types have been publicly registered with
     25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
     26 // in the more than five years since the registry was created, so there
     27 // is no standard set of compressor IDs which would be generated by other
     28 // encoders or accepted by other decoders.
     29 
     30 #include <config.h>
     31 #include <memory>  // auto_ptr
     32 #include "checksum.h"
     33 #include "encodetable.h"
     34 #include "google/output_string.h"
     35 #include "google/vcencoder.h"
     36 #include "jsonwriter.h"
     37 #include "logging.h"
     38 #include "vcdiffengine.h"
     39 
     40 namespace open_vcdiff {
     41 
     42 HashedDictionary::HashedDictionary(const char* dictionary_contents,
     43                                    size_t dictionary_size)
     44     : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
     45 
     46 HashedDictionary::~HashedDictionary() { delete engine_; }
     47 
     48 bool HashedDictionary::Init() {
     49   return const_cast<VCDiffEngine*>(engine_)->Init();
     50 }
     51 
     52 class VCDiffStreamingEncoderImpl {
     53  public:
     54   VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
     55                              VCDiffFormatExtensionFlags format_extensions,
     56                              bool look_for_target_matches);
     57 
     58   // These functions are identical to their counterparts
     59   // in VCDiffStreamingEncoder.
     60   bool StartEncoding(OutputStringInterface* out);
     61 
     62   bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
     63 
     64   bool FinishEncoding(OutputStringInterface* out);
     65 
     66  private:
     67   const VCDiffEngine* engine_;
     68 
     69   std::auto_ptr<CodeTableWriterInterface> coder_;
     70 
     71   const VCDiffFormatExtensionFlags format_extensions_;
     72 
     73   // Determines whether to look for matches within the previously encoded
     74   // target data, or just within the source (dictionary) data.  Please see
     75   // vcencoder.h for a full explanation of this parameter.
     76   const bool look_for_target_matches_;
     77 
     78   // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
     79   // and FinishEncoding() are called in the correct order.  It will be true
     80   // if StartEncoding() has been called, followed by zero or more calls to
     81   // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
     82   // be false initially, and also after FinishEncoding() has been called.
     83   bool encode_chunk_allowed_;
     84 
     85   // Making these private avoids implicit copy constructor & assignment operator
     86   VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
     87   void operator=(const VCDiffStreamingEncoderImpl&);
     88 };
     89 
     90 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
     91     const HashedDictionary* dictionary,
     92     VCDiffFormatExtensionFlags format_extensions,
     93     bool look_for_target_matches)
     94     : engine_(dictionary->engine()),
     95       format_extensions_(format_extensions),
     96       look_for_target_matches_(look_for_target_matches),
     97       encode_chunk_allowed_(false) {
     98   if (format_extensions & VCD_FORMAT_JSON) {
     99     coder_.reset(new JSONCodeTableWriter());
    100   } else {
    101     // This implementation of the encoder uses the default
    102     // code table.  A VCDiffCodeTableWriter could also be constructed
    103     // using a custom code table.
    104     coder_.reset(new VCDiffCodeTableWriter(
    105         (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
    106   }
    107 }
    108 
    109 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
    110     OutputStringInterface* out) {
    111   if (!coder_->Init(engine_->dictionary_size())) {
    112     VCD_DFATAL << "Internal error: "
    113                   "Initialization of code table writer failed" << VCD_ENDL;
    114     return false;
    115   }
    116   coder_->WriteHeader(out, format_extensions_);
    117   encode_chunk_allowed_ = true;
    118   return true;
    119 }
    120 
    121 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
    122     const char* data,
    123     size_t len,
    124     OutputStringInterface* out) {
    125   if (!encode_chunk_allowed_) {
    126     VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
    127     return false;
    128   }
    129   if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
    130     coder_->AddChecksum(ComputeAdler32(data, len));
    131   }
    132   engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
    133   return true;
    134 }
    135 
    136 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
    137     OutputStringInterface* out) {
    138   if (!encode_chunk_allowed_) {
    139     VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
    140     return false;
    141   }
    142   encode_chunk_allowed_ = false;
    143   coder_->FinishEncoding(out);
    144   return true;
    145 }
    146 
    147 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
    148     const HashedDictionary* dictionary,
    149     VCDiffFormatExtensionFlags format_extensions,
    150     bool look_for_target_matches)
    151     : impl_(new VCDiffStreamingEncoderImpl(dictionary,
    152                                            format_extensions,
    153                                            look_for_target_matches)) { }
    154 
    155 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
    156 
    157 bool VCDiffStreamingEncoder::StartEncodingToInterface(
    158     OutputStringInterface* out) {
    159   return impl_->StartEncoding(out);
    160 }
    161 
    162 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
    163     const char* data,
    164     size_t len,
    165     OutputStringInterface* out) {
    166   return impl_->EncodeChunk(data, len, out);
    167 }
    168 
    169 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
    170     OutputStringInterface* out) {
    171   return impl_->FinishEncoding(out);
    172 }
    173 
    174 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
    175                                       size_t target_len,
    176                                       OutputStringInterface* out) {
    177   out->clear();
    178   if (!encoder_) {
    179     if (!dictionary_.Init()) {
    180       VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
    181       return false;
    182     }
    183     encoder_ = new VCDiffStreamingEncoder(&dictionary_,
    184                                           flags_,
    185                                           look_for_target_matches_);
    186   }
    187   if (!encoder_->StartEncodingToInterface(out)) {
    188     return false;
    189   }
    190   if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
    191     return false;
    192   }
    193   return encoder_->FinishEncodingToInterface(out);
    194 }
    195 
    196 }  // namespace open_vcdiff
    197