Home | History | Annotate | Download | only in src
      1 // Copyright 2006 Google Inc.
      2 // Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef OPEN_VCDIFF_VCDIFFENGINE_H_
     17 #define OPEN_VCDIFF_VCDIFFENGINE_H_
     18 
     19 #include <config.h>
     20 #include <stddef.h>  // size_t
     21 #include <stdint.h>  // uint32_t
     22 
     23 namespace open_vcdiff {
     24 
     25 class BlockHash;
     26 class OutputStringInterface;
     27 class CodeTableWriterInterface;
     28 
     29 // The VCDiffEngine class is used to find the optimal encoding (in terms of COPY
     30 // and ADD instructions) for a given dictionary and target window.  To write the
     31 // instructions for this encoding, it calls the Copy() and Add() methods of the
     32 // code table writer object which is passed as an argument to Encode().
     33 class VCDiffEngine {
     34  public:
     35   // The minimum size of a string match that is worth putting into a COPY
     36   // instruction.  Since this value is more than twice the block size, the
     37   // encoder will always discover a match of this size, no matter whether it is
     38   // aligned on block boundaries in the dictionary text.
     39   static const size_t kMinimumMatchSize = 32;
     40 
     41   VCDiffEngine(const char* dictionary, size_t dictionary_size);
     42 
     43   ~VCDiffEngine();
     44 
     45   // Initializes the object before use.
     46   // This method must be called after constructing a VCDiffEngine object,
     47   // and before any other method may be called.  It should not be called
     48   // twice on the same object.
     49   // Returns true if initialization succeeded, or false if an error occurred,
     50   // in which case no other method except the destructor may then be used
     51   // on the object.
     52   // The Init() method is the only one allowed to treat hashed_dictionary_
     53   // as non-const.
     54   bool Init();
     55 
     56   size_t dictionary_size() const { return dictionary_size_; }
     57 
     58   // Main worker function.  Finds the best matches between the dictionary
     59   // (source) and target data, and uses the coder to write a
     60   // delta file window into *diff.
     61   // Because it is a const function, many threads
     62   // can call Encode() at once for the same VCDiffEngine object.
     63   // All thread-specific data will be stored in the coder and diff arguments.
     64   // The coder object must have been fully initialized (by calling its Init()
     65   // method, if any) before calling this function.
     66   //
     67   // look_for_target_matches determines whether to look for matches
     68   // within the previously encoded target data, or just within the source
     69   // (dictionary) data.  Please see vcencoder.h for a full explanation
     70   // of this parameter.
     71   void Encode(const char* target_data,
     72               size_t target_size,
     73               bool look_for_target_matches,
     74               OutputStringInterface* diff,
     75               CodeTableWriterInterface* coder) const;
     76 
     77  private:
     78   static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) {
     79     return size >= kMinimumMatchSize;
     80   }
     81 
     82   // The following two functions use templates to produce two different
     83   // versions of the code depending on the value of the option
     84   // look_for_target_matches.  This approach saves a test-and-branch instruction
     85   // within the inner loop of EncodeCopyForBestMatch.
     86   template<bool look_for_target_matches>
     87   void EncodeInternal(const char* target_data,
     88                       size_t target_size,
     89                       OutputStringInterface* diff,
     90                       CodeTableWriterInterface* coder) const;
     91 
     92   // If look_for_target_matches is true, then target_hash must point to a valid
     93   // BlockHash object, and cannot be NULL.  If look_for_target_matches is
     94   // false, then the value of target_hash is ignored.
     95   template<bool look_for_target_matches>
     96   size_t EncodeCopyForBestMatch(uint32_t hash_value,
     97                                 const char* target_candidate_start,
     98                                 const char* unencoded_target_start,
     99                                 size_t unencoded_target_size,
    100                                 const BlockHash* target_hash,
    101                                 CodeTableWriterInterface* coder) const;
    102 
    103   void AddUnmatchedRemainder(const char* unencoded_target_start,
    104                              size_t unencoded_target_size,
    105                              CodeTableWriterInterface* coder) const;
    106 
    107   void FinishEncoding(size_t target_size,
    108                       OutputStringInterface* diff,
    109                       CodeTableWriterInterface* coder) const;
    110 
    111   const char* dictionary_;  // A copy of the dictionary contents
    112 
    113   const size_t dictionary_size_;
    114 
    115   // A hash that contains one element for every kBlockSize bytes of dictionary_.
    116   // This can be reused to encode many different target strings using the
    117   // same dictionary, without the need to compute the hash values each time.
    118   const BlockHash* hashed_dictionary_;
    119 
    120   // Making these private avoids implicit copy constructor & assignment operator
    121   VCDiffEngine(const VCDiffEngine&);
    122   void operator=(const VCDiffEngine&);
    123 };
    124 
    125 }  // namespace open_vcdiff
    126 
    127 #endif  // OPEN_VCDIFF_VCDIFFENGINE_H_
    128