Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef OPEN_VCDIFF_ENCODETABLE_H_
     17 #define OPEN_VCDIFF_ENCODETABLE_H_
     18 
     19 #include <config.h>
     20 #include <stddef.h>  // size_t
     21 #include <stdint.h>  // int32_t
     22 #include <string>
     23 #include "addrcache.h"
     24 #include "checksum.h"
     25 #include "codetable.h"
     26 #include "codetablewriter_interface.h"
     27 
     28 namespace open_vcdiff {
     29 
     30 class OutputStringInterface;
     31 class VCDiffInstructionMap;
     32 
     33 // The method calls after construction *must* conform
     34 // to the following pattern:
     35 //    {{Add|Copy|Run}* [AddChecksum] Output}*
     36 //
     37 // When Output has been called in this sequence, a complete target window
     38 // (as defined in RFC 3284 section 4.3) will have been appended to
     39 // out (unless no calls to Add, Run, or Copy were made, in which
     40 // case Output will do nothing.)  The output will not be available for use
     41 // until after each call to Output().
     42 //
     43 // NOT threadsafe.
     44 //
     45 class VCDiffCodeTableWriter : public CodeTableWriterInterface {
     46  public:
     47   // This constructor uses the default code table.
     48   // If interleaved is true, the encoder writes each delta file window
     49   // by interleaving instructions and sizes with their corresponding
     50   // addresses and data, rather than placing these elements into three
     51   // separate sections.  This facilitates providing partially
     52   // decoded results when only a portion of a delta file window
     53   // is received (e.g. when HTTP over TCP is used as the
     54   // transmission protocol.)  The interleaved format is
     55   // not consistent with the VCDIFF draft standard.
     56   //
     57   explicit VCDiffCodeTableWriter(bool interleaved);
     58 
     59   // Uses a non-standard code table and non-standard cache sizes.  The caller
     60   // must guarantee that code_table_data remains allocated for the lifetime of
     61   // the VCDiffCodeTableWriter object.  Note that this is different from how
     62   // VCDiffCodeTableReader::UseCodeTable works.  It is assumed that a given
     63   // encoder will use either the default code table or a statically-defined
     64   // non-standard code table, whereas the decoder must have the ability to read
     65   // an arbitrary non-standard code table from a delta file and discard it once
     66   // the file has been decoded.
     67   //
     68   VCDiffCodeTableWriter(bool interleaved,
     69                         int near_cache_size,
     70                         int same_cache_size,
     71                         const VCDiffCodeTableData& code_table_data,
     72                         unsigned char max_mode);
     73 
     74   virtual ~VCDiffCodeTableWriter();
     75 
     76   // Initializes the constructed object for use.
     77   // This method must be called after a VCDiffCodeTableWriter is constructed
     78   // and before any of its other methods can be called.  It will return
     79   // false if there was an error initializing the object, or true if it
     80   // was successful.  After the object has been initialized and used,
     81   // Init() can be called again to restore the initial state of the object.
     82   //
     83   virtual bool Init(size_t dictionary_size);
     84 
     85   // Write the header (as defined in section 4.1 of the RFC) to *out.
     86   // This includes information that can be gathered
     87   // before the first chunk of input is available.
     88   virtual void WriteHeader(OutputStringInterface* out,
     89                            VCDiffFormatExtensionFlags format_extensions);
     90 
     91   virtual size_t target_length() const { return target_length_; }
     92 
     93   // Encode an ADD opcode with the "size" bytes starting at data
     94   virtual void Add(const char* data, size_t size);
     95 
     96   // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
     97   virtual void Copy(int32_t offset, size_t size);
     98 
     99   // Encode a RUN opcode for "size" copies of the value "byte".
    100   virtual void Run(size_t size, unsigned char byte);
    101 
    102   virtual void AddChecksum(VCDChecksum checksum) {
    103     add_checksum_ = true;
    104     checksum_ = checksum;
    105   }
    106 
    107   // Appends the encoded delta window to the output
    108   // string.  The output string is not null-terminated and may contain embedded
    109   // '\0' characters.
    110   virtual void Output(OutputStringInterface* out);
    111 
    112   // There should not be any need to output more data
    113   // since EncodeChunk() encodes a complete target window
    114   // and there is no end-of-delta-file marker.
    115   virtual void FinishEncoding(OutputStringInterface* /*out*/) {}
    116 
    117  private:
    118   typedef std::string string;
    119 
    120   // The maximum value for the mode of a COPY instruction.
    121   const unsigned char max_mode_;
    122 
    123   // If interleaved is true, sets data_for_add_and_run_ and
    124   // addresses_for_copy_ to point at instructions_and_sizes_,
    125   // so that instructions, sizes, addresses and data will be
    126   // combined into a single interleaved stream.
    127   // If interleaved is false, sets data_for_add_and_run_ and
    128   // addresses_for_copy_ to point at their corresponding
    129   // separate_... strings, so that the three sections will
    130   // be generated separately from one another.
    131   //
    132   void InitSectionPointers(bool interleaved);
    133 
    134   // Determines the best opcode to encode an instruction, and appends
    135   // or substitutes that opcode and its size into the
    136   // instructions_and_sizes_ string.
    137   //
    138   void EncodeInstruction(VCDiffInstructionType inst,
    139                          size_t size,
    140                          unsigned char mode);
    141 
    142   void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
    143     return EncodeInstruction(inst, size, 0);
    144   }
    145 
    146   // Calculates the number of bytes needed to store the given size value as a
    147   // variable-length integer (VarintBE).
    148   static size_t CalculateLengthOfSizeAsVarint(size_t size);
    149 
    150   // Appends the size value to the string as a variable-length integer.
    151   static void AppendSizeToString(size_t size, string* out);
    152 
    153   // Appends the size value to the output string as a variable-length integer.
    154   static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
    155 
    156   // Calculates the "Length of the delta encoding" field for the delta window
    157   // header, based on the sizes of the sections and of the other header
    158   // elements.
    159   size_t CalculateLengthOfTheDeltaEncoding() const;
    160 
    161   // None of the following 'string' objects are null-terminated.
    162 
    163   // A series of instruction opcodes, each of which may be followed
    164   // by one or two Varint values representing the size parameters
    165   // of the first and second instruction in the opcode.
    166   string instructions_and_sizes_;
    167 
    168   // A series of data arguments (byte values) used for ADD and RUN
    169   // instructions.  Depending on whether interleaved output is used
    170   // for streaming or not, the pointer may point to
    171   // separate_data_for_add_and_run_ or to instructions_and_sizes_.
    172   string *data_for_add_and_run_;
    173   string separate_data_for_add_and_run_;
    174 
    175   // A series of Varint addresses used for COPY instructions.
    176   // For the SAME mode, a byte value is stored instead of a Varint.
    177   // Depending on whether interleaved output is used
    178   // for streaming or not, the pointer may point to
    179   // separate_addresses_for_copy_ or to instructions_and_sizes_.
    180   string *addresses_for_copy_;
    181   string separate_addresses_for_copy_;
    182 
    183   VCDiffAddressCache address_cache_;
    184 
    185   size_t dictionary_size_;
    186 
    187   // The number of bytes of target data that has been encoded so far.
    188   // Each time Add(), Copy(), or Run() is called, this will be incremented.
    189   // The target length is used to compute HERE mode addresses
    190   // for COPY instructions, and is also written into the header
    191   // of the delta window when Output() is called.
    192   //
    193   size_t target_length_;
    194 
    195   const VCDiffCodeTableData* code_table_data_;
    196 
    197   // The instruction map facilitates finding an opcode quickly given an
    198   // instruction inst, size, and mode.  This is an alternate representation
    199   // of the same information that is found in code_table_data_.
    200   //
    201   const VCDiffInstructionMap* instruction_map_;
    202 
    203   // The zero-based index within instructions_and_sizes_ of the byte
    204   // that contains the last single-instruction opcode generated by
    205   // EncodeInstruction().  (See that function for exhaustive details.)
    206   // It is necessary to use an index rather than a pointer for this value
    207   // because instructions_and_sizes_ may be resized, which would invalidate
    208   // any pointers into its data buffer.  The value -1 is reserved to mean that
    209   // either no opcodes have been generated yet, or else the last opcode
    210   // generated was a double-instruction opcode.
    211   //
    212   int last_opcode_index_;
    213 
    214   // If true, an Adler32 checksum of the target window data will be written as
    215   // a variable-length integer, just after the size of the addresses section.
    216   //
    217   bool add_checksum_;
    218 
    219   // The checksum to be written to the current target window,
    220   // if add_checksum_ is true.
    221   // This will not be calculated based on the individual calls to Add(), Run(),
    222   // and Copy(), which would be unnecessarily expensive.  Instead, the code
    223   // that uses the VCDiffCodeTableWriter object is expected to calculate
    224   // the checksum all at once and to call AddChecksum() with that value.
    225   // Must be called sometime before calling Output(), though it can be called
    226   // either before or after the calls to Add(), Run(), and Copy().
    227   //
    228   VCDChecksum checksum_;
    229 
    230   // Making these private avoids implicit copy constructor & assignment operator
    231   VCDiffCodeTableWriter(const VCDiffCodeTableWriter&);  // NOLINT
    232   void operator=(const VCDiffCodeTableWriter&);
    233 };
    234 
    235 };  // namespace open_vcdiff
    236 
    237 #endif  // OPEN_VCDIFF_ENCODETABLE_H_
    238