1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef OPEN_VCDIFF_ENCODETABLE_H_ 17 #define OPEN_VCDIFF_ENCODETABLE_H_ 18 19 #include <config.h> 20 #include <stddef.h> // size_t 21 #include <stdint.h> // int32_t 22 #include <string> 23 #include "addrcache.h" 24 #include "checksum.h" 25 #include "codetable.h" 26 #include "codetablewriter_interface.h" 27 28 namespace open_vcdiff { 29 30 class OutputStringInterface; 31 class VCDiffInstructionMap; 32 33 // The method calls after construction *must* conform 34 // to the following pattern: 35 // {{Add|Copy|Run}* [AddChecksum] Output}* 36 // 37 // When Output has been called in this sequence, a complete target window 38 // (as defined in RFC 3284 section 4.3) will have been appended to 39 // out (unless no calls to Add, Run, or Copy were made, in which 40 // case Output will do nothing.) The output will not be available for use 41 // until after each call to Output(). 42 // 43 // NOT threadsafe. 44 // 45 class VCDiffCodeTableWriter : public CodeTableWriterInterface { 46 public: 47 // This constructor uses the default code table. 48 // If interleaved is true, the encoder writes each delta file window 49 // by interleaving instructions and sizes with their corresponding 50 // addresses and data, rather than placing these elements into three 51 // separate sections. This facilitates providing partially 52 // decoded results when only a portion of a delta file window 53 // is received (e.g. when HTTP over TCP is used as the 54 // transmission protocol.) The interleaved format is 55 // not consistent with the VCDIFF draft standard. 56 // 57 explicit VCDiffCodeTableWriter(bool interleaved); 58 59 // Uses a non-standard code table and non-standard cache sizes. The caller 60 // must guarantee that code_table_data remains allocated for the lifetime of 61 // the VCDiffCodeTableWriter object. Note that this is different from how 62 // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given 63 // encoder will use either the default code table or a statically-defined 64 // non-standard code table, whereas the decoder must have the ability to read 65 // an arbitrary non-standard code table from a delta file and discard it once 66 // the file has been decoded. 67 // 68 VCDiffCodeTableWriter(bool interleaved, 69 int near_cache_size, 70 int same_cache_size, 71 const VCDiffCodeTableData& code_table_data, 72 unsigned char max_mode); 73 74 virtual ~VCDiffCodeTableWriter(); 75 76 // Initializes the constructed object for use. 77 // This method must be called after a VCDiffCodeTableWriter is constructed 78 // and before any of its other methods can be called. It will return 79 // false if there was an error initializing the object, or true if it 80 // was successful. After the object has been initialized and used, 81 // Init() can be called again to restore the initial state of the object. 82 // 83 virtual bool Init(size_t dictionary_size); 84 85 // Write the header (as defined in section 4.1 of the RFC) to *out. 86 // This includes information that can be gathered 87 // before the first chunk of input is available. 88 virtual void WriteHeader(OutputStringInterface* out, 89 VCDiffFormatExtensionFlags format_extensions); 90 91 virtual size_t target_length() const { return target_length_; } 92 93 // Encode an ADD opcode with the "size" bytes starting at data 94 virtual void Add(const char* data, size_t size); 95 96 // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes. 97 virtual void Copy(int32_t offset, size_t size); 98 99 // Encode a RUN opcode for "size" copies of the value "byte". 100 virtual void Run(size_t size, unsigned char byte); 101 102 virtual void AddChecksum(VCDChecksum checksum) { 103 add_checksum_ = true; 104 checksum_ = checksum; 105 } 106 107 // Appends the encoded delta window to the output 108 // string. The output string is not null-terminated and may contain embedded 109 // '\0' characters. 110 virtual void Output(OutputStringInterface* out); 111 112 // There should not be any need to output more data 113 // since EncodeChunk() encodes a complete target window 114 // and there is no end-of-delta-file marker. 115 virtual void FinishEncoding(OutputStringInterface* /*out*/) {} 116 117 private: 118 typedef std::string string; 119 120 // The maximum value for the mode of a COPY instruction. 121 const unsigned char max_mode_; 122 123 // If interleaved is true, sets data_for_add_and_run_ and 124 // addresses_for_copy_ to point at instructions_and_sizes_, 125 // so that instructions, sizes, addresses and data will be 126 // combined into a single interleaved stream. 127 // If interleaved is false, sets data_for_add_and_run_ and 128 // addresses_for_copy_ to point at their corresponding 129 // separate_... strings, so that the three sections will 130 // be generated separately from one another. 131 // 132 void InitSectionPointers(bool interleaved); 133 134 // Determines the best opcode to encode an instruction, and appends 135 // or substitutes that opcode and its size into the 136 // instructions_and_sizes_ string. 137 // 138 void EncodeInstruction(VCDiffInstructionType inst, 139 size_t size, 140 unsigned char mode); 141 142 void EncodeInstruction(VCDiffInstructionType inst, size_t size) { 143 return EncodeInstruction(inst, size, 0); 144 } 145 146 // Calculates the number of bytes needed to store the given size value as a 147 // variable-length integer (VarintBE). 148 static size_t CalculateLengthOfSizeAsVarint(size_t size); 149 150 // Appends the size value to the string as a variable-length integer. 151 static void AppendSizeToString(size_t size, string* out); 152 153 // Appends the size value to the output string as a variable-length integer. 154 static void AppendSizeToOutputString(size_t size, OutputStringInterface* out); 155 156 // Calculates the "Length of the delta encoding" field for the delta window 157 // header, based on the sizes of the sections and of the other header 158 // elements. 159 size_t CalculateLengthOfTheDeltaEncoding() const; 160 161 // None of the following 'string' objects are null-terminated. 162 163 // A series of instruction opcodes, each of which may be followed 164 // by one or two Varint values representing the size parameters 165 // of the first and second instruction in the opcode. 166 string instructions_and_sizes_; 167 168 // A series of data arguments (byte values) used for ADD and RUN 169 // instructions. Depending on whether interleaved output is used 170 // for streaming or not, the pointer may point to 171 // separate_data_for_add_and_run_ or to instructions_and_sizes_. 172 string *data_for_add_and_run_; 173 string separate_data_for_add_and_run_; 174 175 // A series of Varint addresses used for COPY instructions. 176 // For the SAME mode, a byte value is stored instead of a Varint. 177 // Depending on whether interleaved output is used 178 // for streaming or not, the pointer may point to 179 // separate_addresses_for_copy_ or to instructions_and_sizes_. 180 string *addresses_for_copy_; 181 string separate_addresses_for_copy_; 182 183 VCDiffAddressCache address_cache_; 184 185 size_t dictionary_size_; 186 187 // The number of bytes of target data that has been encoded so far. 188 // Each time Add(), Copy(), or Run() is called, this will be incremented. 189 // The target length is used to compute HERE mode addresses 190 // for COPY instructions, and is also written into the header 191 // of the delta window when Output() is called. 192 // 193 size_t target_length_; 194 195 const VCDiffCodeTableData* code_table_data_; 196 197 // The instruction map facilitates finding an opcode quickly given an 198 // instruction inst, size, and mode. This is an alternate representation 199 // of the same information that is found in code_table_data_. 200 // 201 const VCDiffInstructionMap* instruction_map_; 202 203 // The zero-based index within instructions_and_sizes_ of the byte 204 // that contains the last single-instruction opcode generated by 205 // EncodeInstruction(). (See that function for exhaustive details.) 206 // It is necessary to use an index rather than a pointer for this value 207 // because instructions_and_sizes_ may be resized, which would invalidate 208 // any pointers into its data buffer. The value -1 is reserved to mean that 209 // either no opcodes have been generated yet, or else the last opcode 210 // generated was a double-instruction opcode. 211 // 212 int last_opcode_index_; 213 214 // If true, an Adler32 checksum of the target window data will be written as 215 // a variable-length integer, just after the size of the addresses section. 216 // 217 bool add_checksum_; 218 219 // The checksum to be written to the current target window, 220 // if add_checksum_ is true. 221 // This will not be calculated based on the individual calls to Add(), Run(), 222 // and Copy(), which would be unnecessarily expensive. Instead, the code 223 // that uses the VCDiffCodeTableWriter object is expected to calculate 224 // the checksum all at once and to call AddChecksum() with that value. 225 // Must be called sometime before calling Output(), though it can be called 226 // either before or after the calls to Add(), Run(), and Copy(). 227 // 228 VCDChecksum checksum_; 229 230 // Making these private avoids implicit copy constructor & assignment operator 231 VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT 232 void operator=(const VCDiffCodeTableWriter&); 233 }; 234 235 }; // namespace open_vcdiff 236 237 #endif // OPEN_VCDIFF_ENCODETABLE_H_ 238