Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // Implements a Decoder for the format described in
     17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
     18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
     19 //
     20 // The RFC describes the possibility of using a secondary compressor
     21 // to further reduce the size of each section of the VCDIFF output.
     22 // That feature is not supported in this implementation of the encoder
     23 // and decoder.
     24 // No secondary compressor types have been publicly registered with
     25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
     26 // in the more than five years since the registry was created, so there
     27 // is no standard set of compressor IDs which would be generated by other
     28 // encoders or accepted by other decoders.
     29 
     30 #include <config.h>
     31 #include "google/vcdecoder.h"
     32 #include <stddef.h>  // size_t, ptrdiff_t
     33 #include <stdint.h>  // int32_t
     34 #include <string.h>  // memcpy, memset
     35 #include <memory>  // auto_ptr
     36 #include <string>
     37 #include "addrcache.h"
     38 #include "checksum.h"
     39 #include "codetable.h"
     40 #include "decodetable.h"
     41 #include "headerparser.h"
     42 #include "logging.h"
     43 #include "google/output_string.h"
     44 #include "varint_bigendian.h"
     45 #include "vcdiff_defs.h"
     46 
     47 namespace open_vcdiff {
     48 
     49 // This class is used to parse delta file windows as described
     50 // in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
     51 //
     52 // Here is the window format copied from the RFC:
     53 //
     54 // Window1
     55 //     Win_Indicator                            - byte
     56 //     [Source segment size]                    - integer
     57 //     [Source segment position]                - integer
     58 //     The delta encoding of the target window
     59 //         Length of the delta encoding         - integer
     60 //         The delta encoding
     61 //             Size of the target window        - integer
     62 //             Delta_Indicator                  - byte
     63 //             Length of data for ADDs and RUNs - integer
     64 //             Length of instructions and sizes - integer
     65 //             Length of addresses for COPYs    - integer
     66 //             Data section for ADDs and RUNs   - array of bytes
     67 //             Instructions and sizes section   - array of bytes
     68 //             Addresses section for COPYs      - array of bytes
     69 // Window2
     70 // ...
     71 //
     72 // Sample usage:
     73 //
     74 // VCDiffDeltaFileWindow delta_window_;
     75 // delta_window_.Init(parent);
     76 // ParseableChunk parseable_chunk(input_buffer,
     77 //                                input_size,
     78 //                                leftover_unencoded_bytes);
     79 // while (!parseable_chunk.Empty()) {
     80 //   switch (delta_window_.DecodeWindow(&parseable_chunk)) {
     81 //     case RESULT_END_OF_DATA:
     82 //       <Read more input and retry DecodeWindow later.>
     83 //     case RESULT_ERROR:
     84 //       <Handle error case.  An error log message has already been generated.>
     85 //   }
     86 // }
     87 //
     88 // DecodeWindow consumes only a single window, and needs to be placed within
     89 // a loop if multiple windows are to be processed.
     90 //
     91 class VCDiffDeltaFileWindow {
     92  public:
     93   VCDiffDeltaFileWindow();
     94   ~VCDiffDeltaFileWindow();
     95 
     96   // Init() should be called immediately after constructing the
     97   // VCDiffDeltaFileWindow().  It must be called before DecodeWindow() can be
     98   // invoked, or an error will occur.
     99   void Init(VCDiffStreamingDecoderImpl* parent);
    100 
    101   // Resets the pointers to the data sections in the current window.
    102   void Reset();
    103 
    104   bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
    105                     unsigned char max_mode) {
    106     return reader_.UseCodeTable(code_table_data, max_mode);
    107   }
    108 
    109   // Decodes a single delta window using the input data from *parseable_chunk.
    110   // Appends the decoded target window to parent_->decoded_target().  Returns
    111   // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
    112   // the end of input was reached before the entire window could be decoded and
    113   // more input is expected (only possible if IsInterleaved() is true), or
    114   // RESULT_ERROR if an error occurred during decoding.  In the RESULT_ERROR
    115   // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
    116   // parseable_chunk->Advance() is called to point to the input data position
    117   // just after the data that has been decoded.
    118   //
    119   VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);
    120 
    121   bool FoundWindowHeader() const {
    122     return found_header_;
    123   }
    124 
    125   bool MoreDataExpected() const {
    126     // When parsing an interleaved-format delta file,
    127     // every time DecodeBody() exits, interleaved_bytes_expected_
    128     // will be decremented by the number of bytes parsed.  If it
    129     // reaches zero, then there is no more data expected because
    130     // the size of the interleaved section (given in the window
    131     // header) has been reached.
    132     return IsInterleaved() && (interleaved_bytes_expected_ > 0);
    133   }
    134 
    135   size_t target_window_start_pos() const { return target_window_start_pos_; }
    136 
    137   void set_target_window_start_pos(size_t new_start_pos) {
    138     target_window_start_pos_ = new_start_pos;
    139   }
    140 
    141   // Returns the number of bytes remaining to be decoded in the target window.
    142   // If not in the process of decoding a window, returns 0.
    143   size_t TargetBytesRemaining();
    144 
    145  private:
    146   // Reads the header of the window section as described in RFC sections 4.2 and
    147   // 4.3, up to and including the value "Length of addresses for COPYs".  If the
    148   // entire header is found, this function sets up the DeltaWindowSections
    149   // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
    150   // that the decoder can begin decoding the opcodes in these sections.  Returns
    151   // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
    152   // available data was reached before the entire header could be read.  (The
    153   // latter may be an error condition if there is no more data available.)
    154   // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
    155   // parsed header.
    156   //
    157   VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
    158 
    159   // After the window header has been parsed as far as the Delta_Indicator,
    160   // this function is called to parse the following delta window header fields:
    161   //
    162   //     Length of data for ADDs and RUNs - integer (VarintBE format)
    163   //     Length of instructions and sizes - integer (VarintBE format)
    164   //     Length of addresses for COPYs    - integer (VarintBE format)
    165   //
    166   // If has_checksum_ is true, it also looks for the following element:
    167   //
    168   //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
    169   //
    170   // It sets up the DeltaWindowSections instructions_and_sizes_,
    171   // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
    172   // is being used, all three sections will include the entire window body; if
    173   // the standard format is used, three non-overlapping window sections will be
    174   // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
    175   // if standard format is being used and there is not enough input data to read
    176   // the entire window body.  Otherwise, returns RESULT_SUCCESS.
    177   VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
    178 
    179   // Decodes the body of the window section as described in RFC sections 4.3,
    180   // including the sections "Data section for ADDs and RUNs", "Instructions
    181   // and sizes section", and "Addresses section for COPYs".  These sections
    182   // must already have been set up by ReadWindowHeader().  Returns a
    183   // non-negative value on success, or RESULT_END_OF_DATA if the end of input
    184   // was reached before the entire window could be decoded (only possible if
    185   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
    186   // decoding.  Appends as much of the decoded target window as possible to
    187   // parent->decoded_target().
    188   //
    189   int DecodeBody(ParseableChunk* parseable_chunk);
    190 
    191   // Returns the number of bytes already decoded into the target window.
    192   size_t TargetBytesDecoded();
    193 
    194   // Decodes a single ADD instruction, updating parent_->decoded_target_.
    195   VCDiffResult DecodeAdd(size_t size);
    196 
    197   // Decodes a single RUN instruction, updating parent_->decoded_target_.
    198   VCDiffResult DecodeRun(size_t size);
    199 
    200   // Decodes a single COPY instruction, updating parent_->decoded_target_.
    201   VCDiffResult DecodeCopy(size_t size, unsigned char mode);
    202 
    203   // When using the interleaved format, this function is called both on parsing
    204   // the header and on resuming after a RESULT_END_OF_DATA was returned from a
    205   // previous call to DecodeBody().  It sets up all three section pointers to
    206   // reference the same interleaved stream of instructions, sizes, addresses,
    207   // and data.  These pointers must be reset every time that work resumes on a
    208   // delta window,  because the input data string may have been changed or
    209   // resized since DecodeBody() last returned.
    210   void UpdateInterleavedSectionPointers(const char* data_pos,
    211                                         const char* data_end) {
    212     const ptrdiff_t available_data = data_end - data_pos;
    213     // Don't read past the end of currently-available data
    214     if (available_data > interleaved_bytes_expected_) {
    215       instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
    216     } else {
    217       instructions_and_sizes_.Init(data_pos, available_data);
    218     }
    219     data_for_add_and_run_.Init(&instructions_and_sizes_);
    220     addresses_for_copy_.Init(&instructions_and_sizes_);
    221   }
    222 
    223   // If true, the interleaved format described in AllowInterleaved() is used
    224   // for the current delta file.  Only valid after ReadWindowHeader() has been
    225   // called and returned a positive number (i.e., the whole header was parsed),
    226   // but before the window has finished decoding.
    227   //
    228   bool IsInterleaved() const {
    229     // If the sections are interleaved, both addresses_for_copy_ and
    230     // data_for_add_and_run_ should point at instructions_and_sizes_.
    231     return !addresses_for_copy_.IsOwned();
    232   }
    233 
    234   // Executes a single COPY or ADD instruction, appending data to
    235   // parent_->decoded_target().
    236   void CopyBytes(const char* data, size_t size);
    237 
    238   // Executes a single RUN instruction, appending data to
    239   // parent_->decoded_target().
    240   void RunByte(unsigned char byte, size_t size);
    241 
    242   // Advance *parseable_chunk to point to the current position in the
    243   // instructions/sizes section.  If interleaved format is used, then
    244   // decrement the number of expected bytes in the instructions/sizes section
    245   // by the number of instruction/size bytes parsed.
    246   void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
    247 
    248   // The parent object which was passed to Init().
    249   VCDiffStreamingDecoderImpl* parent_;
    250 
    251   // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
    252   // has been called and succeeded in parsing the delta window header, but the
    253   // entire window has not yet been decoded.
    254   bool found_header_;
    255 
    256   // Contents and length of the current source window.  source_segment_ptr_
    257   // will be non-NULL if (a) the window section header for the current window
    258   // has been read, but the window has not yet finished decoding; or
    259   // (b) the window did not specify a source segment.
    260   const char* source_segment_ptr_;
    261   size_t source_segment_length_;
    262 
    263   // The delta encoding window sections as defined in RFC section 4.3.
    264   // The pointer for each section will be incremented as data is consumed and
    265   // decoded from that section.  If the interleaved format is used,
    266   // data_for_add_and_run_ and addresses_for_copy_ will both point to
    267   // instructions_and_sizes_; otherwise, they will be separate data sections.
    268   //
    269   DeltaWindowSection instructions_and_sizes_;
    270   DeltaWindowSection data_for_add_and_run_;
    271   DeltaWindowSection addresses_for_copy_;
    272 
    273   // The expected bytes left to decode in instructions_and_sizes_.  Only used
    274   // for the interleaved format.
    275   int interleaved_bytes_expected_;
    276 
    277   // The expected length of the target window once it has been decoded.
    278   size_t target_window_length_;
    279 
    280   // The index in decoded_target at which the first byte of the current
    281   // target window was/will be written.
    282   size_t target_window_start_pos_;
    283 
    284   // If has_checksum_ is true, then expected_checksum_ contains an Adler32
    285   // checksum of the target window data.  This is an extension included in the
    286   // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
    287   bool has_checksum_;
    288   VCDChecksum expected_checksum_;
    289 
    290   VCDiffCodeTableReader reader_;
    291 
    292   // Making these private avoids implicit copy constructor & assignment operator
    293   VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
    294   void operator=(const VCDiffDeltaFileWindow&);
    295 };
    296 
    297 // *** Inline methods for VCDiffDeltaFileWindow
    298 
    299 inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
    300   Reset();
    301 }
    302 
    303 inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
    304 
    305 inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
    306   parent_ = parent;
    307 }
    308 
    309 class VCDiffStreamingDecoderImpl {
    310  public:
    311   typedef std::string string;
    312 
    313   // The default maximum target file size (and target window size) if
    314   // SetMaximumTargetFileSize() is not called.
    315   static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
    316 
    317   // The largest value that can be passed to SetMaximumTargetWindowSize().
    318   // Using a larger value will result in an error.
    319   static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
    320 
    321   // A constant that is the default value for planned_target_file_size_,
    322   // indicating that the decoder does not have an expected length
    323   // for the target data.
    324   static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
    325 
    326   VCDiffStreamingDecoderImpl();
    327   ~VCDiffStreamingDecoderImpl();
    328 
    329   // Resets all member variables to their initial states.
    330   void Reset();
    331 
    332   // These functions are identical to their counterparts
    333   // in VCDiffStreamingDecoder.
    334   //
    335   void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
    336 
    337   bool DecodeChunk(const char* data,
    338                    size_t len,
    339                    OutputStringInterface* output_string);
    340 
    341   bool FinishDecoding();
    342 
    343   // If true, the version of VCDIFF used in the current delta file allows
    344   // for the interleaved format, in which instructions, addresses and data
    345   // are all sent interleaved in the instructions section of each window
    346   // rather than being sent in separate sections.  This is not part of
    347   // the VCDIFF draft standard, so we've defined a special version code
    348   // 'S' which implies that this feature is available.  Even if interleaving
    349   // is supported, it is not mandatory; interleaved format will be implied
    350   // if the address and data sections are both zero-length.
    351   //
    352   bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
    353 
    354   // If true, the version of VCDIFF used in the current delta file allows
    355   // each delta window to contain an Adler32 checksum of the target window data.
    356   // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
    357   // this checksum will appear as a variable-length integer, just after the
    358   // "length of addresses for COPYs" value and before the window data sections.
    359   // It is possible for some windows in a delta file to use the checksum feature
    360   // and for others not to use it (and leave the flag bit set to 0.)
    361   // Just as with AllowInterleaved(), this extension is not part of the draft
    362   // standard and is only available when the version code 'S' is specified.
    363   //
    364   bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
    365 
    366   bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
    367     maximum_target_file_size_ = new_maximum_target_file_size;
    368     return true;
    369   }
    370 
    371   bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
    372     if (new_maximum_target_window_size > kTargetSizeLimit) {
    373       VCD_ERROR << "Specified maximum target window size "
    374                 << new_maximum_target_window_size << " exceeds limit of "
    375                 << kTargetSizeLimit << " bytes" << VCD_ENDL;
    376       return false;
    377     }
    378     maximum_target_window_size_ = new_maximum_target_window_size;
    379     return true;
    380   }
    381 
    382   // See description of planned_target_file_size_, below.
    383   bool HasPlannedTargetFileSize() const {
    384     return planned_target_file_size_ != kUnlimitedBytes;
    385   }
    386 
    387   void SetPlannedTargetFileSize(size_t planned_target_file_size) {
    388     planned_target_file_size_ = planned_target_file_size;
    389   }
    390 
    391   void AddToTotalTargetWindowSize(size_t window_size) {
    392     total_of_target_window_sizes_ += window_size;
    393   }
    394 
    395   // Checks to see whether the decoded target data has reached its planned size.
    396   bool ReachedPlannedTargetFileSize() const {
    397     if (!HasPlannedTargetFileSize()) {
    398       return false;
    399     }
    400     // The planned target file size should not have been exceeded.
    401     // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
    402     // each target window would not make the target file exceed that limit, and
    403     // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
    404     // exceeds the advertised target window size.
    405     if (total_of_target_window_sizes_ > planned_target_file_size_) {
    406       VCD_DFATAL << "Internal error: Decoded data size "
    407                  << total_of_target_window_sizes_
    408                  << " exceeds planned target file size "
    409                  << planned_target_file_size_ << VCD_ENDL;
    410       return true;
    411     }
    412     return total_of_target_window_sizes_ == planned_target_file_size_;
    413   }
    414 
    415   // Checks to see whether adding a new target window of the specified size
    416   // would exceed the planned target file size, the maximum target file size,
    417   // or the maximum target window size.  If so, logs an error and returns true;
    418   // otherwise, returns false.
    419   bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
    420 
    421   // Returns the amount of input data passed to the last DecodeChunk()
    422   // that was not consumed by the decoder.  This is essential if
    423   // SetPlannedTargetFileSize() is being used, in order to preserve the
    424   // remaining input data stream once the planned target file has been decoded.
    425   size_t GetUnconsumedDataSize() const {
    426     return unparsed_bytes_.size();
    427   }
    428 
    429   // This function will return true if the decoder has parsed a complete delta
    430   // file header plus zero or more delta file windows, with no data left over.
    431   // It will also return true if no delta data at all was decoded.  If these
    432   // conditions are not met, then FinishDecoding() should not be called.
    433   bool IsDecodingComplete() const {
    434     if (!FoundFileHeader()) {
    435       // No complete delta file header has been parsed yet.  DecodeChunk()
    436       // may have received some data that it hasn't yet parsed, in which case
    437       // decoding is incomplete.
    438       return unparsed_bytes_.empty();
    439     } else if (custom_code_table_decoder_.get()) {
    440       // The decoder is in the middle of parsing a custom code table.
    441       return false;
    442     } else if (delta_window_.FoundWindowHeader()) {
    443       // The decoder is in the middle of parsing an interleaved format delta
    444       // window.
    445       return false;
    446     } else if (ReachedPlannedTargetFileSize()) {
    447       // The decoder found exactly the planned number of bytes.  In this case
    448       // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
    449       // data after the end of the delta file.
    450       return true;
    451     } else {
    452       // No complete delta file window has been parsed yet.  DecodeChunk()
    453       // may have received some data that it hasn't yet parsed, in which case
    454       // decoding is incomplete.
    455       return unparsed_bytes_.empty();
    456     }
    457   }
    458 
    459   const char* dictionary_ptr() const { return dictionary_ptr_; }
    460 
    461   size_t dictionary_size() const { return dictionary_size_; }
    462 
    463   VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
    464 
    465   string* decoded_target() { return &decoded_target_; }
    466 
    467   bool allow_vcd_target() const { return allow_vcd_target_; }
    468 
    469   void SetAllowVcdTarget(bool allow_vcd_target) {
    470     if (start_decoding_was_called_) {
    471       VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
    472                  << VCD_ENDL;
    473       return;
    474     }
    475     allow_vcd_target_ = allow_vcd_target;
    476   }
    477 
    478  private:
    479   // Reads the VCDiff delta file header section as described in RFC section 4.1,
    480   // except the custom code table data.  Returns RESULT_ERROR if an error
    481   // occurred, or RESULT_END_OF_DATA if the end of available data was reached
    482   // before the entire header could be read.  (The latter may be an error
    483   // condition if there is no more data available.)  Otherwise, advances
    484   // data->position_ past the header and returns RESULT_SUCCESS.
    485   //
    486   VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
    487 
    488   // Indicates whether or not the header has already been read.
    489   bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
    490 
    491   // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
    492   // file header, this function parses the custom cache sizes and initializes
    493   // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
    494   // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
    495   // error occurred, or RESULT_END_OF_DATA if the end of available data was
    496   // reached before the custom cache sizes could be read.  Otherwise, returns
    497   // the number of bytes read.
    498   //
    499   int InitCustomCodeTable(const char* data_start, const char* data_end);
    500 
    501   // If a custom code table was specified in the header section that was parsed
    502   // by ReadDeltaFileHeader(), this function makes a recursive call to another
    503   // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
    504   // custom code table is expected to be supplied as an embedded VCDIFF
    505   // encoding that uses the standard code table.  Returns RESULT_ERROR if an
    506   // error occurs, or RESULT_END_OF_DATA if the end of available data was
    507   // reached before the entire custom code table could be read.  Otherwise,
    508   // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
    509   // custom code table.  If the function returns RESULT_SUCCESS or
    510   // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
    511   //
    512   VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
    513 
    514   // Called after the decoder exhausts all input data.  This function
    515   // copies from decoded_target_ into output_string all the data that
    516   // has not yet been output.  It sets decoded_target_output_position_
    517   // to mark the start of the next data that needs to be output.
    518   void AppendNewOutputText(OutputStringInterface* output_string);
    519 
    520   // Appends to output_string the portion of decoded_target_ that has
    521   // not yet been output, then clears decoded_target_.  This function is
    522   // called after each complete target window has been decoded if
    523   // allow_vcd_target is false.  In that case, there is no need to retain
    524   // target data from any window except the current window.
    525   void FlushDecodedTarget(OutputStringInterface* output_string);
    526 
    527   // Contents and length of the source (dictionary) data.
    528   const char* dictionary_ptr_;
    529   size_t dictionary_size_;
    530 
    531   // This string will be used to store any unparsed bytes left over when
    532   // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
    533   // It will also be used to concatenate those unparsed bytes with the data
    534   // supplied to the next call to DecodeChunk(), so that they appear in
    535   // contiguous memory.
    536   string unparsed_bytes_;
    537 
    538   // The portion of the target file that has been decoded so far.  This will be
    539   // used to fill the output string for DecodeChunk(), and will also be used to
    540   // execute COPY instructions that reference target data.  Since the source
    541   // window can come from a range of addresses in the previously decoded target
    542   // data, the entire target file needs to be available to the decoder, not just
    543   // the current target window.
    544   string decoded_target_;
    545 
    546   // The VCDIFF version byte (also known as "header4") from the
    547   // delta file header.
    548   unsigned char vcdiff_version_code_;
    549 
    550   VCDiffDeltaFileWindow delta_window_;
    551 
    552   std::auto_ptr<VCDiffAddressCache> addr_cache_;
    553 
    554   // Will be NULL unless a custom code table has been defined.
    555   std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
    556 
    557   // Used to receive the decoded custom code table.
    558   string custom_code_table_string_;
    559 
    560   // If a custom code table is specified, it will be expressed
    561   // as an embedded VCDIFF delta file which uses the default code table
    562   // as the source file (dictionary).  Use a child decoder object
    563   // to decode that delta file.
    564   std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
    565 
    566   // If set, then the decoder is expecting *exactly* this number of
    567   // target bytes to be decoded from one or more delta file windows.
    568   // If this number is exceeded while decoding a window, but was not met
    569   // before starting on that window, an error will be reported.
    570   // If FinishDecoding() is called before this number is met, an error
    571   // will also be reported.  This feature is used for decoding the
    572   // embedded code table data within a VCDIFF delta file; we want to
    573   // stop processing the embedded data once the entire code table has
    574   // been decoded, and treat the rest of the available data as part
    575   // of the enclosing delta file.
    576   size_t planned_target_file_size_;
    577 
    578   size_t maximum_target_file_size_;
    579 
    580   size_t maximum_target_window_size_;
    581 
    582   // Contains the sum of the decoded sizes of all target windows seen so far,
    583   // including the expected total size of the current target window in progress
    584   // (even if some of the current target window has not yet been decoded.)
    585   size_t total_of_target_window_sizes_;
    586 
    587   // Contains the byte position within decoded_target_ of the first data that
    588   // has not yet been output by AppendNewOutputText().
    589   size_t decoded_target_output_position_;
    590 
    591   // This value is used to ensure the correct order of calls to the interface
    592   // functions, i.e., a single call to StartDecoding(), followed by zero or
    593   // more calls to DecodeChunk(), followed by a single call to
    594   // FinishDecoding().
    595   bool start_decoding_was_called_;
    596 
    597   // If this value is true then the VCD_TARGET flag can be specified to allow
    598   // the source segment to be chosen from the previously-decoded target data.
    599   // (This is the default behavior.)  If it is false, then specifying the
    600   // VCD_TARGET flag is considered an error, and the decoder does not need to
    601   // keep in memory any decoded target data prior to the current window.
    602   bool allow_vcd_target_;
    603 
    604   // Making these private avoids implicit copy constructor & assignment operator
    605   VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
    606   void operator=(const VCDiffStreamingDecoderImpl&);
    607 };
    608 
    609 // *** Methods for VCDiffStreamingDecoderImpl
    610 
// Out-of-class definitions of the class's static size_t constants.  No
// initializer appears here, so the values are presumably supplied at the
// in-class declarations (not visible in this part of the file -- confirm).
// These definitions give the constants storage so that they can be odr-used,
// for example when bound to a const reference.
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
    613 
    614 VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
    615     : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
    616       maximum_target_window_size_(kDefaultMaximumTargetFileSize),
    617       allow_vcd_target_(true) {
    618   delta_window_.Init(this);
    619   Reset();
    620 }
    621 
    622 // Reset() will delete the component objects without reallocating them.
    623 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
    624 
    625 void VCDiffStreamingDecoderImpl::Reset() {
    626   start_decoding_was_called_ = false;
    627   dictionary_ptr_ = NULL;
    628   dictionary_size_ = 0;
    629   vcdiff_version_code_ = '\0';
    630   planned_target_file_size_ = kUnlimitedBytes;
    631   total_of_target_window_sizes_ = 0;
    632   addr_cache_.reset();
    633   custom_code_table_.reset();
    634   custom_code_table_decoder_.reset();
    635   delta_window_.Reset();
    636   decoded_target_output_position_ = 0;
    637 }
    638 
    639 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
    640                                                size_t dictionary_size) {
    641   if (start_decoding_was_called_) {
    642     VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
    643                << VCD_ENDL;
    644     return;
    645   }
    646   unparsed_bytes_.clear();
    647   decoded_target_.clear();  // delta_window_.Reset() depends on this
    648   Reset();
    649   dictionary_ptr_ = dictionary_ptr;
    650   dictionary_size_ = dictionary_size;
    651   start_decoding_was_called_ = true;
    652 }
    653 
    654 // Reads the VCDiff delta file header section as described in RFC section 4.1:
    655 //
    656 //     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
    657 //     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
    658 //     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
    659 //     Header4                                  - byte
    660 //     Hdr_Indicator                            - byte
    661 //     [Secondary compressor ID]                - byte
    662 //     [Length of code table data]              - integer
    663 //     [Code table data]
    664 //
    665 // Initializes the code table and address cache objects.  Returns RESULT_ERROR
    666 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was
    667 // reached before the entire header could be read.  (The latter may be an error
    668 // condition if there is no more data available.)  Otherwise, returns
    669 // RESULT_SUCCESS, and removes the header bytes from the data string.
    670 //
    671 // It's relatively inefficient to expect this function to parse any number of
    672 // input bytes available, down to 1 byte, but it is necessary in case the input
    673 // is not a properly formatted VCDIFF delta file.  If the entire input consists
    674 // of two bytes "12", then we should recognize that it does not match the
    675 // initial VCDIFF magic number "VCD" and report an error, rather than waiting
    676 // indefinitely for more input that will never arrive.
    677 //
    678 VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
    679     ParseableChunk* data) {
    680   if (FoundFileHeader()) {
    681     return RESULT_SUCCESS;
    682   }
    683   size_t data_size = data->UnparsedSize();
    684   const DeltaFileHeader* header =
    685       reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
    686   bool wrong_magic_number = false;
    687   switch (data_size) {
    688     // Verify only the bytes that are available.
    689     default:
    690       // Found header contents up to and including VCDIFF version
    691       vcdiff_version_code_ = header->header4;
    692       if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
    693           (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
    694         VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
    695         return RESULT_ERROR;
    696       }
    697       // fall through
    698     case 3:
    699       if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
    700         wrong_magic_number = true;
    701       }
    702       // fall through
    703     case 2:
    704       if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
    705         wrong_magic_number = true;
    706       }
    707       // fall through
    708     case 1:
    709       if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
    710         wrong_magic_number = true;
    711       }
    712       // fall through
    713     case 0:
    714       if (wrong_magic_number) {
    715         VCD_ERROR << "Did not find VCDIFF header bytes; "
    716                       "input is not a VCDIFF delta file" << VCD_ENDL;
    717         return RESULT_ERROR;
    718       }
    719       if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
    720   }
    721   // Secondary compressor not supported.
    722   if (header->hdr_indicator & VCD_DECOMPRESS) {
    723     VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
    724     return RESULT_ERROR;
    725   }
    726   if (header->hdr_indicator & VCD_CODETABLE) {
    727     int bytes_parsed = InitCustomCodeTable(
    728         data->UnparsedData() + sizeof(DeltaFileHeader),
    729         data->End());
    730     switch (bytes_parsed) {
    731       case RESULT_ERROR:
    732         return RESULT_ERROR;
    733       case RESULT_END_OF_DATA:
    734         return RESULT_END_OF_DATA;
    735       default:
    736         data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
    737     }
    738   } else {
    739     addr_cache_.reset(new VCDiffAddressCache);
    740     // addr_cache_->Init() will be called
    741     // from VCDiffStreamingDecoderImpl::DecodeChunk()
    742     data->Advance(sizeof(DeltaFileHeader));
    743   }
    744   return RESULT_SUCCESS;
    745 }
    746 
    747 int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
    748                                                     const char* data_end) {
    749   // A custom code table is being specified.  Parse the variable-length
    750   // cache sizes and begin parsing the encoded custom code table.
    751   int32_t near_cache_size = 0, same_cache_size = 0;
    752   VCDiffHeaderParser header_parser(data_start, data_end);
    753   if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
    754     return header_parser.GetResult();
    755   }
    756   if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
    757     return header_parser.GetResult();
    758   }
    759   custom_code_table_.reset(new struct VCDiffCodeTableData);
    760   memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
    761   custom_code_table_string_.clear();
    762   addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
    763   // addr_cache_->Init() will be called
    764   // from VCDiffStreamingDecoderImpl::DecodeChunk()
    765 
    766   // If we reach this point (the start of the custom code table)
    767   // without encountering a RESULT_END_OF_DATA condition, then we won't call
    768   // ReadDeltaFileHeader() again for this delta file.
    769   //
    770   // Instantiate a recursive decoder to interpret the custom code table
    771   // as a VCDIFF encoding of the default code table.
    772   custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
    773   custom_code_table_decoder_->StartDecoding(
    774       reinterpret_cast<const char*>(
    775           &VCDiffCodeTableData::kDefaultCodeTableData),
    776       sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
    777   custom_code_table_decoder_->SetPlannedTargetFileSize(
    778       sizeof(*custom_code_table_));
    779   return static_cast<int>(header_parser.ParsedSize());
    780 }
    781 
    782 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
    783     ParseableChunk* data) {
    784   if (!custom_code_table_decoder_.get()) {
    785     return RESULT_SUCCESS;
    786   }
    787   if (!custom_code_table_.get()) {
    788     VCD_DFATAL << "Internal error:  custom_code_table_decoder_ is set,"
    789                   " but custom_code_table_ is NULL" << VCD_ENDL;
    790     return RESULT_ERROR;
    791   }
    792   OutputString<string> output_string(&custom_code_table_string_);
    793   if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
    794                                                data->UnparsedSize(),
    795                                                &output_string)) {
    796     return RESULT_ERROR;
    797   }
    798   if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
    799     // Skip over the consumed data.
    800     data->Finish();
    801     return RESULT_END_OF_DATA;
    802   }
    803   if (!custom_code_table_decoder_->FinishDecoding()) {
    804     return RESULT_ERROR;
    805   }
    806   if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
    807     VCD_DFATAL << "Decoded custom code table size ("
    808                << custom_code_table_string_.length()
    809                << ") does not match size of a code table ("
    810                << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
    811     return RESULT_ERROR;
    812   }
    813   memcpy(custom_code_table_.get(),
    814          custom_code_table_string_.data(),
    815          sizeof(*custom_code_table_));
    816   custom_code_table_string_.clear();
    817   // Skip over the consumed data.
    818   data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
    819   custom_code_table_decoder_.reset();
    820   delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
    821   return RESULT_SUCCESS;
    822 }
    823 
    824 void VCDiffStreamingDecoderImpl::FlushDecodedTarget(
    825     OutputStringInterface* output_string) {
    826   output_string->append(
    827       decoded_target_.data() + decoded_target_output_position_,
    828       decoded_target_.size() - decoded_target_output_position_);
    829   decoded_target_.clear();
    830   delta_window_.set_target_window_start_pos(0);
    831   decoded_target_output_position_ = 0;
    832 }
    833 
    834 void VCDiffStreamingDecoderImpl::AppendNewOutputText(
    835     OutputStringInterface* output_string) {
    836   const size_t bytes_decoded_this_chunk =
    837       decoded_target_.size() - decoded_target_output_position_;
    838   if (bytes_decoded_this_chunk > 0) {
    839     size_t target_bytes_remaining = delta_window_.TargetBytesRemaining();
    840     if (target_bytes_remaining > 0) {
    841       // The decoder is midway through decoding a target window.  Resize
    842       // output_string to match the expected length.  The interface guarantees
    843       // not to resize output_string more than once per target window decoded.
    844       output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
    845                                             + target_bytes_remaining);
    846     }
    847     output_string->append(
    848         decoded_target_.data() + decoded_target_output_position_,
    849         bytes_decoded_this_chunk);
    850     decoded_target_output_position_ = decoded_target_.size();
    851   }
    852 }
    853 
    854 bool VCDiffStreamingDecoderImpl::DecodeChunk(
    855     const char* data,
    856     size_t len,
    857     OutputStringInterface* output_string) {
    858   if (!start_decoding_was_called_) {
    859     VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL;
    860     Reset();
    861     return false;
    862   }
    863   ParseableChunk parseable_chunk(data, len);
    864   if (!unparsed_bytes_.empty()) {
    865     unparsed_bytes_.append(data, len);
    866     parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
    867                                   unparsed_bytes_.size());
    868   }
    869   VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
    870   if (RESULT_SUCCESS == result) {
    871     result = ReadCustomCodeTable(&parseable_chunk);
    872   }
    873   if (RESULT_SUCCESS == result) {
    874     while (!parseable_chunk.Empty()) {
    875       result = delta_window_.DecodeWindow(&parseable_chunk);
    876       if (RESULT_SUCCESS != result) {
    877         break;
    878       }
    879       if (ReachedPlannedTargetFileSize()) {
    880         // Found exactly the length we expected.  Stop decoding.
    881         break;
    882       }
    883       if (!allow_vcd_target()) {
    884         // VCD_TARGET will never be used to reference target data before the
    885         // start of the current window, so flush and clear the contents of
    886         // decoded_target_.
    887         FlushDecodedTarget(output_string);
    888       }
    889     }
    890   }
    891   if (RESULT_ERROR == result) {
    892     Reset();  // Don't allow further DecodeChunk calls
    893     return false;
    894   }
    895   unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
    896                          parseable_chunk.UnparsedSize());
    897   AppendNewOutputText(output_string);
    898   return true;
    899 }
    900 
    901 // Finishes decoding after all data has been received.  Returns true
    902 // if decoding of the entire stream was successful.
    903 bool VCDiffStreamingDecoderImpl::FinishDecoding() {
    904   bool success = true;
    905   if (!start_decoding_was_called_) {
    906     VCD_WARNING << "FinishDecoding() called before StartDecoding(),"
    907                    " or called after DecodeChunk() returned false"
    908                 << VCD_ENDL;
    909     success = false;
    910   } else if (!IsDecodingComplete()) {
    911     VCD_ERROR << "FinishDecoding() called before parsing entire"
    912                  " delta file window" << VCD_ENDL;
    913     success = false;
    914   }
    915   // Reset the object state for the next decode operation
    916   Reset();
    917   return success;
    918 }
    919 
    920 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
    921     size_t window_size) const {
    922   if (window_size > maximum_target_window_size_) {
    923     VCD_ERROR << "Length of target window (" << window_size
    924               << ") exceeds limit of " << maximum_target_window_size_
    925               << " bytes" << VCD_ENDL;
    926     return true;
    927   }
    928   if (HasPlannedTargetFileSize()) {
    929     // The logical expression to check would be:
    930     //
    931     //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
    932     //
    933     // but the addition might cause an integer overflow if target_bytes_to_add
    934     // is very large.  So it is better to check target_bytes_to_add against
    935     // the remaining planned target bytes.
    936     size_t remaining_planned_target_file_size =
    937         planned_target_file_size_ - total_of_target_window_sizes_;
    938     if (window_size > remaining_planned_target_file_size) {
    939       VCD_ERROR << "Length of target window (" << window_size
    940                 << " bytes) plus previous windows ("
    941                 << total_of_target_window_sizes_
    942                 << " bytes) would exceed planned size of "
    943                 << planned_target_file_size_ << " bytes" << VCD_ENDL;
    944       return true;
    945     }
    946   }
    947   size_t remaining_maximum_target_bytes =
    948       maximum_target_file_size_ - total_of_target_window_sizes_;
    949   if (window_size > remaining_maximum_target_bytes) {
    950     VCD_ERROR << "Length of target window (" << window_size
    951               << " bytes) plus previous windows ("
    952               << total_of_target_window_sizes_
    953               << " bytes) would exceed maximum target file size of "
    954               << maximum_target_file_size_ << " bytes" << VCD_ENDL;
    955     return true;
    956   }
    957   return false;
    958 }
    959 
    960 // *** Methods for VCDiffDeltaFileWindow
    961 
    962 void VCDiffDeltaFileWindow::Reset() {
    963   found_header_ = false;
    964 
    965   // Mark the start of the current target window.
    966   target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
    967   target_window_length_ = 0;
    968 
    969   source_segment_ptr_ = NULL;
    970   source_segment_length_ = 0;
    971 
    972   instructions_and_sizes_.Invalidate();
    973   data_for_add_and_run_.Invalidate();
    974   addresses_for_copy_.Invalidate();
    975 
    976   interleaved_bytes_expected_ = 0;
    977 
    978   has_checksum_ = false;
    979   expected_checksum_ = 0;
    980 }
    981 
    982 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
    983     VCDiffHeaderParser* header_parser) {
    984   size_t add_and_run_data_length = 0;
    985   size_t instructions_and_sizes_length = 0;
    986   size_t addresses_length = 0;
    987   if (!header_parser->ParseSectionLengths(has_checksum_,
    988                                           &add_and_run_data_length,
    989                                           &instructions_and_sizes_length,
    990                                           &addresses_length,
    991                                           &expected_checksum_)) {
    992     return header_parser->GetResult();
    993   }
    994   if (parent_->AllowInterleaved() &&
    995       (add_and_run_data_length == 0) &&
    996       (addresses_length == 0)) {
    997     // The interleaved format is being used.
    998     interleaved_bytes_expected_ =
    999         static_cast<int>(instructions_and_sizes_length);
   1000     UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
   1001                                      header_parser->End());
   1002   } else {
   1003     // If interleaved format is not used, then the whole window contents
   1004     // must be available before decoding can begin.  If only part of
   1005     // the current window is available, then report end of data
   1006     // and re-parse the whole header when DecodeChunk() is called again.
   1007     if (header_parser->UnparsedSize() < (add_and_run_data_length +
   1008                                          instructions_and_sizes_length +
   1009                                          addresses_length)) {
   1010       return RESULT_END_OF_DATA;
   1011     }
   1012     data_for_add_and_run_.Init(header_parser->UnparsedData(),
   1013                                add_and_run_data_length);
   1014     instructions_and_sizes_.Init(data_for_add_and_run_.End(),
   1015                                  instructions_and_sizes_length);
   1016     addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
   1017     if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
   1018       VCD_ERROR << "The end of the instructions section "
   1019                    "does not match the end of the delta window" << VCD_ENDL;
   1020       return RESULT_ERROR;
   1021     }
   1022   }
   1023   reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
   1024                instructions_and_sizes_.End());
   1025   return RESULT_SUCCESS;
   1026 }
   1027 
   1028 // Here are the elements of the delta window header to be parsed,
   1029 // from section 4 of the RFC:
   1030 //
   1031 //     Window1
   1032 //         Win_Indicator                            - byte
   1033 //         [Source segment size]                    - integer
   1034 //         [Source segment position]                - integer
   1035 //         The delta encoding of the target window
   1036 //             Length of the delta encoding         - integer
   1037 //             The delta encoding
   1038 //                 Size of the target window        - integer
   1039 //                 Delta_Indicator                  - byte
   1040 //                 Length of data for ADDs and RUNs - integer
   1041 //                 Length of instructions and sizes - integer
   1042 //                 Length of addresses for COPYs    - integer
   1043 //                 Data section for ADDs and RUNs   - array of bytes
   1044 //                 Instructions and sizes section   - array of bytes
   1045 //                 Addresses section for COPYs      - array of bytes
   1046 //
   1047 VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
   1048     ParseableChunk* parseable_chunk) {
   1049   std::string* decoded_target = parent_->decoded_target();
   1050   VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
   1051                                    parseable_chunk->End());
   1052   size_t source_segment_position = 0;
   1053   unsigned char win_indicator = 0;
   1054   if (!header_parser.ParseWinIndicatorAndSourceSegment(
   1055           parent_->dictionary_size(),
   1056           decoded_target->size(),
   1057           parent_->allow_vcd_target(),
   1058           &win_indicator,
   1059           &source_segment_length_,
   1060           &source_segment_position)) {
   1061     return header_parser.GetResult();
   1062   }
   1063   has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
   1064   if (!header_parser.ParseWindowLengths(&target_window_length_)) {
   1065     return header_parser.GetResult();
   1066   }
   1067   if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
   1068     // An error has been logged by TargetWindowWouldExceedSizeLimits().
   1069     return RESULT_ERROR;
   1070   }
   1071   header_parser.ParseDeltaIndicator();
   1072   VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
   1073   if (RESULT_SUCCESS != setup_return_code) {
   1074     return setup_return_code;
   1075   }
   1076   // Reserve enough space in the output string for the current target window.
   1077   const size_t wanted_capacity =
   1078       target_window_start_pos_ + target_window_length_;
   1079   if (decoded_target->capacity() < wanted_capacity) {
   1080     decoded_target->reserve(wanted_capacity);
   1081   }
   1082   // Get a pointer to the start of the source segment.
   1083   if (win_indicator & VCD_SOURCE) {
   1084     source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
   1085   } else if (win_indicator & VCD_TARGET) {
   1086     // This assignment must happen after the reserve().
   1087     // decoded_target should not be resized again while processing this window,
   1088     // so source_segment_ptr_ should remain valid.
   1089     source_segment_ptr_ = decoded_target->data() + source_segment_position;
   1090   }
   1091   // The whole window header was found and parsed successfully.
   1092   found_header_ = true;
   1093   parseable_chunk->Advance(header_parser.ParsedSize());
   1094   parent_->AddToTotalTargetWindowSize(target_window_length_);
   1095   return RESULT_SUCCESS;
   1096 }
   1097 
   1098 void VCDiffDeltaFileWindow::UpdateInstructionPointer(
   1099     ParseableChunk* parseable_chunk) {
   1100   if (IsInterleaved()) {
   1101     size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
   1102     // Reduce expected instruction segment length by bytes parsed
   1103     interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
   1104     parseable_chunk->Advance(bytes_parsed);
   1105   }
   1106 }
   1107 
   1108 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
   1109   return parent_->decoded_target()->size() - target_window_start_pos_;
   1110 }
   1111 
   1112 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
   1113   if (target_window_length_ == 0) {
   1114     // There is no window being decoded at present
   1115     return 0;
   1116   } else {
   1117     return target_window_length_ - TargetBytesDecoded();
   1118   }
   1119 }
   1120 
   1121 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
   1122   parent_->decoded_target()->append(data, size);
   1123 }
   1124 
   1125 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
   1126   parent_->decoded_target()->append(size, byte);
   1127 }
   1128 
   1129 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
   1130   if (size > data_for_add_and_run_.UnparsedSize()) {
   1131     return RESULT_END_OF_DATA;
   1132   }
   1133   // Write the next "size" data bytes
   1134   CopyBytes(data_for_add_and_run_.UnparsedData(), size);
   1135   data_for_add_and_run_.Advance(size);
   1136   return RESULT_SUCCESS;
   1137 }
   1138 
   1139 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
   1140   if (data_for_add_and_run_.Empty()) {
   1141     return RESULT_END_OF_DATA;
   1142   }
   1143   // Write "size" copies of the next data byte
   1144   RunByte(*data_for_add_and_run_.UnparsedData(), size);
   1145   data_for_add_and_run_.Advance(1);
   1146   return RESULT_SUCCESS;
   1147 }
   1148 
   1149 VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
   1150                                                unsigned char mode) {
   1151   // Keep track of the number of target bytes decoded as a local variable
   1152   // to avoid recalculating it each time it is needed.
   1153   size_t target_bytes_decoded = TargetBytesDecoded();
   1154   const VCDAddress here_address =
   1155       static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
   1156   const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
   1157       here_address,
   1158       mode,
   1159       addresses_for_copy_.UnparsedDataAddr(),
   1160       addresses_for_copy_.End());
   1161   switch (decoded_address) {
   1162     case RESULT_ERROR:
   1163       VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL;
   1164       return RESULT_ERROR;
   1165     case RESULT_END_OF_DATA:
   1166       return RESULT_END_OF_DATA;
   1167     default:
   1168       if ((decoded_address < 0) || (decoded_address > here_address)) {
   1169         VCD_DFATAL << "Internal error: unexpected address " << decoded_address
   1170                    << " returned from DecodeAddress, with here_address = "
   1171                    << here_address << VCD_ENDL;
   1172         return RESULT_ERROR;
   1173       }
   1174       break;
   1175   }
   1176   size_t address = static_cast<size_t>(decoded_address);
   1177   if ((address + size) <= source_segment_length_) {
   1178     // Copy all data from source segment
   1179     CopyBytes(&source_segment_ptr_[address], size);
   1180     return RESULT_SUCCESS;
   1181   }
   1182   // Copy some data from target window...
   1183   if (address < source_segment_length_) {
   1184     // ... plus some data from source segment
   1185     const size_t partial_copy_size = source_segment_length_ - address;
   1186     CopyBytes(&source_segment_ptr_[address], partial_copy_size);
   1187     target_bytes_decoded += partial_copy_size;
   1188     address += partial_copy_size;
   1189     size -= partial_copy_size;
   1190   }
   1191   address -= source_segment_length_;
   1192   // address is now based at start of target window
   1193   const char* const target_segment_ptr = parent_->decoded_target()->data() +
   1194                                          target_window_start_pos_;
   1195   while (size > (target_bytes_decoded - address)) {
   1196     // Recursive copy that extends into the yet-to-be-copied target data
   1197     const size_t partial_copy_size = target_bytes_decoded - address;
   1198     CopyBytes(&target_segment_ptr[address], partial_copy_size);
   1199     target_bytes_decoded += partial_copy_size;
   1200     address += partial_copy_size;
   1201     size -= partial_copy_size;
   1202   }
   1203   CopyBytes(&target_segment_ptr[address], size);
   1204   return RESULT_SUCCESS;
   1205 }
   1206 
   1207 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
   1208   if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
   1209                               != parseable_chunk->UnparsedData())) {
   1210     VCD_DFATAL << "Internal error: interleaved format is used, but the"
   1211                   " input pointer does not point to the instructions section"
   1212                << VCD_ENDL;
   1213     return RESULT_ERROR;
   1214   }
   1215   while (TargetBytesDecoded() < target_window_length_) {
   1216     int32_t decoded_size = VCD_INSTRUCTION_ERROR;
   1217     unsigned char mode = 0;
   1218     VCDiffInstructionType instruction =
   1219         reader_.GetNextInstruction(&decoded_size, &mode);
   1220     switch (instruction) {
   1221       case VCD_INSTRUCTION_END_OF_DATA:
   1222         UpdateInstructionPointer(parseable_chunk);
   1223         return RESULT_END_OF_DATA;
   1224       case VCD_INSTRUCTION_ERROR:
   1225         return RESULT_ERROR;
   1226       default:
   1227         break;
   1228     }
   1229     const size_t size = static_cast<size_t>(decoded_size);
   1230     // The value of "size" itself could be enormous (say, INT32_MAX)
   1231     // so check it individually against the limit to protect against
   1232     // overflow when adding it to something else.
   1233     if ((size > target_window_length_) ||
   1234         ((size + TargetBytesDecoded()) > target_window_length_)) {
   1235       VCD_ERROR << VCDiffInstructionName(instruction)
   1236                 << " with size " << size
   1237                 << " plus existing " << TargetBytesDecoded()
   1238                 << " bytes of target data exceeds length of target"
   1239                    " window (" << target_window_length_ << " bytes)"
   1240                 << VCD_ENDL;
   1241       return RESULT_ERROR;
   1242     }
   1243     VCDiffResult result = RESULT_SUCCESS;
   1244     switch (instruction) {
   1245       case VCD_ADD:
   1246         result = DecodeAdd(size);
   1247         break;
   1248       case VCD_RUN:
   1249         result = DecodeRun(size);
   1250         break;
   1251       case VCD_COPY:
   1252         result = DecodeCopy(size, mode);
   1253         break;
   1254       default:
   1255         VCD_DFATAL << "Unexpected instruction type " << instruction
   1256                    << "in opcode stream" << VCD_ENDL;
   1257         return RESULT_ERROR;
   1258     }
   1259     switch (result) {
   1260       case RESULT_END_OF_DATA:
   1261         reader_.UnGetInstruction();
   1262         UpdateInstructionPointer(parseable_chunk);
   1263         return RESULT_END_OF_DATA;
   1264       case RESULT_ERROR:
   1265         return RESULT_ERROR;
   1266       case RESULT_SUCCESS:
   1267         break;
   1268     }
   1269   }
   1270   if (TargetBytesDecoded() != target_window_length_) {
   1271     VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded()
   1272               << " bytes) does not match expected size ("
   1273               << target_window_length_ << " bytes)" << VCD_ENDL;
   1274     return RESULT_ERROR;
   1275   }
   1276   const char* const target_window_start =
   1277       parent_->decoded_target()->data() + target_window_start_pos_;
   1278   if (has_checksum_ &&
   1279       (ComputeAdler32(target_window_start, target_window_length_)
   1280            != expected_checksum_)) {
   1281     VCD_ERROR << "Target data does not match checksum; this could mean "
   1282                  "that the wrong dictionary was used" << VCD_ENDL;
   1283     return RESULT_ERROR;
   1284   }
   1285   if (!instructions_and_sizes_.Empty()) {
   1286     VCD_ERROR << "Excess instructions and sizes left over "
   1287                  "after decoding target window" << VCD_ENDL;
   1288       return RESULT_ERROR;
   1289   }
   1290   if (!IsInterleaved()) {
   1291     // Standard format is being used, with three separate sections for the
   1292     // instructions, data, and addresses.
   1293     if (!data_for_add_and_run_.Empty()) {
   1294       VCD_ERROR << "Excess ADD/RUN data left over "
   1295                    "after decoding target window" << VCD_ENDL;
   1296         return RESULT_ERROR;
   1297     }
   1298     if (!addresses_for_copy_.Empty()) {
   1299       VCD_ERROR << "Excess COPY addresses left over "
   1300                    "after decoding target window" << VCD_ENDL;
   1301         return RESULT_ERROR;
   1302     }
   1303     // Reached the end of the window.  Update the ParseableChunk to point to the
   1304     // end of the addresses section, which is the last section in the window.
   1305     parseable_chunk->SetPosition(addresses_for_copy_.End());
   1306   } else {
   1307     // Interleaved format is being used.
   1308     UpdateInstructionPointer(parseable_chunk);
   1309   }
   1310   return RESULT_SUCCESS;
   1311 }
   1312 
   1313 VCDiffResult VCDiffDeltaFileWindow::DecodeWindow(
   1314     ParseableChunk* parseable_chunk) {
   1315   if (!parent_) {
   1316     VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() "
   1317                   "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL;
   1318     return RESULT_ERROR;
   1319   }
   1320   if (!found_header_) {
   1321     switch (ReadHeader(parseable_chunk)) {
   1322       case RESULT_END_OF_DATA:
   1323         return RESULT_END_OF_DATA;
   1324       case RESULT_ERROR:
   1325         return RESULT_ERROR;
   1326       default:
   1327         // Reset address cache between windows (RFC section 5.1)
   1328         if (!parent_->addr_cache()->Init()) {
   1329           VCD_DFATAL << "Error initializing address cache" << VCD_ENDL;
   1330           return RESULT_ERROR;
   1331         }
   1332     }
   1333   } else {
   1334     // We are resuming a window that was partially decoded before a
   1335     // RESULT_END_OF_DATA was returned.  This can only happen on the first
   1336     // loop iteration, and only if the interleaved format is enabled and used.
   1337     if (!IsInterleaved()) {
   1338       VCD_DFATAL << "Internal error: Resumed decoding of a delta file window"
   1339                     " when interleaved format is not being used" << VCD_ENDL;
   1340       return RESULT_ERROR;
   1341     }
   1342     UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
   1343                                      parseable_chunk->End());
   1344     reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
   1345                            instructions_and_sizes_.End());
   1346   }
   1347   switch (DecodeBody(parseable_chunk)) {
   1348     case RESULT_END_OF_DATA:
   1349       if (MoreDataExpected()) {
   1350         return RESULT_END_OF_DATA;
   1351       } else {
   1352         VCD_ERROR << "End of data reached while decoding VCDIFF delta file"
   1353                   << VCD_ENDL;
   1354         // fall through to RESULT_ERROR case
   1355       }
   1356     case RESULT_ERROR:
   1357       return RESULT_ERROR;
   1358     default:
   1359       break;  // DecodeBody succeeded
   1360   }
   1361   // Get ready to read a new delta window
   1362   Reset();
   1363   return RESULT_SUCCESS;
   1364 }
   1365 
   1366 // *** Methods for VCDiffStreamingDecoder
   1367 
   1368 VCDiffStreamingDecoder::VCDiffStreamingDecoder()
   1369 : impl_(new VCDiffStreamingDecoderImpl) { }
   1370 
   1371 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
   1372 
   1373 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
   1374   impl_->StartDecoding(source, len);
   1375 }
   1376 
   1377 bool VCDiffStreamingDecoder::DecodeChunkToInterface(
   1378     const char* data,
   1379     size_t len,
   1380     OutputStringInterface* output_string) {
   1381   return impl_->DecodeChunk(data, len, output_string);
   1382 }
   1383 
   1384 bool VCDiffStreamingDecoder::FinishDecoding() {
   1385   return impl_->FinishDecoding();
   1386 }
   1387 
   1388 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
   1389     size_t new_maximum_target_file_size) {
   1390   return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
   1391 }
   1392 
   1393 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
   1394     size_t new_maximum_target_window_size) {
   1395   return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
   1396 }
   1397 
   1398 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
   1399   impl_->SetAllowVcdTarget(allow_vcd_target);
   1400 }
   1401 
   1402 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
   1403                                       size_t dictionary_size,
   1404                                       const string& encoding,
   1405                                       OutputStringInterface* target) {
   1406   target->clear();
   1407   decoder_.StartDecoding(dictionary_ptr, dictionary_size);
   1408   if (!decoder_.DecodeChunkToInterface(encoding.data(),
   1409                                        encoding.size(),
   1410                                        target)) {
   1411     return false;
   1412   }
   1413   return decoder_.FinishDecoding();
   1414 }
   1415 
   1416 }  // namespace open_vcdiff
   1417