Home | History | Annotate | Download | only in src
      1 // Copyright 2008 Google Inc.
      2 // Author: Lincoln Smith
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 // Implements a Decoder for the format described in
     17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
     18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
     19 //
     20 // The RFC describes the possibility of using a secondary compressor
     21 // to further reduce the size of each section of the VCDIFF output.
     22 // That feature is not supported in this implementation of the encoder
     23 // and decoder.
     24 // No secondary compressor types have been publicly registered with
     25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
     26 // in the more than five years since the registry was created, so there
     27 // is no standard set of compressor IDs which would be generated by other
     28 // encoders or accepted by other decoders.
     29 
     30 #include <config.h>
     31 #include "google/vcdecoder.h"
     32 #include <stddef.h>  // size_t, ptrdiff_t
     33 #include <stdint.h>  // int32_t
     34 #include <string.h>  // memcpy, memset
     35 #include <memory>  // auto_ptr
     36 #include <string>
     37 #include "addrcache.h"
     38 #include "checksum.h"
     39 #include "codetable.h"
     40 #include "decodetable.h"
     41 #include "headerparser.h"
     42 #include "logging.h"
     43 #include "google/output_string.h"
     44 #include "varint_bigendian.h"
     45 #include "vcdiff_defs.h"
     46 
     47 namespace open_vcdiff {
     48 
     49 // This class is used to parse delta file windows as described
     50 // in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
     51 //
     52 // Here is the window format copied from the RFC:
     53 //
     54 // Window1
     55 //     Win_Indicator                            - byte
     56 //     [Source segment size]                    - integer
     57 //     [Source segment position]                - integer
     58 //     The delta encoding of the target window
     59 //         Length of the delta encoding         - integer
     60 //         The delta encoding
     61 //             Size of the target window        - integer
     62 //             Delta_Indicator                  - byte
     63 //             Length of data for ADDs and RUNs - integer
     64 //             Length of instructions and sizes - integer
     65 //             Length of addresses for COPYs    - integer
     66 //             Data section for ADDs and RUNs   - array of bytes
     67 //             Instructions and sizes section   - array of bytes
     68 //             Addresses section for COPYs      - array of bytes
     69 // Window2
     70 // ...
     71 //
     72 // Sample usage:
     73 //
     74 // VCDiffDeltaFileWindow delta_window_;
     75 // delta_window_.Init(parent);
     76 // ParseableChunk parseable_chunk(input_buffer,
     77 //                                input_size,
     78 //                                leftover_unencoded_bytes);
     79 // switch (delta_window_.DecodeWindows(&parseable_chunk)) {
     80 //   case RESULT_END_OF_DATA:
     81 //     <Read more input and retry DecodeWindows later.>
     82 //   case RESULT_ERROR:
     83 //     <Handle error case.  An error log message has already been generated.>
     84 // }
     85 //
     86 // DecodeWindows consumes as many windows from the input as it can.  It only
     87 // needs to be placed within a loop if the loop is used to obtain more input
     88 // (delta file) data.
     89 //
     90 class VCDiffDeltaFileWindow {
     91  public:
     92   VCDiffDeltaFileWindow();
     93   ~VCDiffDeltaFileWindow();
     94 
     95   // Init() should be called immediately after constructing the
     96   // VCDiffDeltaFileWindow().  It must be called before DecodeWindows() can be
     97   // invoked, or an error will occur.
     98   void Init(VCDiffStreamingDecoderImpl* parent);
     99 
    100   // Resets the pointers to the data sections in the current window.
    101   void Reset();
    102 
    103   bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
    104                     unsigned char max_mode) {
    105     return reader_.UseCodeTable(code_table_data, max_mode);
    106   }
    107 
    108   // Decodes as many delta windows as possible using the input data from
    109   // *parseable_chunk.  Appends the decoded target windows to
    110   // parent_->decoded_target().  Returns RESULT_SUCCESS on success, or
    111   // RESULT_END_OF_DATA if the end of input was reached before the entire window
    112   // could be decoded and more input is expected (only possible if
    113   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
    114   // decoding.  In the RESULT_ERROR case, the value of parseable_chunk->pointer_
    115   // is undefined; otherwise, parseable_chunk->Advance() is called to point to
    116   // the input data position just after the data that has been decoded.
    117   //
    118   // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder
    119   // expects *exactly* this number of target bytes to be decoded from one or
    120   // more delta file windows.  If this number is met exactly after finishing a
    121   // delta window, this function will return RESULT_SUCCESS without processing
    122   // any more bytes from data_pointer.  If this number is exceeded while
    123   // decoding a window, but was not met before starting that window,
    124   // then RESULT_ERROR will be returned.
    125   //
    126   VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
    127 
    128   bool FoundWindowHeader() const {
    129     return found_header_;
    130   }
    131 
    132   bool MoreDataExpected() const {
    133     // When parsing an interleaved-format delta file,
    134     // every time DecodeBody() exits, interleaved_bytes_expected_
    135     // will be decremented by the number of bytes parsed.  If it
    136     // reaches zero, then there is no more data expected because
    137     // the size of the interleaved section (given in the window
    138     // header) has been reached.
    139     return IsInterleaved() && (interleaved_bytes_expected_ > 0);
    140   }
    141 
    142   size_t target_window_start_pos() const { return target_window_start_pos_; }
    143 
    144   void set_target_window_start_pos(size_t new_start_pos) {
    145     target_window_start_pos_ = new_start_pos;
    146   }
    147 
    148   // Returns the number of bytes remaining to be decoded in the target window.
    149   // If not in the process of decoding a window, returns 0.
    150   size_t TargetBytesRemaining();
    151 
    152  private:
    153   // Reads the header of the window section as described in RFC sections 4.2 and
    154   // 4.3, up to and including the value "Length of addresses for COPYs".  If the
    155   // entire header is found, this function sets up the DeltaWindowSections
    156   // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
    157   // that the decoder can begin decoding the opcodes in these sections.  Returns
    158   // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
    159   // available data was reached before the entire header could be read.  (The
    160   // latter may be an error condition if there is no more data available.)
    161   // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
    162   // parsed header.
    163   //
    164   VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
    165 
    166   // After the window header has been parsed as far as the Delta_Indicator,
    167   // this function is called to parse the following delta window header fields:
    168   //
    169   //     Length of data for ADDs and RUNs - integer (VarintBE format)
    170   //     Length of instructions and sizes - integer (VarintBE format)
    171   //     Length of addresses for COPYs    - integer (VarintBE format)
    172   //
    173   // If has_checksum_ is true, it also looks for the following element:
    174   //
    175   //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
    176   //
    177   // It sets up the DeltaWindowSections instructions_and_sizes_,
    178   // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
    179   // is being used, all three sections will include the entire window body; if
    180   // the standard format is used, three non-overlapping window sections will be
    181   // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
    182   // if standard format is being used and there is not enough input data to read
    183   // the entire window body.  Otherwise, returns RESULT_SUCCESS.
    184   VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
    185 
    186   // Decodes the body of the window section as described in RFC sections 4.3,
    187   // including the sections "Data section for ADDs and RUNs", "Instructions
    188   // and sizes section", and "Addresses section for COPYs".  These sections
    189   // must already have been set up by ReadWindowHeader().  Returns a
    190   // non-negative value on success, or RESULT_END_OF_DATA if the end of input
    191   // was reached before the entire window could be decoded (only possible if
    192   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
    193   // decoding.  Appends as much of the decoded target window as possible to
    194   // parent->decoded_target().
    195   //
    196   int DecodeBody(ParseableChunk* parseable_chunk);
    197 
    198   // Returns the number of bytes already decoded into the target window.
    199   size_t TargetBytesDecoded();
    200 
    201   // Decodes a single ADD instruction, updating parent_->decoded_target_.
    202   VCDiffResult DecodeAdd(size_t size);
    203 
    204   // Decodes a single RUN instruction, updating parent_->decoded_target_.
    205   VCDiffResult DecodeRun(size_t size);
    206 
    207   // Decodes a single COPY instruction, updating parent_->decoded_target_.
    208   VCDiffResult DecodeCopy(size_t size, unsigned char mode);
    209 
    210   // When using the interleaved format, this function is called both on parsing
    211   // the header and on resuming after a RESULT_END_OF_DATA was returned from a
    212   // previous call to DecodeBody().  It sets up all three section pointers to
    213   // reference the same interleaved stream of instructions, sizes, addresses,
    214   // and data.  These pointers must be reset every time that work resumes on a
    215   // delta window,  because the input data string may have been changed or
    216   // resized since DecodeBody() last returned.
    217   void UpdateInterleavedSectionPointers(const char* data_pos,
    218                                         const char* data_end) {
    219     const ptrdiff_t available_data = data_end - data_pos;
    220     // Don't read past the end of currently-available data
    221     if (available_data > interleaved_bytes_expected_) {
    222       instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
    223     } else {
    224       instructions_and_sizes_.Init(data_pos, available_data);
    225     }
    226     data_for_add_and_run_.Init(&instructions_and_sizes_);
    227     addresses_for_copy_.Init(&instructions_and_sizes_);
    228   }
    229 
    230   // If true, the interleaved format described in AllowInterleaved() is used
    231   // for the current delta file.  Only valid after ReadWindowHeader() has been
    232   // called and returned a positive number (i.e., the whole header was parsed),
    233   // but before the window has finished decoding.
    234   //
    235   bool IsInterleaved() const {
    236     // If the sections are interleaved, both addresses_for_copy_ and
    237     // data_for_add_and_run_ should point at instructions_and_sizes_.
    238     return !addresses_for_copy_.IsOwned();
    239   }
    240 
    241   // Executes a single COPY or ADD instruction, appending data to
    242   // parent_->decoded_target().
    243   void CopyBytes(const char* data, size_t size);
    244 
    245   // Executes a single RUN instruction, appending data to
    246   // parent_->decoded_target().
    247   void RunByte(unsigned char byte, size_t size);
    248 
    249   // Advance *parseable_chunk to point to the current position in the
    250   // instructions/sizes section.  If interleaved format is used, then
    251   // decrement the number of expected bytes in the instructions/sizes section
    252   // by the number of instruction/size bytes parsed.
    253   void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
    254 
    255   // The parent object which was passed to Init().
    256   VCDiffStreamingDecoderImpl* parent_;
    257 
    258   // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
    259   // has been called and succeeded in parsing the delta window header, but the
    260   // entire window has not yet been decoded.
    261   bool found_header_;
    262 
    263   // Contents and length of the current source window.  source_segment_ptr_
    264   // will be non-NULL if (a) the window section header for the current window
    265   // has been read, but the window has not yet finished decoding; or
    266   // (b) the window did not specify a source segment.
    267   const char* source_segment_ptr_;
    268   size_t source_segment_length_;
    269 
    270   // The delta encoding window sections as defined in RFC section 4.3.
    271   // The pointer for each section will be incremented as data is consumed and
    272   // decoded from that section.  If the interleaved format is used,
    273   // data_for_add_and_run_ and addresses_for_copy_ will both point to
    274   // instructions_and_sizes_; otherwise, they will be separate data sections.
    275   //
    276   DeltaWindowSection instructions_and_sizes_;
    277   DeltaWindowSection data_for_add_and_run_;
    278   DeltaWindowSection addresses_for_copy_;
    279 
    280   // The expected bytes left to decode in instructions_and_sizes_.  Only used
    281   // for the interleaved format.
    282   int interleaved_bytes_expected_;
    283 
    284   // The expected length of the target window once it has been decoded.
    285   size_t target_window_length_;
    286 
    287   // The index in decoded_target at which the first byte of the current
    288   // target window was/will be written.
    289   size_t target_window_start_pos_;
    290 
    291   // If has_checksum_ is true, then expected_checksum_ contains an Adler32
    292   // checksum of the target window data.  This is an extension included in the
    293   // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
    294   bool has_checksum_;
    295   VCDChecksum expected_checksum_;
    296 
    297   VCDiffCodeTableReader reader_;
    298 
    299   // Making these private avoids implicit copy constructor & assignment operator
    300   VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
    301   void operator=(const VCDiffDeltaFileWindow&);
    302 };
    303 
    304 // *** Inline methods for VCDiffDeltaFileWindow
    305 
    306 inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
    307   Reset();
    308 }
    309 
    310 inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
    311 
    312 inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
    313   parent_ = parent;
    314 }
    315 
    316 class VCDiffStreamingDecoderImpl {
    317  public:
    318   typedef std::string string;
    319 
    320   // The default maximum target file size (and target window size) if
    321   // SetMaximumTargetFileSize() is not called.
    322   static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
    323 
    324   // The largest value that can be passed to SetMaximumTargetWindowSize().
    325   // Using a larger value will result in an error.
    326   static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
    327 
    328   // A constant that is the default value for planned_target_file_size_,
    329   // indicating that the decoder does not have an expected length
    330   // for the target data.
    331   static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
    332 
    333   VCDiffStreamingDecoderImpl();
    334   ~VCDiffStreamingDecoderImpl();
    335 
    336   // Resets all member variables to their initial states.
    337   void Reset();
    338 
    339   // These functions are identical to their counterparts
    340   // in VCDiffStreamingDecoder.
    341   //
    342   void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
    343 
    344   bool DecodeChunk(const char* data,
    345                    size_t len,
    346                    OutputStringInterface* output_string);
    347 
    348   bool FinishDecoding();
    349 
    350   // If true, the version of VCDIFF used in the current delta file allows
    351   // for the interleaved format, in which instructions, addresses and data
    352   // are all sent interleaved in the instructions section of each window
    353   // rather than being sent in separate sections.  This is not part of
    354   // the VCDIFF draft standard, so we've defined a special version code
    355   // 'S' which implies that this feature is available.  Even if interleaving
    356   // is supported, it is not mandatory; interleaved format will be implied
    357   // if the address and data sections are both zero-length.
    358   //
    359   bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
    360 
    361   // If true, the version of VCDIFF used in the current delta file allows
    362   // each delta window to contain an Adler32 checksum of the target window data.
    363   // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
    364   // this checksum will appear as a variable-length integer, just after the
    365   // "length of addresses for COPYs" value and before the window data sections.
    366   // It is possible for some windows in a delta file to use the checksum feature
    367   // and for others not to use it (and leave the flag bit set to 0.)
    368   // Just as with AllowInterleaved(), this extension is not part of the draft
    369   // standard and is only available when the version code 'S' is specified.
    370   //
    371   bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
    372 
    373   bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
    374     maximum_target_file_size_ = new_maximum_target_file_size;
    375     return true;
    376   }
    377 
    378   bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
    379     if (new_maximum_target_window_size > kTargetSizeLimit) {
    380       LOG(ERROR) << "Specified maximum target window size "
    381                  << new_maximum_target_window_size << " exceeds limit of "
    382                  << kTargetSizeLimit << " bytes" << LOG_ENDL;
    383       return false;
    384     }
    385     maximum_target_window_size_ = new_maximum_target_window_size;
    386     return true;
    387   }
    388 
    389   // See description of planned_target_file_size_, below.
    390   bool HasPlannedTargetFileSize() const {
    391     return planned_target_file_size_ != kUnlimitedBytes;
    392   }
    393 
    394   void SetPlannedTargetFileSize(size_t planned_target_file_size) {
    395     planned_target_file_size_ = planned_target_file_size;
    396   }
    397 
    398   void AddToTotalTargetWindowSize(size_t window_size) {
    399     total_of_target_window_sizes_ += window_size;
    400   }
    401 
    402   // Checks to see whether the decoded target data has reached its planned size.
    403   bool ReachedPlannedTargetFileSize() const {
    404     if (!HasPlannedTargetFileSize()) {
    405       return false;
    406     }
    407     // The planned target file size should not have been exceeded.
    408     // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
    409     // each target window would not make the target file exceed that limit, and
    410     // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
    411     // exceeds the advertised target window size.
    412     if (total_of_target_window_sizes_ > planned_target_file_size_) {
    413       LOG(DFATAL) << "Internal error: Decoded data size "
    414                   << total_of_target_window_sizes_
    415                   << " exceeds planned target file size "
    416                   << planned_target_file_size_ << LOG_ENDL;
    417       return true;
    418     }
    419     return total_of_target_window_sizes_ == planned_target_file_size_;
    420   }
    421 
    422   // Checks to see whether adding a new target window of the specified size
    423   // would exceed the planned target file size, the maximum target file size,
    424   // or the maximum target window size.  If so, logs an error and returns true;
    425   // otherwise, returns false.
    426   bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
    427 
    428   // Returns the amount of input data passed to the last DecodeChunk()
    429   // that was not consumed by the decoder.  This is essential if
    430   // SetPlannedTargetFileSize() is being used, in order to preserve the
    431   // remaining input data stream once the planned target file has been decoded.
    432   size_t GetUnconsumedDataSize() const {
    433     return unparsed_bytes_.size();
    434   }
    435 
    436   // This function will return true if the decoder has parsed a complete delta
    437   // file header plus zero or more delta file windows, with no data left over.
    438   // It will also return true if no delta data at all was decoded.  If these
    439   // conditions are not met, then FinishDecoding() should not be called.
    440   bool IsDecodingComplete() const {
    441     if (!FoundFileHeader()) {
    442       // No complete delta file header has been parsed yet.  DecodeChunk()
    443       // may have received some data that it hasn't yet parsed, in which case
    444       // decoding is incomplete.
    445       return unparsed_bytes_.empty();
    446     } else if (custom_code_table_decoder_.get()) {
    447       // The decoder is in the middle of parsing a custom code table.
    448       return false;
    449     } else if (delta_window_.FoundWindowHeader()) {
    450       // The decoder is in the middle of parsing an interleaved format delta
    451       // window.
    452       return false;
    453     } else if (ReachedPlannedTargetFileSize()) {
    454       // The decoder found exactly the planned number of bytes.  In this case
    455       // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
    456       // data after the end of the delta file.
    457       return true;
    458     } else {
    459       // No complete delta file window has been parsed yet.  DecodeChunk()
    460       // may have received some data that it hasn't yet parsed, in which case
    461       // decoding is incomplete.
    462       return unparsed_bytes_.empty();
    463     }
    464   }
    465 
    466   const char* dictionary_ptr() const { return dictionary_ptr_; }
    467 
    468   size_t dictionary_size() const { return dictionary_size_; }
    469 
    470   VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
    471 
    472   string* decoded_target() { return &decoded_target_; }
    473 
    474   bool allow_vcd_target() const { return allow_vcd_target_; }
    475 
    476   void SetAllowVcdTarget(bool allow_vcd_target) {
    477     if (start_decoding_was_called_) {
    478       LOG(DFATAL) << "SetAllowVcdTarget() called after StartDecoding()"
    479                   << LOG_ENDL;
    480       return;
    481     }
    482     allow_vcd_target_ = allow_vcd_target;
    483   }
    484 
    485   // Removes the contents of decoded_target_ that precede the beginning of the
    486   // current window.
    487   void TruncateToBeginningOfWindow();
    488 
    489  private:
    490   // Reads the VCDiff delta file header section as described in RFC section 4.1,
    491   // except the custom code table data.  Returns RESULT_ERROR if an error
    492   // occurred, or RESULT_END_OF_DATA if the end of available data was reached
    493   // before the entire header could be read.  (The latter may be an error
    494   // condition if there is no more data available.)  Otherwise, advances
    495   // data->position_ past the header and returns RESULT_SUCCESS.
    496   //
    497   VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
    498 
    499   // Indicates whether or not the header has already been read.
    500   bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
    501 
    502   // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
    503   // file header, this function parses the custom cache sizes and initializes
    504   // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
    505   // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
    506   // error occurred, or RESULT_END_OF_DATA if the end of available data was
    507   // reached before the custom cache sizes could be read.  Otherwise, returns
    508   // the number of bytes read.
    509   //
    510   int InitCustomCodeTable(const char* data_start, const char* data_end);
    511 
    512   // If a custom code table was specified in the header section that was parsed
    513   // by ReadDeltaFileHeader(), this function makes a recursive call to another
    514   // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
    515   // custom code table is expected to be supplied as an embedded VCDIFF
    516   // encoding that uses the standard code table.  Returns RESULT_ERROR if an
    517   // error occurs, or RESULT_END_OF_DATA if the end of available data was
    518   // reached before the entire custom code table could be read.  Otherwise,
    519   // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
    520   // custom code table.  If the function returns RESULT_SUCCESS or
    521   // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
    522   //
    523   VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
    524 
    525   // Contents and length of the source (dictionary) data.
    526   const char* dictionary_ptr_;
    527   size_t dictionary_size_;
    528 
    529   // This string will be used to store any unparsed bytes left over when
    530   // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
    531   // It will also be used to concatenate those unparsed bytes with the data
    532   // supplied to the next call to DecodeChunk(), so that they appear in
    533   // contiguous memory.
    534   string unparsed_bytes_;
    535 
    536   // The portion of the target file that has been decoded so far.  This will be
    537   // used to fill the output string for DecodeChunk(), and will also be used to
    538   // execute COPY instructions that reference target data.  Since the source
    539   // window can come from a range of addresses in the previously decoded target
    540   // data, the entire target file needs to be available to the decoder, not just
    541   // the current target window.
    542   string decoded_target_;
    543 
    544   // The VCDIFF version byte (also known as "header4") from the
    545   // delta file header.
    546   unsigned char vcdiff_version_code_;
    547 
    548   VCDiffDeltaFileWindow delta_window_;
    549 
    550   std::auto_ptr<VCDiffAddressCache> addr_cache_;
    551 
    552   // Will be NULL unless a custom code table has been defined.
    553   std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
    554 
    555   // Used to receive the decoded custom code table.
    556   string custom_code_table_string_;
    557 
    558   // If a custom code table is specified, it will be expressed
    559   // as an embedded VCDIFF delta file which uses the default code table
    560   // as the source file (dictionary).  Use a child decoder object
    561   // to decode that delta file.
    562   std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
    563 
    564   // If set, then the decoder is expecting *exactly* this number of
    565   // target bytes to be decoded from one or more delta file windows.
    566   // If this number is exceeded while decoding a window, but was not met
    567   // before starting on that window, an error will be reported.
    568   // If FinishDecoding() is called before this number is met, an error
    569   // will also be reported.  This feature is used for decoding the
    570   // embedded code table data within a VCDIFF delta file; we want to
    571   // stop processing the embedded data once the entire code table has
    572   // been decoded, and treat the rest of the available data as part
    573   // of the enclosing delta file.
    574   size_t planned_target_file_size_;
    575 
    576   size_t maximum_target_file_size_;
    577 
    578   size_t maximum_target_window_size_;
    579 
    580   // Contains the sum of the decoded sizes of all target windows seen so far,
    581   // including the expected total size of the current target window in progress
    582   // (even if some of the current target window has not yet been decoded.)
    583   size_t total_of_target_window_sizes_;
    584 
    585   // This value is used to ensure the correct order of calls to the interface
    586   // functions, i.e., a single call to StartDecoding(), followed by zero or
    587   // more calls to DecodeChunk(), followed by a single call to
    588   // FinishDecoding().
    589   bool start_decoding_was_called_;
    590 
    591   // If this value is true then the VCD_TARGET flag can be specified to allow
    592   // the source segment to be chosen from the previously-decoded target data.
    593   // (This is the default behavior.)  If it is false, then specifying the
    594   // VCD_TARGET flag is considered an error, and the decoder does not need to
    595   // keep in memory any decoded target data prior to the current window.
    596   bool allow_vcd_target_;
    597 
    598   // Making these private avoids implicit copy constructor & assignment operator
    599   VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
    600   void operator=(const VCDiffStreamingDecoderImpl&);
    601 };
    602 
    603 // *** Methods for VCDiffStreamingDecoderImpl
    604 
    605 const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
    606 const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
    607 
    608 VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
    609     : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
    610       maximum_target_window_size_(kDefaultMaximumTargetFileSize),
    611       allow_vcd_target_(true) {
    612   delta_window_.Init(this);
    613   Reset();
    614 }
    615 
    616 // Reset() will delete the component objects without reallocating them.
    617 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
    618 
    619 void VCDiffStreamingDecoderImpl::Reset() {
    620   start_decoding_was_called_ = false;
    621   dictionary_ptr_ = NULL;
    622   dictionary_size_ = 0;
    623   vcdiff_version_code_ = '\0';
    624   planned_target_file_size_ = kUnlimitedBytes;
    625   total_of_target_window_sizes_ = 0;
    626   addr_cache_.reset();
    627   custom_code_table_.reset();
    628   custom_code_table_decoder_.reset();
    629   delta_window_.Reset();
    630 }
    631 
    632 void VCDiffStreamingDecoderImpl::TruncateToBeginningOfWindow() {
    633   // Conserve the data for the current window that has been partially decoded.
    634   decoded_target_.erase(0, delta_window_.target_window_start_pos());
    635   delta_window_.set_target_window_start_pos(0);
    636 }
    637 
    638 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
    639                                                size_t dictionary_size) {
    640   if (start_decoding_was_called_) {
    641     LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
    642                 << LOG_ENDL;
    643     return;
    644   }
    645   unparsed_bytes_.clear();
    646   decoded_target_.clear();  // delta_window_.Reset() depends on this
    647   Reset();
    648   dictionary_ptr_ = dictionary_ptr;
    649   dictionary_size_ = dictionary_size;
    650   start_decoding_was_called_ = true;
    651 }
    652 
    653 // Reads the VCDiff delta file header section as described in RFC section 4.1:
    654 //
    655 //     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
    656 //     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
    657 //     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
    658 //     Header4                                  - byte
    659 //     Hdr_Indicator                            - byte
    660 //     [Secondary compressor ID]                - byte
    661 //     [Length of code table data]              - integer
    662 //     [Code table data]
    663 //
    664 // Initializes the code table and address cache objects.  Returns RESULT_ERROR
    665 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was
    666 // reached before the entire header could be read.  (The latter may be an error
    667 // condition if there is no more data available.)  Otherwise, returns
    668 // RESULT_SUCCESS, and removes the header bytes from the data string.
    669 //
    670 // It's relatively inefficient to expect this function to parse any number of
    671 // input bytes available, down to 1 byte, but it is necessary in case the input
    672 // is not a properly formatted VCDIFF delta file.  If the entire input consists
    673 // of two bytes "12", then we should recognize that it does not match the
    674 // initial VCDIFF magic number "VCD" and report an error, rather than waiting
    675 // indefinitely for more input that will never arrive.
    676 //
    677 VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
    678     ParseableChunk* data) {
    679   if (FoundFileHeader()) {
    680     return RESULT_SUCCESS;
    681   }
    682   size_t data_size = data->UnparsedSize();
    683   const DeltaFileHeader* header =
    684       reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
    685   bool wrong_magic_number = false;
    686   switch (data_size) {
    687     // Verify only the bytes that are available.
    688     default:
    689       // Found header contents up to and including VCDIFF version
    690       vcdiff_version_code_ = header->header4;
    691       if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
    692           (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
    693         LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
    694         return RESULT_ERROR;
    695       }
    696       // fall through
    697     case 3:
    698       if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
    699         wrong_magic_number = true;
    700       }
    701       // fall through
    702     case 2:
    703       if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
    704         wrong_magic_number = true;
    705       }
    706       // fall through
    707     case 1:
    708       if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
    709         wrong_magic_number = true;
    710       }
    711       // fall through
    712     case 0:
    713       if (wrong_magic_number) {
    714         LOG(ERROR) << "Did not find VCDIFF header bytes; "
    715                       "input is not a VCDIFF delta file" << LOG_ENDL;
    716         return RESULT_ERROR;
    717       }
    718       if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
    719   }
    720   // Secondary compressor not supported.
    721   if (header->hdr_indicator & VCD_DECOMPRESS) {
    722     LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
    723     return RESULT_ERROR;
    724   }
    725   if (header->hdr_indicator & VCD_CODETABLE) {
    726     int bytes_parsed = InitCustomCodeTable(
    727         data->UnparsedData() + sizeof(DeltaFileHeader),
    728         data->End());
    729     switch (bytes_parsed) {
    730       case RESULT_ERROR:
    731         return RESULT_ERROR;
    732       case RESULT_END_OF_DATA:
    733         return RESULT_END_OF_DATA;
    734       default:
    735         data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
    736     }
    737   } else {
    738     addr_cache_.reset(new VCDiffAddressCache);
    739     // addr_cache_->Init() will be called
    740     // from VCDiffStreamingDecoderImpl::DecodeChunk()
    741     data->Advance(sizeof(DeltaFileHeader));
    742   }
    743   return RESULT_SUCCESS;
    744 }
    745 
    746 int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
    747                                                     const char* data_end) {
    748   // A custom code table is being specified.  Parse the variable-length
    749   // cache sizes and begin parsing the encoded custom code table.
    750   int32_t near_cache_size = 0, same_cache_size = 0;
    751   VCDiffHeaderParser header_parser(data_start, data_end);
    752   if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
    753     return header_parser.GetResult();
    754   }
    755   if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
    756     return header_parser.GetResult();
    757   }
    758   custom_code_table_.reset(new struct VCDiffCodeTableData);
    759   memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
    760   custom_code_table_string_.clear();
    761   addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
    762   // addr_cache_->Init() will be called
    763   // from VCDiffStreamingDecoderImpl::DecodeChunk()
    764 
    765   // If we reach this point (the start of the custom code table)
    766   // without encountering a RESULT_END_OF_DATA condition, then we won't call
    767   // ReadDeltaFileHeader() again for this delta file.
    768   //
    769   // Instantiate a recursive decoder to interpret the custom code table
    770   // as a VCDIFF encoding of the default code table.
    771   custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
    772   custom_code_table_decoder_->StartDecoding(
    773       reinterpret_cast<const char*>(
    774           &VCDiffCodeTableData::kDefaultCodeTableData),
    775       sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
    776   custom_code_table_decoder_->SetPlannedTargetFileSize(
    777       sizeof(*custom_code_table_));
    778   return static_cast<int>(header_parser.ParsedSize());
    779 }
    780 
    781 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
    782     ParseableChunk* data) {
    783   if (!custom_code_table_decoder_.get()) {
    784     return RESULT_SUCCESS;
    785   }
    786   if (!custom_code_table_.get()) {
    787     LOG(DFATAL) << "Internal error:  custom_code_table_decoder_ is set,"
    788                    " but custom_code_table_ is NULL" << LOG_ENDL;
    789     return RESULT_ERROR;
    790   }
    791   OutputString<string> output_string(&custom_code_table_string_);
    792   if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
    793                                                data->UnparsedSize(),
    794                                                &output_string)) {
    795     return RESULT_ERROR;
    796   }
    797   if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
    798     // Skip over the consumed data.
    799     data->Finish();
    800     return RESULT_END_OF_DATA;
    801   }
    802   if (!custom_code_table_decoder_->FinishDecoding()) {
    803     return RESULT_ERROR;
    804   }
    805   if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
    806     LOG(DFATAL) << "Decoded custom code table size ("
    807                 << custom_code_table_string_.length()
    808                 << ") does not match size of a code table ("
    809                 << sizeof(*custom_code_table_) << ")" << LOG_ENDL;
    810     return RESULT_ERROR;
    811   }
    812   memcpy(custom_code_table_.get(),
    813          custom_code_table_string_.data(),
    814          sizeof(*custom_code_table_));
    815   custom_code_table_string_.clear();
    816   // Skip over the consumed data.
    817   data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
    818   custom_code_table_decoder_.reset();
    819   delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
    820   return RESULT_SUCCESS;
    821 }
    822 
    823 namespace {
    824 
    825 class TrackNewOutputText {
    826  public:
    827   typedef std::string string;
    828 
    829   explicit TrackNewOutputText(const string& decoded_target)
    830       : decoded_target_(decoded_target),
    831       initial_decoded_target_size_(decoded_target.size()) { }
    832 
    833   void AppendNewOutputText(size_t target_bytes_remaining,
    834                            OutputStringInterface* output_string) {
    835     const size_t bytes_decoded_this_chunk =
    836         decoded_target_.size() - initial_decoded_target_size_;
    837     if (bytes_decoded_this_chunk > 0) {
    838       if (target_bytes_remaining > 0) {
    839         // The decoder is midway through decoding a target window.  Resize
    840         // output_string to match the expected length.  The interface guarantees
    841         // not to resize the output_string more than once per target window
    842         // decoded.
    843         output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
    844                                               + target_bytes_remaining);
    845       }
    846       output_string->append(
    847           decoded_target_.data() + initial_decoded_target_size_,
    848           bytes_decoded_this_chunk);
    849     }
    850   }
    851 
    852  private:
    853   const string& decoded_target_;
    854   size_t initial_decoded_target_size_;
    855 };
    856 
    857 }  // anonymous namespace
    858 
    859 bool VCDiffStreamingDecoderImpl::DecodeChunk(
    860     const char* data,
    861     size_t len,
    862     OutputStringInterface* output_string) {
    863   if (!start_decoding_was_called_) {
    864     LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
    865     Reset();
    866     return false;
    867   }
    868   ParseableChunk parseable_chunk(data, len);
    869   if (!unparsed_bytes_.empty()) {
    870     unparsed_bytes_.append(data, len);
    871     parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
    872                                   unparsed_bytes_.size());
    873   }
    874   TrackNewOutputText output_tracker(decoded_target_);
    875   VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
    876   if (RESULT_SUCCESS == result) {
    877     result = ReadCustomCodeTable(&parseable_chunk);
    878   }
    879   if (RESULT_SUCCESS == result) {
    880     result = delta_window_.DecodeWindows(&parseable_chunk);
    881   }
    882   if (RESULT_ERROR == result) {
    883     Reset();  // Don't allow further DecodeChunk calls
    884     return false;
    885   }
    886   unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
    887                          parseable_chunk.UnparsedSize());
    888   output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
    889                                      output_string);
    890   if (!allow_vcd_target()) {
    891     // VCD_TARGET will never be used to reference target data beyond the start
    892     // of the current window, so throw away any earlier target data.
    893     TruncateToBeginningOfWindow();
    894   }
    895   return true;
    896 }
    897 
    898 // Finishes decoding after all data has been received.  Returns true
    899 // if decoding of the entire stream was successful.
    900 bool VCDiffStreamingDecoderImpl::FinishDecoding() {
    901   bool success = true;
    902   if (!start_decoding_was_called_) {
    903     LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
    904                     " or called after DecodeChunk() returned false"
    905                  << LOG_ENDL;
    906     success = false;
    907   } else if (!IsDecodingComplete()) {
    908     LOG(ERROR) << "FinishDecoding() called before parsing entire"
    909                   " delta file window" << LOG_ENDL;
    910     success = false;
    911   }
    912   // Reset the object state for the next decode operation
    913   Reset();
    914   return success;
    915 }
    916 
    917 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
    918     size_t window_size) const {
    919   if (window_size > maximum_target_window_size_) {
    920     LOG(ERROR) << "Length of target window (" << window_size
    921                << ") exceeds limit of " << maximum_target_window_size_
    922                << " bytes" << LOG_ENDL;
    923     return true;
    924   }
    925   if (HasPlannedTargetFileSize()) {
    926     // The logical expression to check would be:
    927     //
    928     //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
    929     //
    930     // but the addition might cause an integer overflow if target_bytes_to_add
    931     // is very large.  So it is better to check target_bytes_to_add against
    932     // the remaining planned target bytes.
    933     size_t remaining_planned_target_file_size =
    934         planned_target_file_size_ - total_of_target_window_sizes_;
    935     if (window_size > remaining_planned_target_file_size) {
    936       LOG(ERROR) << "Length of target window (" << window_size
    937                  << " bytes) plus previous windows ("
    938                  << total_of_target_window_sizes_
    939                  << " bytes) would exceed planned size of "
    940                  << planned_target_file_size_ << " bytes" << LOG_ENDL;
    941       return true;
    942     }
    943   }
    944   size_t remaining_maximum_target_bytes =
    945       maximum_target_file_size_ - total_of_target_window_sizes_;
    946   if (window_size > remaining_maximum_target_bytes) {
    947     LOG(ERROR) << "Length of target window (" << window_size
    948                << " bytes) plus previous windows ("
    949                << total_of_target_window_sizes_
    950                << " bytes) would exceed maximum target file size of "
    951                << maximum_target_file_size_ << " bytes" << LOG_ENDL;
    952     return true;
    953   }
    954   return false;
    955 }
    956 
    957 // *** Methods for VCDiffDeltaFileWindow
    958 
    959 void VCDiffDeltaFileWindow::Reset() {
    960   found_header_ = false;
    961 
    962   // Mark the start of the current target window.
    963   target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
    964   target_window_length_ = 0;
    965 
    966   source_segment_ptr_ = NULL;
    967   source_segment_length_ = 0;
    968 
    969   instructions_and_sizes_.Invalidate();
    970   data_for_add_and_run_.Invalidate();
    971   addresses_for_copy_.Invalidate();
    972 
    973   interleaved_bytes_expected_ = 0;
    974 
    975   has_checksum_ = false;
    976   expected_checksum_ = 0;
    977 }
    978 
    979 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
    980     VCDiffHeaderParser* header_parser) {
    981   size_t add_and_run_data_length = 0;
    982   size_t instructions_and_sizes_length = 0;
    983   size_t addresses_length = 0;
    984   if (!header_parser->ParseSectionLengths(has_checksum_,
    985                                           &add_and_run_data_length,
    986                                           &instructions_and_sizes_length,
    987                                           &addresses_length,
    988                                           &expected_checksum_)) {
    989     return header_parser->GetResult();
    990   }
    991   if (parent_->AllowInterleaved() &&
    992       (add_and_run_data_length == 0) &&
    993       (addresses_length == 0)) {
    994     // The interleaved format is being used.
    995     interleaved_bytes_expected_ =
    996         static_cast<int>(instructions_and_sizes_length);
    997     UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
    998                                      header_parser->End());
    999   } else {
   1000     // If interleaved format is not used, then the whole window contents
   1001     // must be available before decoding can begin.  If only part of
   1002     // the current window is available, then report end of data
   1003     // and re-parse the whole header when DecodeChunk() is called again.
   1004     if (header_parser->UnparsedSize() < (add_and_run_data_length +
   1005                                          instructions_and_sizes_length +
   1006                                          addresses_length)) {
   1007       return RESULT_END_OF_DATA;
   1008     }
   1009     data_for_add_and_run_.Init(header_parser->UnparsedData(),
   1010                                add_and_run_data_length);
   1011     instructions_and_sizes_.Init(data_for_add_and_run_.End(),
   1012                                  instructions_and_sizes_length);
   1013     addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
   1014     if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
   1015       LOG(ERROR) << "The end of the instructions section "
   1016                      "does not match the end of the delta window" << LOG_ENDL;
   1017       return RESULT_ERROR;
   1018     }
   1019   }
   1020   reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
   1021                instructions_and_sizes_.End());
   1022   return RESULT_SUCCESS;
   1023 }
   1024 
   1025 // Here are the elements of the delta window header to be parsed,
   1026 // from section 4 of the RFC:
   1027 //
   1028 //     Window1
   1029 //         Win_Indicator                            - byte
   1030 //         [Source segment size]                    - integer
   1031 //         [Source segment position]                - integer
   1032 //         The delta encoding of the target window
   1033 //             Length of the delta encoding         - integer
   1034 //             The delta encoding
   1035 //                 Size of the target window        - integer
   1036 //                 Delta_Indicator                  - byte
   1037 //                 Length of data for ADDs and RUNs - integer
   1038 //                 Length of instructions and sizes - integer
   1039 //                 Length of addresses for COPYs    - integer
   1040 //                 Data section for ADDs and RUNs   - array of bytes
   1041 //                 Instructions and sizes section   - array of bytes
   1042 //                 Addresses section for COPYs      - array of bytes
   1043 //
   1044 VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
   1045     ParseableChunk* parseable_chunk) {
   1046   std::string* decoded_target = parent_->decoded_target();
   1047   VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
   1048                                    parseable_chunk->End());
   1049   size_t source_segment_position = 0;
   1050   unsigned char win_indicator = 0;
   1051   if (!header_parser.ParseWinIndicatorAndSourceSegment(
   1052           parent_->dictionary_size(),
   1053           decoded_target->size(),
   1054           parent_->allow_vcd_target(),
   1055           &win_indicator,
   1056           &source_segment_length_,
   1057           &source_segment_position)) {
   1058     return header_parser.GetResult();
   1059   }
   1060   has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
   1061   if (!header_parser.ParseWindowLengths(&target_window_length_)) {
   1062     return header_parser.GetResult();
   1063   }
   1064   if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
   1065     // An error has been logged by TargetWindowWouldExceedSizeLimits().
   1066     return RESULT_ERROR;
   1067   }
   1068   header_parser.ParseDeltaIndicator();
   1069   VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
   1070   if (RESULT_SUCCESS != setup_return_code) {
   1071     return setup_return_code;
   1072   }
   1073   // Reserve enough space in the output string for the current target window.
   1074   decoded_target->reserve(target_window_start_pos_ + target_window_length_);
   1075   // Get a pointer to the start of the source segment.
   1076   if (win_indicator & VCD_SOURCE) {
   1077     source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
   1078   } else if (win_indicator & VCD_TARGET) {
   1079     // This assignment must happen after the reserve().
   1080     // decoded_target should not be resized again while processing this window,
   1081     // so source_segment_ptr_ should remain valid.
   1082     source_segment_ptr_ = decoded_target->data() + source_segment_position;
   1083   }
   1084   // The whole window header was found and parsed successfully.
   1085   found_header_ = true;
   1086   parseable_chunk->Advance(header_parser.ParsedSize());
   1087   parent_->AddToTotalTargetWindowSize(target_window_length_);
   1088   return RESULT_SUCCESS;
   1089 }
   1090 
   1091 void VCDiffDeltaFileWindow::UpdateInstructionPointer(
   1092     ParseableChunk* parseable_chunk) {
   1093   if (IsInterleaved()) {
   1094     size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
   1095     // Reduce expected instruction segment length by bytes parsed
   1096     interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
   1097     parseable_chunk->Advance(bytes_parsed);
   1098   }
   1099 }
   1100 
   1101 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
   1102   return parent_->decoded_target()->size() - target_window_start_pos_;
   1103 }
   1104 
   1105 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
   1106   if (target_window_length_ == 0) {
   1107     // There is no window being decoded at present
   1108     return 0;
   1109   } else {
   1110     return target_window_length_ - TargetBytesDecoded();
   1111   }
   1112 }
   1113 
   1114 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
   1115   parent_->decoded_target()->append(data, size);
   1116 }
   1117 
   1118 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
   1119   parent_->decoded_target()->append(size, byte);
   1120 }
   1121 
   1122 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
   1123   if (size > data_for_add_and_run_.UnparsedSize()) {
   1124     return RESULT_END_OF_DATA;
   1125   }
   1126   // Write the next "size" data bytes
   1127   CopyBytes(data_for_add_and_run_.UnparsedData(), size);
   1128   data_for_add_and_run_.Advance(size);
   1129   return RESULT_SUCCESS;
   1130 }
   1131 
   1132 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
   1133   if (data_for_add_and_run_.Empty()) {
   1134     return RESULT_END_OF_DATA;
   1135   }
   1136   // Write "size" copies of the next data byte
   1137   RunByte(*data_for_add_and_run_.UnparsedData(), size);
   1138   data_for_add_and_run_.Advance(1);
   1139   return RESULT_SUCCESS;
   1140 }
   1141 
   1142 VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
   1143                                                unsigned char mode) {
   1144   // Keep track of the number of target bytes decoded as a local variable
   1145   // to avoid recalculating it each time it is needed.
   1146   size_t target_bytes_decoded = TargetBytesDecoded();
   1147   const VCDAddress here_address =
   1148       static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
   1149   const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
   1150       here_address,
   1151       mode,
   1152       addresses_for_copy_.UnparsedDataAddr(),
   1153       addresses_for_copy_.End());
   1154   switch (decoded_address) {
   1155     case RESULT_ERROR:
   1156       LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
   1157       return RESULT_ERROR;
   1158     case RESULT_END_OF_DATA:
   1159       return RESULT_END_OF_DATA;
   1160     default:
   1161       if ((decoded_address < 0) || (decoded_address > here_address)) {
   1162         LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
   1163                     << " returned from DecodeAddress, with here_address = "
   1164                     << here_address << LOG_ENDL;
   1165         return RESULT_ERROR;
   1166       }
   1167       break;
   1168   }
   1169   size_t address = static_cast<size_t>(decoded_address);
   1170   if ((address + size) <= source_segment_length_) {
   1171     // Copy all data from source segment
   1172     CopyBytes(&source_segment_ptr_[address], size);
   1173     return RESULT_SUCCESS;
   1174   }
   1175   // Copy some data from target window...
   1176   if (address < source_segment_length_) {
   1177     // ... plus some data from source segment
   1178     const size_t partial_copy_size = source_segment_length_ - address;
   1179     CopyBytes(&source_segment_ptr_[address], partial_copy_size);
   1180     target_bytes_decoded += partial_copy_size;
   1181     address += partial_copy_size;
   1182     size -= partial_copy_size;
   1183   }
   1184   address -= source_segment_length_;
   1185   // address is now based at start of target window
   1186   const char* const target_segment_ptr = parent_->decoded_target()->data() +
   1187                                          target_window_start_pos_;
   1188   while (size > (target_bytes_decoded - address)) {
   1189     // Recursive copy that extends into the yet-to-be-copied target data
   1190     const size_t partial_copy_size = target_bytes_decoded - address;
   1191     CopyBytes(&target_segment_ptr[address], partial_copy_size);
   1192     target_bytes_decoded += partial_copy_size;
   1193     address += partial_copy_size;
   1194     size -= partial_copy_size;
   1195   }
   1196   CopyBytes(&target_segment_ptr[address], size);
   1197   return RESULT_SUCCESS;
   1198 }
   1199 
   1200 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
   1201   if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
   1202                               != parseable_chunk->UnparsedData())) {
   1203     LOG(DFATAL) << "Internal error: interleaved format is used, but the"
   1204                    " input pointer does not point to the instructions section"
   1205                 << LOG_ENDL;
   1206     return RESULT_ERROR;
   1207   }
   1208   while (TargetBytesDecoded() < target_window_length_) {
   1209     int32_t decoded_size = VCD_INSTRUCTION_ERROR;
   1210     unsigned char mode = 0;
   1211     VCDiffInstructionType instruction =
   1212         reader_.GetNextInstruction(&decoded_size, &mode);
   1213     switch (instruction) {
   1214       case VCD_INSTRUCTION_END_OF_DATA:
   1215         UpdateInstructionPointer(parseable_chunk);
   1216         return RESULT_END_OF_DATA;
   1217       case VCD_INSTRUCTION_ERROR:
   1218         return RESULT_ERROR;
   1219       default:
   1220         break;
   1221     }
   1222     const size_t size = static_cast<size_t>(decoded_size);
   1223     // The value of "size" itself could be enormous (say, INT32_MAX)
   1224     // so check it individually against the limit to protect against
   1225     // overflow when adding it to something else.
   1226     if ((size > target_window_length_) ||
   1227         ((size + TargetBytesDecoded()) > target_window_length_)) {
   1228       LOG(ERROR) << VCDiffInstructionName(instruction)
   1229                  << " with size " << size
   1230                  << " plus existing " << TargetBytesDecoded()
   1231                  << " bytes of target data exceeds length of target"
   1232                     " window (" << target_window_length_ << " bytes)"
   1233                  << LOG_ENDL;
   1234       return RESULT_ERROR;
   1235     }
   1236     VCDiffResult result = RESULT_SUCCESS;
   1237     switch (instruction) {
   1238       case VCD_ADD:
   1239         result = DecodeAdd(size);
   1240         break;
   1241       case VCD_RUN:
   1242         result = DecodeRun(size);
   1243         break;
   1244       case VCD_COPY:
   1245         result = DecodeCopy(size, mode);
   1246         break;
   1247       default:
   1248         LOG(DFATAL) << "Unexpected instruction type " << instruction
   1249                     << "in opcode stream" << LOG_ENDL;
   1250         return RESULT_ERROR;
   1251     }
   1252     switch (result) {
   1253       case RESULT_END_OF_DATA:
   1254         reader_.UnGetInstruction();
   1255         UpdateInstructionPointer(parseable_chunk);
   1256         return RESULT_END_OF_DATA;
   1257       case RESULT_ERROR:
   1258         return RESULT_ERROR;
   1259       case RESULT_SUCCESS:
   1260         break;
   1261     }
   1262   }
   1263   if (TargetBytesDecoded() != target_window_length_) {
   1264     LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
   1265                << " bytes) does not match expected size ("
   1266                << target_window_length_ << " bytes)" << LOG_ENDL;
   1267     return RESULT_ERROR;
   1268   }
   1269   const char* const target_window_start =
   1270       parent_->decoded_target()->data() + target_window_start_pos_;
   1271   if (has_checksum_ &&
   1272       (ComputeAdler32(target_window_start, target_window_length_)
   1273            != expected_checksum_)) {
   1274     LOG(ERROR) << "Target data does not match checksum; this could mean "
   1275                   "that the wrong dictionary was used" << LOG_ENDL;
   1276     return RESULT_ERROR;
   1277   }
   1278   if (!instructions_and_sizes_.Empty()) {
   1279     LOG(ERROR) << "Excess instructions and sizes left over "
   1280                   "after decoding target window" << LOG_ENDL;
   1281       return RESULT_ERROR;
   1282   }
   1283   if (!IsInterleaved()) {
   1284     // Standard format is being used, with three separate sections for the
   1285     // instructions, data, and addresses.
   1286     if (!data_for_add_and_run_.Empty()) {
   1287       LOG(ERROR) << "Excess ADD/RUN data left over "
   1288                     "after decoding target window" << LOG_ENDL;
   1289         return RESULT_ERROR;
   1290     }
   1291     if (!addresses_for_copy_.Empty()) {
   1292       LOG(ERROR) << "Excess COPY addresses left over "
   1293                     "after decoding target window" << LOG_ENDL;
   1294         return RESULT_ERROR;
   1295     }
   1296     // Reached the end of the window.  Update the ParseableChunk to point to the
   1297     // end of the addresses section, which is the last section in the window.
   1298     parseable_chunk->SetPosition(addresses_for_copy_.End());
   1299   } else {
   1300     // Interleaved format is being used.
   1301     UpdateInstructionPointer(parseable_chunk);
   1302   }
   1303   return RESULT_SUCCESS;
   1304 }
   1305 
   1306 VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
   1307     ParseableChunk* parseable_chunk) {
   1308   if (!parent_) {
   1309     LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
   1310                    "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
   1311     return RESULT_ERROR;
   1312   }
   1313   while (!parseable_chunk->Empty()) {
   1314     if (!found_header_) {
   1315       switch (ReadHeader(parseable_chunk)) {
   1316         case RESULT_END_OF_DATA:
   1317           return RESULT_END_OF_DATA;
   1318         case RESULT_ERROR:
   1319           return RESULT_ERROR;
   1320         default:
   1321           // Reset address cache between windows (RFC section 5.1)
   1322           if (!parent_->addr_cache()->Init()) {
   1323             LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
   1324             return RESULT_ERROR;
   1325           }
   1326       }
   1327     } else {
   1328       // We are resuming a window that was partially decoded before a
   1329       // RESULT_END_OF_DATA was returned.  This can only happen on the first
   1330       // loop iteration, and only if the interleaved format is enabled and used.
   1331       if (!IsInterleaved()) {
   1332         LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
   1333                        " when interleaved format is not being used" << LOG_ENDL;
   1334         return RESULT_ERROR;
   1335       }
   1336       UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
   1337                                        parseable_chunk->End());
   1338       reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
   1339                              instructions_and_sizes_.End());
   1340     }
   1341     switch (DecodeBody(parseable_chunk)) {
   1342       case RESULT_END_OF_DATA:
   1343         if (MoreDataExpected()) {
   1344           return RESULT_END_OF_DATA;
   1345         } else {
   1346           LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
   1347                      << LOG_ENDL;
   1348           // fall through to RESULT_ERROR case
   1349         }
   1350       case RESULT_ERROR:
   1351         return RESULT_ERROR;
   1352       default:
   1353         break;  // DecodeBody succeeded
   1354     }
   1355     // Get ready to read a new delta window
   1356     Reset();
   1357     if (parent_->ReachedPlannedTargetFileSize()) {
   1358       // Found exactly the length we expected.  Stop decoding.
   1359       return RESULT_SUCCESS;
   1360     }
   1361   }
   1362   return RESULT_SUCCESS;
   1363 }
   1364 
   1365 // *** Methods for VCDiffStreamingDecoder
   1366 
   1367 VCDiffStreamingDecoder::VCDiffStreamingDecoder()
   1368 : impl_(new VCDiffStreamingDecoderImpl) { }
   1369 
   1370 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
   1371 
   1372 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
   1373   impl_->StartDecoding(source, len);
   1374 }
   1375 
   1376 bool VCDiffStreamingDecoder::DecodeChunkToInterface(
   1377     const char* data,
   1378     size_t len,
   1379     OutputStringInterface* output_string) {
   1380   return impl_->DecodeChunk(data, len, output_string);
   1381 }
   1382 
   1383 bool VCDiffStreamingDecoder::FinishDecoding() {
   1384   return impl_->FinishDecoding();
   1385 }
   1386 
   1387 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
   1388     size_t new_maximum_target_file_size) {
   1389   return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
   1390 }
   1391 
   1392 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
   1393     size_t new_maximum_target_window_size) {
   1394   return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
   1395 }
   1396 
   1397 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
   1398   impl_->SetAllowVcdTarget(allow_vcd_target);
   1399 }
   1400 
   1401 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
   1402                                       size_t dictionary_size,
   1403                                       const string& encoding,
   1404                                       OutputStringInterface* target) {
   1405   target->clear();
   1406   decoder_.StartDecoding(dictionary_ptr, dictionary_size);
   1407   if (!decoder_.DecodeChunkToInterface(encoding.data(),
   1408                                        encoding.size(),
   1409                                        target)) {
   1410     return false;
   1411   }
   1412   return decoder_.FinishDecoding();
   1413 }
   1414 
   1415 }  // namespace open_vcdiff
   1416