Home | History | Annotate | Download | only in bsdiff
      1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_
      6 #define _BSDIFF_ENDSLEY_PATCH_WRITER_H_
      7 
      8 #include <memory>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "bsdiff/compressor_interface.h"
     13 #include "bsdiff/constants.h"
     14 #include "bsdiff/patch_writer_interface.h"
     15 
     16 namespace bsdiff {
     17 
     18 // A PatchWriterInterface class compatible with the format used by Android Play
     19 // Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff
     20 // implementation. See https://github.com/mendsley/bsdiff for the original
     21 // implementation of this format. See also Google's APK patch size estimator for
     22 // more information on the file-by-file format used by Play Store:
     23 // https://github.com/googlesamples/apk-patch-size-estimator
     24 
     25 // This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single
     26 // stream with the control entries, diff data and extra data interleaved. After
     27 // the header, each Control Entry is stored in 24 bytes followed by the diff
     28 // stream data for that entry only, and then followed by the extra stream data
     29 // for that entry only. The format doesn't handle the compression of the data,
     30 // instead, the whole file (including the magic string) is compressed with any
     31 // compression algorithm.
     32 
// This format is easier to parse and allows the patch to be streamed, but
// mixing the diff and extra data into the same compression context results in a
// slightly worse compression ratio (about 3.5% compared to upstream's format).
     36 
     37 class EndsleyPatchWriter : public PatchWriterInterface {
     38  public:
     39   // Create the patch writer that will write the data to the passed vector
     40   // |patch|, resizing it as needed. The |patch| vector must be valid until
     41   // Close() is called or this patch is destroyed. The data in |patch| will be
     42   // compressed using the compressor type |type|.
     43   EndsleyPatchWriter(std::vector<uint8_t>* patch,
     44                      CompressorType type,
     45                      int quality)
     46       : patch_(patch), compressor_type_(type), quality_(quality) {}
     47 
     48   // PatchWriterInterface overrides.
     49   bool Init(size_t new_size) override;
     50   bool WriteDiffStream(const uint8_t* data, size_t size) override;
     51   bool WriteExtraStream(const uint8_t* data, size_t size) override;
     52   bool AddControlEntry(const ControlEntry& entry) override;
     53   bool Close() override;
     54 
     55  private:
     56   // Emit at the end of the |patch_| vector the passed control entry.
     57   void EmitControlEntry(const ControlEntry& entry);
     58 
     59   // Emit at the end of the |patch_| vector the passed buffer.
     60   void EmitBuffer(const uint8_t* data, size_t size);
     61 
     62   // Flush as much as possible of the pending data.
     63   void Flush();
     64 
     65   // The vector we are writing to, owned by the caller.
     66   std::vector<uint8_t>* patch_;
     67 
     68   // The compressor type to use and its quality (if any).
     69   CompressorType compressor_type_;
     70   int quality_;
     71 
     72   std::unique_ptr<CompressorInterface> compressor_;
     73 
     74   // The pending diff and extra data to be encoded in the file. These vectors
     75   // would not be used whenever is possible to the data directly to the patch_
     76   // vector; namely when the control, diff and extra stream data are provided in
     77   // that order for each control entry.
     78   std::vector<uint8_t> diff_data_;
     79   std::vector<uint8_t> extra_data_;
     80   std::vector<ControlEntry> control_;
     81 
     82   // Defined as the sum of all the diff_size and extra_size values in
     83   // |control_|. This is used to determine whether it is worth Flushing the
     84   // pending data.
     85   size_t pending_control_data_{0};
     86 
     87   // Number of bytes in the diff and extra stream that are pending in the
     88   // last control entry encoded in the |patch_|. If both are zero the last
     89   // control entry was completely emitted.
     90   size_t pending_diff_{0};
     91   size_t pending_extra_{0};
     92 };
     93 
     94 }  // namespace bsdiff
     95 
     96 #endif  // _BSDIFF_ENDSLEY_PATCH_WRITER_H_
     97