Home | History | Annotate | Download | only in bsdiff
      1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_
      6 #define _BSDIFF_ENDSLEY_PATCH_WRITER_H_
      7 
      8 #include <memory>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "bsdiff/compressor_interface.h"
     13 #include "bsdiff/constants.h"
     14 #include "bsdiff/patch_writer_interface.h"
     15 
     16 namespace bsdiff {
     17 
     18 // A PatchWriterInterface class compatible with the format used by Android Play
     19 // Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff
     20 // implementation. See https://github.com/mendsley/bsdiff for the original
     21 // implementation of this format. See also Google's APK patch size estimator for
     22 // more information on the file-by-file format used by Play Store:
     23 // https://github.com/googlesamples/apk-patch-size-estimator
     24 
     25 // This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single
     26 // stream with the control entries, diff data and extra data interleaved. After
     27 // the header, each Control Entry is stored in 24 bytes followed by the diff
     28 // stream data for that entry only, and then followed by the extra stream data
     29 // for that entry only. The format doesn't handle the compression of the data;
     30 // instead, the whole file (including the magic string) is compressed with any
     31 // compression algorithm.
     32 
     33 // This format is easier to parse and allows the patch to be streamed, but
     34 // mixing the diff and extra data into the same compression context results in
     35 // a slightly worse compression ratio (about 3.5% worse than upstream's format).
     36 
     37 class EndsleyPatchWriter : public PatchWriterInterface {
     38  public:
     39   // Create the patch writer that will write the data to the passed vector
     40   // |patch|, resizing it as needed. The |patch| vector must be valid until
     41   // Close() is called or this patch writer is destroyed. The data in |patch|
     42   // will be compressed using the compressor type |type|.
          //
          // |brotli_quality| is stored as the quality setting for the compressor.
          // NOTE(review): presumably it is only consulted when |type| selects
          // brotli -- confirm against the implementation file.
          //
          // The constructor only records its arguments; it does not touch |patch|.
          //
          // NOTE(review): uint8_t and size_t are used throughout this class, but the
          // only standard headers included here are <memory>, <string> and
          // <vector>; presumably the fixed-width types arrive transitively through
          // the bsdiff headers -- confirm, or include <stdint.h>/<stddef.h>.
     43   EndsleyPatchWriter(std::vector<uint8_t>* patch,
     44                      CompressorType type,
     45                      int brotli_quality)
     46       : patch_(patch),
     47         compressor_type_(type),
     48         brotli_quality_(brotli_quality) {}
     49 
     50   // PatchWriterInterface overrides.
          // NOTE(review): the contract of each of these methods (argument meaning,
          // required call order, meaning of the bool result) is defined by
          // PatchWriterInterface in bsdiff/patch_writer_interface.h, which is not
          // visible from this header -- consult it before relying on any of them.
     51   bool Init(size_t new_size) override;
     52   bool WriteDiffStream(const uint8_t* data, size_t size) override;
     53   bool WriteExtraStream(const uint8_t* data, size_t size) override;
     54   bool AddControlEntry(const ControlEntry& entry) override;
     55   bool Close() override;
     56 
     57  private:
     58   // Emit the passed control entry at the end of the |patch_| vector.
     59   void EmitControlEntry(const ControlEntry& entry);
     60 
     61   // Emit the passed buffer (|size| bytes starting at |data|) at the end of the
          // |patch_| vector.
     62   void EmitBuffer(const uint8_t* data, size_t size);
     63 
     64   // Flush as much as possible of the pending data.
     65   void Flush();
     66 
     67   // The vector we are writing to, owned by the caller (never by this class).
     68   std::vector<uint8_t>* patch_;
     69 
     70   // The compressor type to use and its quality (if any).
     71   CompressorType compressor_type_;
     72   int brotli_quality_;
     73 
          // The compressor instance, exclusively owned by this writer.
          // NOTE(review): presumably created from |compressor_type_| and
          // |brotli_quality_| during Init() -- confirm in the implementation.
     74   std::unique_ptr<CompressorInterface> compressor_;
     75 
     76   // The pending diff and extra data to be encoded in the file. These vectors
     77   // are not used whenever it is possible to write the data directly to the
     78   // |patch_| vector; namely when the control, diff and extra stream data are
     79   // provided in that order for each control entry.
     80   std::vector<uint8_t> diff_data_;
     81   std::vector<uint8_t> extra_data_;
          // The pending control entries; see |pending_control_data_| below.
     82   std::vector<ControlEntry> control_;
     83 
     84   // Defined as the sum of all the diff_size and extra_size values in
     85   // |control_|. This is used to determine whether it is worth flushing the
     86   // pending data.
     87   size_t pending_control_data_{0};
     88 
     89   // Number of bytes in the diff and extra stream that are pending in the
     90   // last control entry encoded in the |patch_|. If both are zero the last
     91   // control entry was completely emitted.
     92   size_t pending_diff_{0};
     93   size_t pending_extra_{0};
     94 };
     95 
     96 }  // namespace bsdiff
     97 
     98 #endif  // _BSDIFF_ENDSLEY_PATCH_WRITER_H_
     99