Home | History | Annotate | Download | only in applypatch
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef _APPLYPATCH_IMGDIFF_IMAGE_H
     18 #define _APPLYPATCH_IMGDIFF_IMAGE_H
     19 
     20 #include <stddef.h>
     21 #include <stdio.h>
     22 #include <sys/types.h>
     23 
     24 #include <string>
     25 #include <vector>
     26 
     27 #include <bsdiff/bsdiff.h>
     28 #include <ziparchive/zip_archive.h>
     29 #include <zlib.h>
     30 
     31 #include "imgdiff.h"
     32 #include "otautil/rangeset.h"
     33 
     34 class ImageChunk {
     35  public:
     36   static constexpr auto WINDOWBITS = -15;  // 32kb window; negative to indicate a raw stream.
     37   static constexpr auto MEMLEVEL = 8;      // the default value.
     38   static constexpr auto METHOD = Z_DEFLATED;
     39   static constexpr auto STRATEGY = Z_DEFAULT_STRATEGY;
     40 
     41   ImageChunk(int type, size_t start, const std::vector<uint8_t>* file_content, size_t raw_data_len,
     42              std::string entry_name = {});
     43 
     44   int GetType() const {
     45     return type_;
     46   }
     47   size_t GetRawDataLength() const {
     48     return raw_data_len_;
     49   }
     50   const std::string& GetEntryName() const {
     51     return entry_name_;
     52   }
     53   size_t GetStartOffset() const {
     54     return start_;
     55   }
     56   int GetCompressLevel() const {
     57     return compress_level_;
     58   }
     59 
     60   // CHUNK_DEFLATE will return the uncompressed data for diff, while other types will simply return
     61   // the raw data.
     62   const uint8_t* DataForPatch() const;
     63   size_t DataLengthForPatch() const;
     64 
     65   void Dump(size_t index) const;
     66 
     67   void SetUncompressedData(std::vector<uint8_t> data);
     68   bool SetBonusData(const std::vector<uint8_t>& bonus_data);
     69 
     70   bool operator==(const ImageChunk& other) const;
     71   bool operator!=(const ImageChunk& other) const {
     72     return !(*this == other);
     73   }
     74 
     75   /*
     76    * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob of uninterpreted data).
     77    * The resulting patch will likely be about as big as the target file, but it lets us handle
     78    * the case of images where some gzip chunks are reconstructible but others aren't (by treating
     79    * the ones that aren't as normal chunks).
     80    */
     81   void ChangeDeflateChunkToNormal();
     82 
     83   /*
     84    * Verify that we can reproduce exactly the same compressed data that we started with.  Sets the
     85    * level, method, windowBits, memLevel, and strategy fields in the chunk to the encoding
     86    * parameters needed to produce the right output.
     87    */
     88   bool ReconstructDeflateChunk();
     89   bool IsAdjacentNormal(const ImageChunk& other) const;
     90   void MergeAdjacentNormal(const ImageChunk& other);
     91 
     92   /*
     93    * Compute a bsdiff patch between |src| and |tgt|; Store the result in the patch_data.
     94    * |bsdiff_cache| can be used to cache the suffix array if the same |src| chunk is used
     95    * repeatedly, pass nullptr if not needed.
     96    */
     97   static bool MakePatch(const ImageChunk& tgt, const ImageChunk& src,
     98                         std::vector<uint8_t>* patch_data,
     99                         bsdiff::SuffixArrayIndexInterface** bsdiff_cache);
    100 
    101  private:
    102   const uint8_t* GetRawData() const;
    103   bool TryReconstruction(int level);
    104 
    105   int type_;                                    // CHUNK_NORMAL, CHUNK_DEFLATE, CHUNK_RAW
    106   size_t start_;                                // offset of chunk in the original input file
    107   const std::vector<uint8_t>* input_file_ptr_;  // ptr to the full content of original input file
    108   size_t raw_data_len_;
    109 
    110   // deflate encoder parameters
    111   int compress_level_;
    112 
    113   // --- for CHUNK_DEFLATE chunks only: ---
    114   std::vector<uint8_t> uncompressed_data_;
    115   std::string entry_name_;  // used for zip entries
    116 };
    117 
    118 // PatchChunk stores the patch data between a source chunk and a target chunk. It also keeps track
    119 // of the metadata of src&tgt chunks (e.g. offset, raw data length, uncompressed data length).
    120 class PatchChunk {
    121  public:
    122   PatchChunk(const ImageChunk& tgt, const ImageChunk& src, std::vector<uint8_t> data);
    123 
    124   // Construct a CHUNK_RAW patch from the target data directly.
    125   explicit PatchChunk(const ImageChunk& tgt);
    126 
    127   // Return true if raw data size is smaller than the patch size.
    128   static bool RawDataIsSmaller(const ImageChunk& tgt, size_t patch_size);
    129 
    130   // Update the source start with the new offset within the source range.
    131   void UpdateSourceOffset(const SortedRangeSet& src_range);
    132 
    133   // Return the total size (header + data) of the patch.
    134   size_t PatchSize() const;
    135 
    136   static bool WritePatchDataToFd(const std::vector<PatchChunk>& patch_chunks, int patch_fd);
    137 
    138  private:
    139   size_t GetHeaderSize() const;
    140   size_t WriteHeaderToFd(int fd, size_t offset, size_t index) const;
    141 
    142   // The patch chunk type is the same as the target chunk type. The only exception is we change
    143   // the |type_| to CHUNK_RAW if target length is smaller than the patch size.
    144   int type_;
    145 
    146   size_t source_start_;
    147   size_t source_len_;
    148   size_t source_uncompressed_len_;
    149 
    150   size_t target_start_;  // offset of the target chunk within the target file
    151   size_t target_len_;
    152   size_t target_uncompressed_len_;
    153   size_t target_compress_level_;  // the deflate compression level of the target chunk.
    154 
    155   std::vector<uint8_t> data_;  // storage for the patch data
    156 };
    157 
    158 // Interface for zip_mode and image_mode images. We initialize the image from an input file and
    159 // split the file content into a list of image chunks.
    160 class Image {
    161  public:
    162   explicit Image(bool is_source) : is_source_(is_source) {}
    163 
    164   virtual ~Image() {}
    165 
    166   // Create a list of image chunks from input file.
    167   virtual bool Initialize(const std::string& filename) = 0;
    168 
    169   // Look for runs of adjacent normal chunks and compress them down into a single chunk.  (Such
    170   // runs can be produced when deflate chunks are changed to normal chunks.)
    171   void MergeAdjacentNormalChunks();
    172 
    173   void DumpChunks() const;
    174 
    175   // Non const iterators to access the stored ImageChunks.
    176   std::vector<ImageChunk>::iterator begin() {
    177     return chunks_.begin();
    178   }
    179 
    180   std::vector<ImageChunk>::iterator end() {
    181     return chunks_.end();
    182   }
    183 
    184   std::vector<ImageChunk>::const_iterator cbegin() const {
    185     return chunks_.cbegin();
    186   }
    187 
    188   std::vector<ImageChunk>::const_iterator cend() const {
    189     return chunks_.cend();
    190   }
    191 
    192   ImageChunk& operator[](size_t i);
    193   const ImageChunk& operator[](size_t i) const;
    194 
    195   size_t NumOfChunks() const {
    196     return chunks_.size();
    197   }
    198 
    199  protected:
    200   bool ReadFile(const std::string& filename, std::vector<uint8_t>* file_content);
    201 
    202   bool is_source_;                     // True if it's for source chunks.
    203   std::vector<ImageChunk> chunks_;     // Internal storage of ImageChunk.
    204   std::vector<uint8_t> file_content_;  // Store the whole input file in memory.
    205 };
    206 
    207 class ZipModeImage : public Image {
    208  public:
    209   explicit ZipModeImage(bool is_source, size_t limit = 0) : Image(is_source), limit_(limit) {}
    210 
    211   bool Initialize(const std::string& filename) override;
    212 
    213   // Initialize a dummy ZipModeImage from an existing ImageChunk vector. For src img pieces, we
    214   // reconstruct a new file_content based on the source ranges; but it's not needed for the tgt img
    215   // pieces; because for each chunk both the data and their offset within the file are unchanged.
    216   void Initialize(const std::vector<ImageChunk>& chunks, const std::vector<uint8_t>& file_content) {
    217     chunks_ = chunks;
    218     file_content_ = file_content;
    219   }
    220 
    221   // The pesudo source chunk for bsdiff if there's no match for the given target chunk. It's in
    222   // fact the whole source file.
    223   ImageChunk PseudoSource() const;
    224 
    225   // Find the matching deflate source chunk by entry name. Search for normal chunks also if
    226   // |find_normal| is true.
    227   ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false);
    228 
    229   const ImageChunk* FindChunkByName(const std::string& name, bool find_normal = false) const;
    230 
    231   // Verify that we can reconstruct the deflate chunks; also change the type to CHUNK_NORMAL if
    232   // src and tgt are identical.
    233   static bool CheckAndProcessChunks(ZipModeImage* tgt_image, ZipModeImage* src_image);
    234 
    235   // Compute the patch between tgt & src images, and write the data into |patch_name|.
    236   static bool GeneratePatches(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
    237                               const std::string& patch_name);
    238 
    239   // Compute the patch based on the lists of split src and tgt images. Generate patches for each
    240   // pair of split pieces and write the data to |patch_name|. If |debug_dir| is specified, write
    241   // each split src data and patch data into that directory.
    242   static bool GeneratePatches(const std::vector<ZipModeImage>& split_tgt_images,
    243                               const std::vector<ZipModeImage>& split_src_images,
    244                               const std::vector<SortedRangeSet>& split_src_ranges,
    245                               const std::string& patch_name, const std::string& split_info_file,
    246                               const std::string& debug_dir);
    247 
    248   // Split the tgt chunks and src chunks based on the size limit.
    249   static bool SplitZipModeImageWithLimit(const ZipModeImage& tgt_image,
    250                                          const ZipModeImage& src_image,
    251                                          std::vector<ZipModeImage>* split_tgt_images,
    252                                          std::vector<ZipModeImage>* split_src_images,
    253                                          std::vector<SortedRangeSet>* split_src_ranges);
    254 
    255  private:
    256   // Initialize image chunks based on the zip entries.
    257   bool InitializeChunks(const std::string& filename, ZipArchiveHandle handle);
    258   // Add the a zip entry to the list.
    259   bool AddZipEntryToChunks(ZipArchiveHandle handle, const std::string& entry_name, ZipEntry* entry);
    260   // Return the real size of the zip file. (omit the trailing zeros that used for alignment)
    261   bool GetZipFileSize(size_t* input_file_size);
    262 
    263   static void ValidateSplitImages(const std::vector<ZipModeImage>& split_tgt_images,
    264                                   const std::vector<ZipModeImage>& split_src_images,
    265                                   std::vector<SortedRangeSet>& split_src_ranges,
    266                                   size_t total_tgt_size);
    267   // Construct the dummy split images based on the chunks info and source ranges; and move them into
    268   // the given vectors. Return true if we add a new split image into |split_tgt_images|, and
    269   // false otherwise.
    270   static bool AddSplitImageFromChunkList(const ZipModeImage& tgt_image,
    271                                          const ZipModeImage& src_image,
    272                                          const SortedRangeSet& split_src_ranges,
    273                                          const std::vector<ImageChunk>& split_tgt_chunks,
    274                                          const std::vector<ImageChunk>& split_src_chunks,
    275                                          std::vector<ZipModeImage>* split_tgt_images,
    276                                          std::vector<ZipModeImage>* split_src_images);
    277 
    278   // Function that actually iterates the tgt_chunks and makes patches.
    279   static bool GeneratePatchesInternal(const ZipModeImage& tgt_image, const ZipModeImage& src_image,
    280                                       std::vector<PatchChunk>* patch_chunks);
    281 
    282   // size limit in bytes of each chunk. Also, if the length of one zip_entry exceeds the limit,
    283   // we'll split that entry into several smaller chunks in advance.
    284   size_t limit_;
    285 };
    286 
    287 class ImageModeImage : public Image {
    288  public:
    289   explicit ImageModeImage(bool is_source) : Image(is_source) {}
    290 
    291   // Initialize the image chunks list by searching the magic numbers in an image file.
    292   bool Initialize(const std::string& filename) override;
    293 
    294   bool SetBonusData(const std::vector<uint8_t>& bonus_data);
    295 
    296   // In Image Mode, verify that the source and target images have the same chunk structure (ie, the
    297   // same sequence of deflate and normal chunks).
    298   static bool CheckAndProcessChunks(ImageModeImage* tgt_image, ImageModeImage* src_image);
    299 
    300   // In image mode, generate patches against the given source chunks and bonus_data; write the
    301   // result to |patch_name|.
    302   static bool GeneratePatches(const ImageModeImage& tgt_image, const ImageModeImage& src_image,
    303                               const std::string& patch_name);
    304 };
    305 
    306 #endif  // _APPLYPATCH_IMGDIFF_IMAGE_H
    307