Home | History | Annotate | Download | only in bsdiff
      1 /*-
      2  * Copyright 2003-2005 Colin Percival
      3  * All rights reserved
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted providing that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  * POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #if 0
     28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
     29 #endif
     30 
     31 #include "bsdiff/bspatch.h"
     32 
     33 #include <errno.h>
     34 #include <fcntl.h>
     35 #include <inttypes.h>
     36 #include <stdio.h>
     37 #include <stdlib.h>
     38 #include <string.h>
     39 #include <sys/stat.h>
     40 #include <sys/types.h>
     41 #include <unistd.h>
     42 
     43 #include <algorithm>
     44 #include <memory>
     45 #include <vector>
     46 
     47 #include "bsdiff/buffer_file.h"
     48 #include "bsdiff/control_entry.h"
     49 #include "bsdiff/extents.h"
     50 #include "bsdiff/extents_file.h"
     51 #include "bsdiff/file.h"
     52 #include "bsdiff/file_interface.h"
     53 #include "bsdiff/logging.h"
     54 #include "bsdiff/memory_file.h"
     55 #include "bsdiff/patch_reader.h"
     56 #include "bsdiff/sink_file.h"
     57 #include "bsdiff/utils.h"
     58 
     59 namespace {
     60 // Read the data in |stream| and write |size| decompressed data to |file|;
     61 // using the buffer in |buf| with size |buf_size|.
     62 // Returns 0 on success, 1 on I/O error and 2 on data error.
     63 int ReadStreamAndWriteAll(
     64     const std::unique_ptr<bsdiff::FileInterface>& file,
     65     size_t size,
     66     uint8_t* buf,
     67     size_t buf_size,
     68     const std::function<bool(uint8_t*, size_t)>& read_func) {
     69   while (size > 0) {
     70     size_t bytes_to_output = std::min(size, buf_size);
     71     if (!read_func(buf, bytes_to_output)) {
     72       LOG(ERROR) << "Failed to read stream.";
     73       return 2;
     74     }
     75 
     76     if (!WriteAll(file, buf, bytes_to_output)) {
     77       PLOG(ERROR) << "WriteAll() failed.";
     78       return 1;
     79     }
     80     size -= bytes_to_output;
     81   }
     82   return 0;
     83 }
     84 
     85 }  // namespace
     86 
     87 namespace bsdiff {
     88 
     89 bool ReadAll(const std::unique_ptr<FileInterface>& file,
     90              uint8_t* data,
     91              size_t size) {
     92   size_t offset = 0, read;
     93   while (offset < size) {
     94     if (!file->Read(data + offset, size - offset, &read) || read == 0)
     95       return false;
     96     offset += read;
     97   }
     98   return true;
     99 }
    100 
    101 bool WriteAll(const std::unique_ptr<FileInterface>& file,
    102               const uint8_t* data,
    103               size_t size) {
    104   size_t offset = 0, written;
    105   while (offset < size) {
    106     if (!file->Write(data + offset, size - offset, &written) || written == 0)
    107       return false;
    108     offset += written;
    109   }
    110   return true;
    111 }
    112 
    113 bool IsOverlapping(const char* old_filename,
    114                    const char* new_filename,
    115                    const std::vector<ex_t>& old_extents,
    116                    const std::vector<ex_t>& new_extents) {
    117   struct stat old_stat, new_stat;
    118   if (stat(new_filename, &new_stat) == -1) {
    119     if (errno == ENOENT)
    120       return false;
    121     PLOG(ERROR) << "Error stat the new file: " << new_filename;
    122     return true;
    123   }
    124   if (stat(old_filename, &old_stat) == -1) {
    125     PLOG(ERROR) << "Error stat the old file: " << old_filename;
    126     return true;
    127   }
    128 
    129   if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
    130     return false;
    131 
    132   if (old_extents.empty() && new_extents.empty())
    133     return true;
    134 
    135   for (ex_t old_ex : old_extents)
    136     for (ex_t new_ex : new_extents)
    137       if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
    138           static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
    139         return true;
    140 
    141   return false;
    142 }
    143 
    144 // Patch |old_filename| with |patch_filename| and save it to |new_filename|.
    145 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
    146 // extents of |old_filename| and |new_filename|.
    147 // Returns 0 on success, 1 on I/O error and 2 on data error.
    148 int bspatch(const char* old_filename,
    149             const char* new_filename,
    150             const char* patch_filename,
    151             const char* old_extents,
    152             const char* new_extents) {
    153   std::unique_ptr<FileInterface> patch_file =
    154       File::FOpen(patch_filename, O_RDONLY);
    155   if (!patch_file) {
    156     PLOG(ERROR) << "Error opening the patch file: " << patch_filename;
    157     return 1;
    158   }
    159   uint64_t patch_size;
    160   patch_file->GetSize(&patch_size);
    161   std::vector<uint8_t> patch(patch_size);
    162   if (!ReadAll(patch_file, patch.data(), patch_size)) {
    163     PLOG(ERROR) << "Error reading the patch file: " << patch_filename;
    164     return 1;
    165   }
    166   patch_file.reset();
    167 
    168   return bspatch(old_filename, new_filename, patch.data(), patch_size,
    169                  old_extents, new_extents);
    170 }
    171 
    172 // Patch |old_filename| with |patch_data| and save it to |new_filename|.
    173 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
    174 // extents of |old_filename| and |new_filename|.
    175 // Returns 0 on success, 1 on I/O error and 2 on data error.
    176 int bspatch(const char* old_filename,
    177             const char* new_filename,
    178             const uint8_t* patch_data,
    179             size_t patch_size,
    180             const char* old_extents,
    181             const char* new_extents) {
    182   int using_extents = (old_extents != NULL || new_extents != NULL);
    183 
    184   // Open input file for reading.
    185   std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
    186   if (!old_file) {
    187     PLOG(ERROR) << "Error opening the old file: " << old_filename;
    188     return 1;
    189   }
    190 
    191   std::vector<ex_t> parsed_old_extents;
    192   if (using_extents) {
    193     if (!ParseExtentStr(old_extents, &parsed_old_extents)) {
    194       LOG(ERROR) << "Error parsing the old extents.";
    195       return 2;
    196     }
    197     old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
    198   }
    199 
    200   // Open output file for writing.
    201   std::unique_ptr<FileInterface> new_file =
    202       File::FOpen(new_filename, O_CREAT | O_WRONLY);
    203   if (!new_file) {
    204     PLOG(ERROR) << "Error opening the new file: " << new_filename;
    205     return 1;
    206   }
    207 
    208   std::vector<ex_t> parsed_new_extents;
    209   if (using_extents) {
    210     if (!ParseExtentStr(new_extents, &parsed_new_extents)) {
    211       LOG(ERROR) << "Error parsing the new extents.";
    212       return 2;
    213     }
    214     new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
    215   }
    216 
    217   if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
    218                     parsed_new_extents)) {
    219     // New and old file is overlapping, we can not stream output to new file,
    220     // cache it in a buffer and write to the file at the end.
    221     uint64_t newsize = ParseInt64(patch_data + 24);
    222     new_file.reset(new BufferFile(std::move(new_file), newsize));
    223   }
    224 
    225   return bspatch(old_file, new_file, patch_data, patch_size);
    226 }
    227 
    228 // Patch |old_data| with |patch_data| and save it by calling sink function.
    229 // Returns 0 on success, 1 on I/O error and 2 on data error.
    230 int bspatch(const uint8_t* old_data,
    231             size_t old_size,
    232             const uint8_t* patch_data,
    233             size_t patch_size,
    234             const sink_func& sink) {
    235   std::unique_ptr<FileInterface> old_file(new MemoryFile(old_data, old_size));
    236   std::unique_ptr<FileInterface> new_file(new SinkFile(sink));
    237 
    238   return bspatch(old_file, new_file, patch_data, patch_size);
    239 }
    240 
    241 // Patch |old_file| with |patch_data| and save it to |new_file|.
    242 // Returns 0 on success, 1 on I/O error and 2 on data error.
    243 int bspatch(const std::unique_ptr<FileInterface>& old_file,
    244             const std::unique_ptr<FileInterface>& new_file,
    245             const uint8_t* patch_data,
    246             size_t patch_size) {
    247   BsdiffPatchReader patch_reader;
    248   if (!patch_reader.Init(patch_data, patch_size)) {
    249     LOG(ERROR) << "Failed to initialize patch reader.";
    250     return 2;
    251   }
    252 
    253   uint64_t old_file_size;
    254   if (!old_file->GetSize(&old_file_size)) {
    255     LOG(ERROR) << "Cannot obtain the size of old file.";
    256     return 1;
    257   }
    258 
    259   // The oldpos can be negative, but the new pos is only incremented linearly.
    260   int64_t oldpos = 0;
    261   uint64_t newpos = 0;
    262   std::vector<uint8_t> old_buf(1024 * 1024);
    263   std::vector<uint8_t> new_buf(1024 * 1024);
    264   uint64_t old_file_pos = 0;
    265   while (newpos < patch_reader.new_file_size()) {
    266     ControlEntry control_entry(0, 0, 0);
    267     if (!patch_reader.ParseControlEntry(&control_entry)) {
    268       LOG(ERROR) << "Failed to read control stream.";
    269       return 2;
    270     }
    271 
    272     // Sanity-check.
    273     if (newpos + control_entry.diff_size > patch_reader.new_file_size()) {
    274       LOG(ERROR) << "Corrupt patch.";
    275       return 2;
    276     }
    277 
    278     int ret = 0;
    279     // Add old data to diff string. It is enough to fseek once, at
    280     // the beginning of the sequence, to avoid unnecessary overhead.
    281     int64_t seek_offset = oldpos;
    282     if (seek_offset < 0) {
    283       // Write diff block directly to new file without adding old data,
    284       // because we will skip part where |oldpos| < 0.
    285       ret = ReadStreamAndWriteAll(
    286           new_file, oldpos - old_file_size, new_buf.data(), new_buf.size(),
    287           std::bind(&BsdiffPatchReader::ReadDiffStream, &patch_reader,
    288                     std::placeholders::_1, std::placeholders::_2));
    289       if (ret)
    290         return ret;
    291       seek_offset = 0;
    292     }
    293 
    294     // We just checked that |seek_offset| is not negative.
    295     if (static_cast<uint64_t>(seek_offset) != old_file_pos &&
    296         !old_file->Seek(seek_offset)) {
    297       PLOG(ERROR) << "Error seeking input file to offset: " << seek_offset;
    298       return 1;
    299     }
    300 
    301     old_file_pos =
    302         std::min<uint64_t>(oldpos + control_entry.diff_size, old_file_size);
    303     size_t chunk_size = old_file_pos - seek_offset;
    304     while (chunk_size > 0) {
    305       size_t read_bytes;
    306       size_t bytes_to_read = std::min(chunk_size, old_buf.size());
    307       if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) {
    308         PLOG(ERROR) << "Error reading from input file.";
    309         return 1;
    310       }
    311       if (!read_bytes) {
    312         LOG(ERROR) << "EOF reached while reading from input file.";
    313         return 2;
    314       }
    315       // Read same amount of bytes from diff block
    316       if (!patch_reader.ReadDiffStream(new_buf.data(), read_bytes)) {
    317         LOG(ERROR) << "Failed to read diff stream.";
    318         return 2;
    319       }
    320       // new_buf already has data from diff block, adds old data to it.
    321       for (size_t k = 0; k < read_bytes; k++)
    322         new_buf[k] += old_buf[k];
    323       if (!WriteAll(new_file, new_buf.data(), read_bytes)) {
    324         PLOG(ERROR) << "Error writing to new file.";
    325         return 1;
    326       }
    327       chunk_size -= read_bytes;
    328     }
    329 
    330     // Adjust pointers.
    331     newpos += control_entry.diff_size;
    332     oldpos += control_entry.diff_size;
    333 
    334     if (oldpos > static_cast<int64_t>(old_file_size)) {
    335       // Write diff block directly to new file without adding old data,
    336       // because we skipped part where |oldpos| > old_file_size.
    337       ret = ReadStreamAndWriteAll(
    338           new_file, oldpos - old_file_size, new_buf.data(), new_buf.size(),
    339           std::bind(&BsdiffPatchReader::ReadDiffStream, &patch_reader,
    340                     std::placeholders::_1, std::placeholders::_2));
    341       if (ret)
    342         return ret;
    343     }
    344 
    345     // Sanity-check.
    346     if (newpos + control_entry.extra_size > patch_reader.new_file_size()) {
    347       LOG(ERROR) << "Corrupt patch.";
    348       return 2;
    349     }
    350 
    351     // Read extra block.
    352     ret = ReadStreamAndWriteAll(
    353         new_file, control_entry.extra_size, new_buf.data(), new_buf.size(),
    354         std::bind(&BsdiffPatchReader::ReadExtraStream, &patch_reader,
    355                   std::placeholders::_1, std::placeholders::_2));
    356     if (ret)
    357       return ret;
    358 
    359     // Adjust pointers.
    360     newpos += control_entry.extra_size;
    361     oldpos += control_entry.offset_increment;
    362   }
    363 
    364   // Close input file.
    365   old_file->Close();
    366 
    367   if (!patch_reader.Finish()) {
    368     LOG(ERROR) << "Failed to finish the patch reader.";
    369     return 2;
    370   }
    371 
    372   if (!new_file->Close()) {
    373     PLOG(ERROR) << "Error closing new file.";
    374     return 1;
    375   }
    376 
    377   return 0;
    378 }
    379 
    380 }  // namespace bsdiff
    381