Home | History | Annotate | Download | only in bsdiff
      1 /*-
      2  * Copyright 2003-2005 Colin Percival
      3  * All rights reserved
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted providing that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  * POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #if 0
     28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
     29 #endif
     30 
     31 #include "bspatch.h"
     32 
     33 #include <bzlib.h>
     34 #include <errno.h>
     35 #include <fcntl.h>
     36 #include <inttypes.h>
     37 #include <stdlib.h>
     38 #include <string.h>
     39 #include <unistd.h>
     40 #include <sys/stat.h>
     41 #include <sys/types.h>
     42 
     43 #include <algorithm>
     44 #include <memory>
     45 #include <limits>
     46 #include <vector>
     47 
     48 #include "buffer_file.h"
     49 #include "extents.h"
     50 #include "extents_file.h"
     51 #include "file.h"
     52 #include "file_interface.h"
     53 #include "memory_file.h"
     54 #include "sink_file.h"
     55 
     56 namespace {
     57 
     58 int64_t ParseInt64(const u_char* buf) {
     59   int64_t y;
     60 
     61   y = buf[7] & 0x7F;
     62   y = y * 256;
     63   y += buf[6];
     64   y = y * 256;
     65   y += buf[5];
     66   y = y * 256;
     67   y += buf[4];
     68   y = y * 256;
     69   y += buf[3];
     70   y = y * 256;
     71   y += buf[2];
     72   y = y * 256;
     73   y += buf[1];
     74   y = y * 256;
     75   y += buf[0];
     76 
     77   if (buf[7] & 0x80)
     78     y = -y;
     79 
     80   return y;
     81 }
     82 
     83 bool ReadBZ2(bz_stream* stream, uint8_t* data, size_t size) {
     84   stream->next_out = (char*)data;
     85   while (size > 0) {
     86     unsigned int read_size = std::min(
     87         static_cast<size_t>(std::numeric_limits<unsigned int>::max()), size);
     88     stream->avail_out = read_size;
     89     int bz2err = BZ2_bzDecompress(stream);
     90     if (bz2err != BZ_OK && bz2err != BZ_STREAM_END)
     91       return false;
     92     size -= read_size - stream->avail_out;
     93   }
     94   return true;
     95 }
     96 
     97 int ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file,
     98                        bz_stream* stream,
     99                        size_t size,
    100                        uint8_t* buf,
    101                        size_t buf_size) {
    102   while (size > 0) {
    103     size_t bytes_to_read = std::min(size, buf_size);
    104     if (!ReadBZ2(stream, buf, bytes_to_read)) {
    105       fprintf(stderr, "Failed to read bzip stream.\n");
    106       return 2;
    107     }
    108     if (!WriteAll(file, buf, bytes_to_read)) {
    109       perror("WriteAll() failed");
    110       return 1;
    111     }
    112     size -= bytes_to_read;
    113   }
    114   return 0;
    115 }
    116 
    117 }  // namespace
    118 
    119 namespace bsdiff {
    120 
    121 bool ReadAll(const std::unique_ptr<FileInterface>& file,
    122              uint8_t* data,
    123              size_t size) {
    124   size_t offset = 0, read;
    125   while (offset < size) {
    126     if (!file->Read(data + offset, size - offset, &read) || read == 0)
    127       return false;
    128     offset += read;
    129   }
    130   return true;
    131 }
    132 
    133 bool WriteAll(const std::unique_ptr<FileInterface>& file,
    134               const uint8_t* data,
    135               size_t size) {
    136   size_t offset = 0, written;
    137   while (offset < size) {
    138     if (!file->Write(data + offset, size - offset, &written) || written == 0)
    139       return false;
    140     offset += written;
    141   }
    142   return true;
    143 }
    144 
    145 bool IsOverlapping(const char* old_filename,
    146                    const char* new_filename,
    147                    const std::vector<ex_t>& old_extents,
    148                    const std::vector<ex_t>& new_extents) {
    149   struct stat old_stat, new_stat;
    150   if (stat(new_filename, &new_stat) == -1) {
    151     if (errno == ENOENT)
    152       return false;
    153     fprintf(stderr, "Error stat the new file %s: %s\n", new_filename,
    154             strerror(errno));
    155     return true;
    156   }
    157   if (stat(old_filename, &old_stat) == -1) {
    158     fprintf(stderr, "Error stat the old file %s: %s\n", old_filename,
    159             strerror(errno));
    160     return true;
    161   }
    162 
    163   if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
    164     return false;
    165 
    166   if (old_extents.empty() && new_extents.empty())
    167     return true;
    168 
    169   for (ex_t old_ex : old_extents)
    170     for (ex_t new_ex : new_extents)
    171       if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
    172           static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
    173         return true;
    174 
    175   return false;
    176 }
    177 
    178 // Patch |old_filename| with |patch_filename| and save it to |new_filename|.
    179 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
    180 // extents of |old_filename| and |new_filename|.
    181 // Returns 0 on success, 1 on I/O error and 2 on data error.
    182 int bspatch(const char* old_filename,
    183             const char* new_filename,
    184             const char* patch_filename,
    185             const char* old_extents,
    186             const char* new_extents) {
    187   std::unique_ptr<FileInterface> patch_file =
    188       File::FOpen(patch_filename, O_RDONLY);
    189   if (!patch_file) {
    190     fprintf(stderr, "Error opening the patch file %s: %s\n", patch_filename,
    191             strerror(errno));
    192     return 1;
    193   }
    194   uint64_t patch_size;
    195   patch_file->GetSize(&patch_size);
    196   std::vector<uint8_t> patch(patch_size);
    197   if (!ReadAll(patch_file, patch.data(), patch_size)) {
    198     fprintf(stderr, "Error reading the patch file %s: %s\n", patch_filename,
    199             strerror(errno));
    200     return 1;
    201   }
    202   patch_file.reset();
    203 
    204   return bspatch(old_filename, new_filename, patch.data(), patch_size,
    205                  old_extents, new_extents);
    206 }
    207 
    208 // Patch |old_filename| with |patch_data| and save it to |new_filename|.
    209 // |old_extents| and |new_extents| are comma-separated lists of "offset:length"
    210 // extents of |old_filename| and |new_filename|.
    211 // Returns 0 on success, 1 on I/O error and 2 on data error.
    212 int bspatch(const char* old_filename,
    213             const char* new_filename,
    214             const uint8_t* patch_data,
    215             size_t patch_size,
    216             const char* old_extents,
    217             const char* new_extents) {
    218   int using_extents = (old_extents != NULL || new_extents != NULL);
    219 
    220   // Open input file for reading.
    221   std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
    222   if (!old_file) {
    223     fprintf(stderr, "Error opening the old file %s: %s\n", old_filename,
    224             strerror(errno));
    225     return 1;
    226   }
    227 
    228   std::vector<ex_t> parsed_old_extents;
    229   if (using_extents) {
    230     if (!ParseExtentStr(old_extents, &parsed_old_extents)) {
    231       fprintf(stderr, "Error parsing the old extents\n");
    232       return 2;
    233     }
    234     old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
    235   }
    236 
    237   // Open output file for writing.
    238   std::unique_ptr<FileInterface> new_file =
    239       File::FOpen(new_filename, O_CREAT | O_WRONLY);
    240   if (!new_file) {
    241     fprintf(stderr, "Error opening the new file %s: %s\n", new_filename,
    242             strerror(errno));
    243     return 1;
    244   }
    245 
    246   std::vector<ex_t> parsed_new_extents;
    247   if (using_extents) {
    248     if (!ParseExtentStr(new_extents, &parsed_new_extents)) {
    249       fprintf(stderr, "Error parsing the new extents\n");
    250       return 2;
    251     }
    252     new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
    253   }
    254 
    255   if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
    256                     parsed_new_extents)) {
    257     // New and old file is overlapping, we can not stream output to new file,
    258     // cache it in a buffer and write to the file at the end.
    259     uint64_t newsize = ParseInt64(patch_data + 24);
    260     new_file.reset(new BufferFile(std::move(new_file), newsize));
    261   }
    262 
    263   return bspatch(old_file, new_file, patch_data, patch_size);
    264 }
    265 
    266 // Patch |old_data| with |patch_data| and save it by calling sink function.
    267 // Returns 0 on success, 1 on I/O error and 2 on data error.
    268 int bspatch(const uint8_t* old_data,
    269             size_t old_size,
    270             const uint8_t* patch_data,
    271             size_t patch_size,
    272             const sink_func& sink) {
    273   std::unique_ptr<FileInterface> old_file(new MemoryFile(old_data, old_size));
    274   std::unique_ptr<FileInterface> new_file(new SinkFile(sink));
    275 
    276   return bspatch(old_file, new_file, patch_data, patch_size);
    277 }
    278 
    279 // Patch |old_file| with |patch_data| and save it to |new_file|.
    280 // Returns 0 on success, 1 on I/O error and 2 on data error.
    281 int bspatch(const std::unique_ptr<FileInterface>& old_file,
    282             const std::unique_ptr<FileInterface>& new_file,
    283             const uint8_t* patch_data,
    284             size_t patch_size) {
    285   int bz2err;
    286   u_char buf[8];
    287   off_t ctrl[3];
    288 
    289   // File format:
    290   //   0       8    "BSDIFF40"
    291   //   8       8    X
    292   //   16      8    Y
    293   //   24      8    sizeof(new_filename)
    294   //   32      X    bzip2(control block)
    295   //   32+X    Y    bzip2(diff block)
    296   //   32+X+Y  ???  bzip2(extra block)
    297   // with control block a set of triples (x,y,z) meaning "add x bytes
    298   // from oldfile to x bytes from the diff block; copy y bytes from the
    299   // extra block; seek forwards in oldfile by z bytes".
    300 
    301   // Check for appropriate magic.
    302   if (memcmp(patch_data, "BSDIFF40", 8) != 0) {
    303     fprintf(stderr, "Not a bsdiff patch.\n");
    304     return 2;
    305   }
    306 
    307   // Read lengths from header.
    308   uint64_t oldsize, newsize;
    309   int64_t ctrl_len = ParseInt64(patch_data + 8);
    310   int64_t data_len = ParseInt64(patch_data + 16);
    311   int64_t signed_newsize = ParseInt64(patch_data + 24);
    312   newsize = signed_newsize;
    313   if ((ctrl_len < 0) || (data_len < 0) || (signed_newsize < 0) ||
    314       (32 + ctrl_len + data_len > static_cast<int64_t>(patch_size))) {
    315     fprintf(stderr, "Corrupt patch.\n");
    316     return 2;
    317   }
    318 
    319   bz_stream cstream;
    320   cstream.next_in = (char*)patch_data + 32;
    321   cstream.avail_in = ctrl_len;
    322   cstream.bzalloc = nullptr;
    323   cstream.bzfree = nullptr;
    324   cstream.opaque = nullptr;
    325   if ((bz2err = BZ2_bzDecompressInit(&cstream, 0, 0)) != BZ_OK) {
    326     fprintf(stderr, "Failed to bzinit control stream (%d)\n", bz2err);
    327     return 2;
    328   }
    329 
    330   bz_stream dstream;
    331   dstream.next_in = (char*)patch_data + 32 + ctrl_len;
    332   dstream.avail_in = data_len;
    333   dstream.bzalloc = nullptr;
    334   dstream.bzfree = nullptr;
    335   dstream.opaque = nullptr;
    336   if ((bz2err = BZ2_bzDecompressInit(&dstream, 0, 0)) != BZ_OK) {
    337     fprintf(stderr, "Failed to bzinit diff stream (%d)\n", bz2err);
    338     return 2;
    339   }
    340 
    341   bz_stream estream;
    342   estream.next_in = (char*)patch_data + 32 + ctrl_len + data_len;
    343   estream.avail_in = patch_size - (32 + ctrl_len + data_len);
    344   estream.bzalloc = nullptr;
    345   estream.bzfree = nullptr;
    346   estream.opaque = nullptr;
    347   if ((bz2err = BZ2_bzDecompressInit(&estream, 0, 0)) != BZ_OK) {
    348     fprintf(stderr, "Failed to bzinit extra stream (%d)\n", bz2err);
    349     return 2;
    350   }
    351 
    352   uint64_t old_file_pos = 0;
    353 
    354   if (!old_file->GetSize(&oldsize)) {
    355     fprintf(stderr, "Cannot obtain the size of old file.\n");
    356     return 1;
    357   }
    358 
    359   // The oldpos can be negative, but the new pos is only incremented linearly.
    360   int64_t oldpos = 0;
    361   uint64_t newpos = 0;
    362   std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024);
    363   while (newpos < newsize) {
    364     int64_t i;
    365     // Read control data.
    366     for (i = 0; i <= 2; i++) {
    367       if (!ReadBZ2(&cstream, buf, 8)) {
    368         fprintf(stderr, "Failed to read control stream.\n");
    369         return 2;
    370       }
    371       ctrl[i] = ParseInt64(buf);
    372     }
    373 
    374     // Sanity-check.
    375     if (ctrl[0] < 0 || ctrl[1] < 0) {
    376       fprintf(stderr, "Corrupt patch.\n");
    377       return 2;
    378     }
    379 
    380     // Sanity-check.
    381     if (newpos + ctrl[0] > newsize) {
    382       fprintf(stderr, "Corrupt patch.\n");
    383       return 2;
    384     }
    385 
    386     int ret = 0;
    387     // Add old data to diff string. It is enough to fseek once, at
    388     // the beginning of the sequence, to avoid unnecessary overhead.
    389     if ((i = oldpos) < 0) {
    390       // Write diff block directly to new file without adding old data,
    391       // because we will skip part where |oldpos| < 0.
    392       ret = ReadBZ2AndWriteAll(new_file, &dstream, -i, new_buf.data(),
    393                                new_buf.size());
    394       if (ret)
    395         return ret;
    396       i = 0;
    397     }
    398 
    399     // We just checked that |i| is not negative.
    400     if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) {
    401       fprintf(stderr, "Error seeking input file to offset %" PRId64 ": %s\n", i,
    402               strerror(errno));
    403       return 1;
    404     }
    405     if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
    406       old_file_pos = oldsize;
    407 
    408     size_t chunk_size = old_file_pos - i;
    409     while (chunk_size > 0) {
    410       size_t read_bytes;
    411       size_t bytes_to_read = std::min(chunk_size, old_buf.size());
    412       if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) {
    413         perror("Error reading from input file");
    414         return 1;
    415       }
    416       if (!read_bytes) {
    417         fprintf(stderr, "EOF reached while reading from input file.\n");
    418         return 2;
    419       }
    420       // Read same amount of bytes from diff block
    421       if (!ReadBZ2(&dstream, new_buf.data(), read_bytes)) {
    422         fprintf(stderr, "Failed to read diff stream.\n");
    423         return 2;
    424       }
    425       // new_buf already has data from diff block, adds old data to it.
    426       for (size_t k = 0; k < read_bytes; k++)
    427         new_buf[k] += old_buf[k];
    428       if (!WriteAll(new_file, new_buf.data(), read_bytes)) {
    429         perror("Error writing to new file");
    430         return 1;
    431       }
    432       chunk_size -= read_bytes;
    433     }
    434 
    435     // Adjust pointers.
    436     newpos += ctrl[0];
    437     oldpos += ctrl[0];
    438 
    439     if (oldpos > static_cast<int64_t>(oldsize)) {
    440       // Write diff block directly to new file without adding old data,
    441       // because we skipped part where |oldpos| > oldsize.
    442       ret = ReadBZ2AndWriteAll(new_file, &dstream, oldpos - oldsize,
    443                                new_buf.data(), new_buf.size());
    444       if (ret)
    445         return ret;
    446     }
    447 
    448     // Sanity-check.
    449     if (newpos + ctrl[1] > newsize) {
    450       fprintf(stderr, "Corrupt patch.\n");
    451       return 2;
    452     }
    453 
    454     // Read extra block.
    455     ret = ReadBZ2AndWriteAll(new_file, &estream, ctrl[1], new_buf.data(),
    456                              new_buf.size());
    457     if (ret)
    458       return ret;
    459 
    460     // Adjust pointers.
    461     newpos += ctrl[1];
    462     oldpos += ctrl[2];
    463   }
    464 
    465   // Close input file.
    466   old_file->Close();
    467 
    468   // Clean up the bzip2 reads.
    469   BZ2_bzDecompressEnd(&cstream);
    470   BZ2_bzDecompressEnd(&dstream);
    471   BZ2_bzDecompressEnd(&estream);
    472 
    473   if (!new_file->Close()) {
    474     perror("Error closing new file");
    475     return 1;
    476   }
    477 
    478   return 0;
    479 }
    480 
    481 }  // namespace bsdiff
    482