Home | History | Annotate | Download | only in bsdiff
      1 /*-
      2  * Copyright 2003-2005 Colin Percival
      3  * All rights reserved
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted providing that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  * POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #if 0
     28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
     29 #endif
     30 
     31 #include "bspatch.h"
     32 
     33 #include <bzlib.h>
     34 #include <err.h>
     35 #include <errno.h>
     36 #include <fcntl.h>
     37 #include <inttypes.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <unistd.h>
     41 #include <sys/stat.h>
     42 #include <sys/types.h>
     43 
     44 #include <algorithm>
     45 #include <memory>
     46 #include <limits>
     47 #include <vector>
     48 
     49 #include "extents.h"
     50 #include "extents_file.h"
     51 #include "file.h"
     52 #include "file_interface.h"
     53 #include "memory_file.h"
     54 
     55 namespace {
     56 
     57 int64_t ParseInt64(u_char* buf) {
     58   int64_t y;
     59 
     60   y = buf[7] & 0x7F;
     61   y = y * 256;
     62   y += buf[6];
     63   y = y * 256;
     64   y += buf[5];
     65   y = y * 256;
     66   y += buf[4];
     67   y = y * 256;
     68   y += buf[3];
     69   y = y * 256;
     70   y += buf[2];
     71   y = y * 256;
     72   y += buf[1];
     73   y = y * 256;
     74   y += buf[0];
     75 
     76   if (buf[7] & 0x80)
     77     y = -y;
     78 
     79   return y;
     80 }
     81 
     82 bool ReadBZ2(BZFILE* pfbz2, uint8_t* data, size_t size) {
     83   int bz2err;
     84   size_t lenread = BZ2_bzRead(&bz2err, pfbz2, data, size);
     85   if (lenread < size || (bz2err != BZ_OK && bz2err != BZ_STREAM_END))
     86     return false;
     87   return true;
     88 }
     89 
     90 bool ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file,
     91                         BZFILE* pfbz2,
     92                         size_t size,
     93                         uint8_t* buf,
     94                         size_t buf_size) {
     95   while (size > 0) {
     96     size_t bytes_to_read = std::min(size, buf_size);
     97     if (!ReadBZ2(pfbz2, buf, bytes_to_read))
     98       return false;
     99     if (!WriteAll(file, buf, bytes_to_read))
    100       return false;
    101     size -= bytes_to_read;
    102   }
    103   return true;
    104 }
    105 
    106 }  // namespace
    107 
    108 namespace bsdiff {
    109 
    110 bool WriteAll(const std::unique_ptr<FileInterface>& file,
    111               const uint8_t* data,
    112               size_t size) {
    113   size_t offset = 0, written;
    114   while (offset < size) {
    115     if (!file->Write(data + offset, size - offset, &written) || written == 0)
    116       return false;
    117     offset += written;
    118   }
    119   return true;
    120 }
    121 
    122 bool IsOverlapping(const char* old_filename,
    123                    const char* new_filename,
    124                    const std::vector<ex_t>& old_extents,
    125                    const std::vector<ex_t>& new_extents) {
    126   struct stat old_stat, new_stat;
    127   if (stat(new_filename, &new_stat) == -1) {
    128     if (errno == ENOENT)
    129       return false;
    130     err(1, "Error stat the new filename %s", new_filename);
    131   }
    132   if (stat(old_filename, &old_stat) == -1)
    133     err(1, "Error stat the old filename %s", old_filename);
    134 
    135   if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino)
    136     return false;
    137 
    138   if (old_extents.empty() && new_extents.empty())
    139     return true;
    140 
    141   for (ex_t old_ex : old_extents)
    142     for (ex_t new_ex : new_extents)
    143       if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len &&
    144           static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len)
    145         return true;
    146 
    147   return false;
    148 }
    149 
    150 int bspatch(
    151     const char* old_filename, const char* new_filename,
    152     const char* patch_filename,
    153     const char* old_extents, const char* new_extents) {
    154   FILE* f, *cpf, *dpf, *epf;
    155   BZFILE* cpfbz2, *dpfbz2, *epfbz2;
    156   int bz2err;
    157   ssize_t bzctrllen, bzdatalen;
    158   u_char header[32], buf[8];
    159   off_t ctrl[3];
    160 
    161   int using_extents = (old_extents != NULL || new_extents != NULL);
    162 
    163   // Open patch file.
    164   if ((f = fopen(patch_filename, "r")) == NULL)
    165     err(1, "fopen(%s)", patch_filename);
    166 
    167   // File format:
    168   //   0       8    "BSDIFF40"
    169   //   8       8    X
    170   //   16      8    Y
    171   //   24      8    sizeof(new_filename)
    172   //   32      X    bzip2(control block)
    173   //   32+X    Y    bzip2(diff block)
    174   //   32+X+Y  ???  bzip2(extra block)
    175   // with control block a set of triples (x,y,z) meaning "add x bytes
    176   // from oldfile to x bytes from the diff block; copy y bytes from the
    177   // extra block; seek forwards in oldfile by z bytes".
    178 
    179   // Read header.
    180   if (fread(header, 1, 32, f) < 32) {
    181     if (feof(f))
    182       errx(1, "Corrupt patch\n");
    183     err(1, "fread(%s)", patch_filename);
    184   }
    185 
    186   // Check for appropriate magic.
    187   if (memcmp(header, "BSDIFF40", 8) != 0)
    188     errx(1, "Corrupt patch\n");
    189 
    190   // Read lengths from header.
    191   uint64_t oldsize, newsize;
    192   bzctrllen = ParseInt64(header + 8);
    193   bzdatalen = ParseInt64(header + 16);
    194   int64_t signed_newsize = ParseInt64(header + 24);
    195   newsize = signed_newsize;
    196   if ((bzctrllen < 0) || (bzdatalen < 0) || (signed_newsize < 0))
    197     errx(1, "Corrupt patch\n");
    198 
    199   // Close patch file and re-open it via libbzip2 at the right places.
    200   if (fclose(f))
    201     err(1, "fclose(%s)", patch_filename);
    202   if ((cpf = fopen(patch_filename, "r")) == NULL)
    203     err(1, "fopen(%s)", patch_filename);
    204   if (fseek(cpf, 32, SEEK_SET))
    205     err(1, "fseeko(%s, %lld)", patch_filename, (long long)32);
    206   if ((cpfbz2 = BZ2_bzReadOpen(&bz2err, cpf, 0, 0, NULL, 0)) == NULL)
    207     errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
    208   if ((dpf = fopen(patch_filename, "r")) == NULL)
    209     err(1, "fopen(%s)", patch_filename);
    210   if (fseek(dpf, 32 + bzctrllen, SEEK_SET))
    211     err(1, "fseeko(%s, %lld)", patch_filename, (long long)(32 + bzctrllen));
    212   if ((dpfbz2 = BZ2_bzReadOpen(&bz2err, dpf, 0, 0, NULL, 0)) == NULL)
    213     errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
    214   if ((epf = fopen(patch_filename, "r")) == NULL)
    215     err(1, "fopen(%s)", patch_filename);
    216   if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET))
    217     err(1, "fseeko(%s, %lld)", patch_filename,
    218         (long long)(32 + bzctrllen + bzdatalen));
    219   if ((epfbz2 = BZ2_bzReadOpen(&bz2err, epf, 0, 0, NULL, 0)) == NULL)
    220     errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err);
    221 
    222   // Open input file for reading.
    223   std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
    224   if (!old_file)
    225     err(1, "Error opening the old filename");
    226 
    227   std::vector<ex_t> parsed_old_extents;
    228   if (using_extents) {
    229     if (!ParseExtentStr(old_extents, &parsed_old_extents))
    230       errx(1, "Error parsing the old extents");
    231     old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
    232   }
    233 
    234   if (!old_file->GetSize(&oldsize))
    235     err(1, "cannot obtain the size of %s", old_filename);
    236   uint64_t old_file_pos = 0;
    237 
    238   // Open output file for writing.
    239   std::unique_ptr<FileInterface> new_file =
    240       File::FOpen(new_filename, O_CREAT | O_WRONLY);
    241   if (!new_file)
    242     err(1, "Error opening the new filename %s", new_filename);
    243 
    244   std::vector<ex_t> parsed_new_extents;
    245   if (using_extents) {
    246     if (!ParseExtentStr(new_extents, &parsed_new_extents))
    247       errx(1, "Error parsing the new extents");
    248     new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
    249   }
    250 
    251   if (IsOverlapping(old_filename, new_filename, parsed_old_extents,
    252                     parsed_new_extents)) {
    253     // New and old file is overlapping, we can not stream output to new file,
    254     // cache it in the memory and write to the file at the end.
    255     new_file.reset(new MemoryFile(std::move(new_file), newsize));
    256   }
    257 
    258   // The oldpos can be negative, but the new pos is only incremented linearly.
    259   int64_t oldpos = 0;
    260   uint64_t newpos = 0;
    261   std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024);
    262   while (newpos < newsize) {
    263     int64_t i;
    264     // Read control data.
    265     for (i = 0; i <= 2; i++) {
    266       if (!ReadBZ2(cpfbz2, buf, 8))
    267         errx(1, "Corrupt patch\n");
    268       ctrl[i] = ParseInt64(buf);
    269     }
    270 
    271     // Sanity-check.
    272     if (ctrl[0] < 0 || ctrl[1] < 0)
    273       errx(1, "Corrupt patch\n");
    274 
    275     // Sanity-check.
    276     if (newpos + ctrl[0] > newsize)
    277       errx(1, "Corrupt patch\n");
    278 
    279     // Add old data to diff string. It is enough to fseek once, at
    280     // the beginning of the sequence, to avoid unnecessary overhead.
    281     if ((i = oldpos) < 0) {
    282       // Write diff block directly to new file without adding old data,
    283       // because we will skip part where |oldpos| < 0.
    284       if (!ReadBZ2AndWriteAll(new_file, dpfbz2, -i, new_buf.data(),
    285                               new_buf.size()))
    286         errx(1, "Error during ReadBZ2AndWriteAll()");
    287 
    288       i = 0;
    289     }
    290 
    291     // We just checked that |i| is not negative.
    292     if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i))
    293       err(1, "error seeking input file to offset %" PRId64, i);
    294     if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
    295       old_file_pos = oldsize;
    296 
    297     size_t chunk_size = old_file_pos - i;
    298     while (chunk_size > 0) {
    299       size_t read_bytes;
    300       size_t bytes_to_read = std::min(chunk_size, old_buf.size());
    301       if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes))
    302         err(1, "error reading from input file");
    303       if (!read_bytes)
    304         errx(1, "EOF reached while reading from input file");
    305       // Read same amount of bytes from diff block
    306       if (!ReadBZ2(dpfbz2, new_buf.data(), read_bytes))
    307         errx(1, "Corrupt patch\n");
    308       // new_buf already has data from diff block, adds old data to it.
    309       for (size_t k = 0; k < read_bytes; k++)
    310         new_buf[k] += old_buf[k];
    311       if (!WriteAll(new_file, new_buf.data(), read_bytes))
    312         err(1, "Error writing new file.");
    313       chunk_size -= read_bytes;
    314     }
    315 
    316     // Adjust pointers.
    317     newpos += ctrl[0];
    318     oldpos += ctrl[0];
    319 
    320     if (oldpos > static_cast<int64_t>(oldsize)) {
    321       // Write diff block directly to new file without adding old data,
    322       // because we skipped part where |oldpos| > oldsize.
    323       if (!ReadBZ2AndWriteAll(new_file, dpfbz2, oldpos - oldsize,
    324                               new_buf.data(), new_buf.size()))
    325         errx(1, "Error during ReadBZ2AndWriteAll()");
    326     }
    327 
    328     // Sanity-check.
    329     if (newpos + ctrl[1] > newsize)
    330       errx(1, "Corrupt patch\n");
    331 
    332     // Read extra block.
    333     if (!ReadBZ2AndWriteAll(new_file, epfbz2, ctrl[1], new_buf.data(),
    334                             new_buf.size()))
    335       errx(1, "Error during ReadBZ2AndWriteAll()");
    336 
    337     // Adjust pointers.
    338     newpos += ctrl[1];
    339     oldpos += ctrl[2];
    340   }
    341 
    342   // Close input file.
    343   old_file->Close();
    344 
    345   // Clean up the bzip2 reads.
    346   BZ2_bzReadClose(&bz2err, cpfbz2);
    347   BZ2_bzReadClose(&bz2err, dpfbz2);
    348   BZ2_bzReadClose(&bz2err, epfbz2);
    349   if (fclose(cpf) || fclose(dpf) || fclose(epf))
    350     err(1, "fclose(%s)", patch_filename);
    351 
    352   if (!new_file->Close())
    353     err(1, "Error closing new file %s", new_filename);
    354 
    355   return 0;
    356 }
    357 
    358 }  // namespace bsdiff
    359