1 /*- 2 * Copyright 2003-2005 Colin Percival 3 * All rights reserved 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted providing that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #if 0 28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $"); 29 #endif 30 31 #include "bspatch.h" 32 33 #include <bzlib.h> 34 #include <err.h> 35 #include <errno.h> 36 #include <fcntl.h> 37 #include <inttypes.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <sys/stat.h> 42 #include <sys/types.h> 43 44 #include <algorithm> 45 #include <memory> 46 #include <limits> 47 #include <vector> 48 49 #include "extents.h" 50 #include "extents_file.h" 51 #include "file.h" 52 #include "file_interface.h" 53 #include "memory_file.h" 54 55 namespace { 56 57 int64_t ParseInt64(u_char* buf) { 58 int64_t y; 59 60 y = buf[7] & 0x7F; 61 y = y * 256; 62 y += buf[6]; 63 y = y * 256; 64 y += buf[5]; 65 y = y * 256; 66 y += buf[4]; 67 y = y * 256; 68 y += buf[3]; 69 y = y * 256; 70 y += buf[2]; 71 y = y * 256; 72 y += buf[1]; 73 y = y * 256; 74 y += buf[0]; 75 76 if (buf[7] & 0x80) 77 y = -y; 78 79 return y; 80 } 81 82 bool ReadBZ2(BZFILE* pfbz2, uint8_t* data, size_t size) { 83 int bz2err; 84 size_t lenread = BZ2_bzRead(&bz2err, pfbz2, data, size); 85 if (lenread < size || (bz2err != BZ_OK && bz2err != BZ_STREAM_END)) 86 return false; 87 return true; 88 } 89 90 bool ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file, 91 BZFILE* pfbz2, 92 size_t size, 93 uint8_t* buf, 94 size_t buf_size) { 95 while (size > 0) { 96 size_t bytes_to_read = std::min(size, buf_size); 97 if (!ReadBZ2(pfbz2, buf, bytes_to_read)) 98 return false; 99 if (!WriteAll(file, buf, bytes_to_read)) 100 return false; 101 size -= bytes_to_read; 102 } 103 return true; 104 } 105 106 } // namespace 107 108 namespace bsdiff { 109 110 bool WriteAll(const std::unique_ptr<FileInterface>& file, 111 const uint8_t* data, 112 size_t size) { 113 size_t offset = 0, written; 114 while (offset < size) { 115 if (!file->Write(data + offset, size - offset, &written) || written == 0) 116 return false; 117 offset += written; 118 } 119 return true; 120 } 121 122 bool IsOverlapping(const char* old_filename, 123 const char* new_filename, 124 const std::vector<ex_t>& old_extents, 125 const std::vector<ex_t>& new_extents) { 126 struct stat old_stat, new_stat; 127 if (stat(new_filename, &new_stat) == -1) { 128 if (errno == ENOENT) 129 return false; 130 err(1, "Error stat the new filename %s", new_filename); 131 } 132 if (stat(old_filename, &old_stat) == -1) 133 err(1, "Error stat the old filename %s", old_filename); 134 135 if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino) 136 return false; 137 138 if (old_extents.empty() && new_extents.empty()) 139 return true; 140 141 for (ex_t old_ex : old_extents) 142 for (ex_t new_ex : new_extents) 143 if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len && 144 static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len) 145 return true; 146 147 return false; 148 } 149 150 int bspatch( 151 const char* old_filename, const char* new_filename, 152 const char* patch_filename, 153 const char* old_extents, const char* new_extents) { 154 FILE* f, *cpf, *dpf, *epf; 155 BZFILE* cpfbz2, *dpfbz2, *epfbz2; 156 int bz2err; 157 ssize_t bzctrllen, bzdatalen; 158 u_char header[32], buf[8]; 159 off_t ctrl[3]; 160 161 int using_extents = (old_extents != NULL || new_extents != NULL); 162 163 // Open patch file. 164 if ((f = fopen(patch_filename, "r")) == NULL) 165 err(1, "fopen(%s)", patch_filename); 166 167 // File format: 168 // 0 8 "BSDIFF40" 169 // 8 8 X 170 // 16 8 Y 171 // 24 8 sizeof(new_filename) 172 // 32 X bzip2(control block) 173 // 32+X Y bzip2(diff block) 174 // 32+X+Y ??? bzip2(extra block) 175 // with control block a set of triples (x,y,z) meaning "add x bytes 176 // from oldfile to x bytes from the diff block; copy y bytes from the 177 // extra block; seek forwards in oldfile by z bytes". 178 179 // Read header. 180 if (fread(header, 1, 32, f) < 32) { 181 if (feof(f)) 182 errx(1, "Corrupt patch\n"); 183 err(1, "fread(%s)", patch_filename); 184 } 185 186 // Check for appropriate magic. 187 if (memcmp(header, "BSDIFF40", 8) != 0) 188 errx(1, "Corrupt patch\n"); 189 190 // Read lengths from header. 191 uint64_t oldsize, newsize; 192 bzctrllen = ParseInt64(header + 8); 193 bzdatalen = ParseInt64(header + 16); 194 int64_t signed_newsize = ParseInt64(header + 24); 195 newsize = signed_newsize; 196 if ((bzctrllen < 0) || (bzdatalen < 0) || (signed_newsize < 0)) 197 errx(1, "Corrupt patch\n"); 198 199 // Close patch file and re-open it via libbzip2 at the right places. 200 if (fclose(f)) 201 err(1, "fclose(%s)", patch_filename); 202 if ((cpf = fopen(patch_filename, "r")) == NULL) 203 err(1, "fopen(%s)", patch_filename); 204 if (fseek(cpf, 32, SEEK_SET)) 205 err(1, "fseeko(%s, %lld)", patch_filename, (long long)32); 206 if ((cpfbz2 = BZ2_bzReadOpen(&bz2err, cpf, 0, 0, NULL, 0)) == NULL) 207 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err); 208 if ((dpf = fopen(patch_filename, "r")) == NULL) 209 err(1, "fopen(%s)", patch_filename); 210 if (fseek(dpf, 32 + bzctrllen, SEEK_SET)) 211 err(1, "fseeko(%s, %lld)", patch_filename, (long long)(32 + bzctrllen)); 212 if ((dpfbz2 = BZ2_bzReadOpen(&bz2err, dpf, 0, 0, NULL, 0)) == NULL) 213 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err); 214 if ((epf = fopen(patch_filename, "r")) == NULL) 215 err(1, "fopen(%s)", patch_filename); 216 if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET)) 217 err(1, "fseeko(%s, %lld)", patch_filename, 218 (long long)(32 + bzctrllen + bzdatalen)); 219 if ((epfbz2 = BZ2_bzReadOpen(&bz2err, epf, 0, 0, NULL, 0)) == NULL) 220 errx(1, "BZ2_bzReadOpen, bz2err = %d", bz2err); 221 222 // Open input file for reading. 223 std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY); 224 if (!old_file) 225 err(1, "Error opening the old filename"); 226 227 std::vector<ex_t> parsed_old_extents; 228 if (using_extents) { 229 if (!ParseExtentStr(old_extents, &parsed_old_extents)) 230 errx(1, "Error parsing the old extents"); 231 old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents)); 232 } 233 234 if (!old_file->GetSize(&oldsize)) 235 err(1, "cannot obtain the size of %s", old_filename); 236 uint64_t old_file_pos = 0; 237 238 // Open output file for writing. 239 std::unique_ptr<FileInterface> new_file = 240 File::FOpen(new_filename, O_CREAT | O_WRONLY); 241 if (!new_file) 242 err(1, "Error opening the new filename %s", new_filename); 243 244 std::vector<ex_t> parsed_new_extents; 245 if (using_extents) { 246 if (!ParseExtentStr(new_extents, &parsed_new_extents)) 247 errx(1, "Error parsing the new extents"); 248 new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents)); 249 } 250 251 if (IsOverlapping(old_filename, new_filename, parsed_old_extents, 252 parsed_new_extents)) { 253 // New and old file is overlapping, we can not stream output to new file, 254 // cache it in the memory and write to the file at the end. 255 new_file.reset(new MemoryFile(std::move(new_file), newsize)); 256 } 257 258 // The oldpos can be negative, but the new pos is only incremented linearly. 259 int64_t oldpos = 0; 260 uint64_t newpos = 0; 261 std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024); 262 while (newpos < newsize) { 263 int64_t i; 264 // Read control data. 265 for (i = 0; i <= 2; i++) { 266 if (!ReadBZ2(cpfbz2, buf, 8)) 267 errx(1, "Corrupt patch\n"); 268 ctrl[i] = ParseInt64(buf); 269 } 270 271 // Sanity-check. 272 if (ctrl[0] < 0 || ctrl[1] < 0) 273 errx(1, "Corrupt patch\n"); 274 275 // Sanity-check. 276 if (newpos + ctrl[0] > newsize) 277 errx(1, "Corrupt patch\n"); 278 279 // Add old data to diff string. It is enough to fseek once, at 280 // the beginning of the sequence, to avoid unnecessary overhead. 281 if ((i = oldpos) < 0) { 282 // Write diff block directly to new file without adding old data, 283 // because we will skip part where |oldpos| < 0. 284 if (!ReadBZ2AndWriteAll(new_file, dpfbz2, -i, new_buf.data(), 285 new_buf.size())) 286 errx(1, "Error during ReadBZ2AndWriteAll()"); 287 288 i = 0; 289 } 290 291 // We just checked that |i| is not negative. 292 if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) 293 err(1, "error seeking input file to offset %" PRId64, i); 294 if ((old_file_pos = oldpos + ctrl[0]) > oldsize) 295 old_file_pos = oldsize; 296 297 size_t chunk_size = old_file_pos - i; 298 while (chunk_size > 0) { 299 size_t read_bytes; 300 size_t bytes_to_read = std::min(chunk_size, old_buf.size()); 301 if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) 302 err(1, "error reading from input file"); 303 if (!read_bytes) 304 errx(1, "EOF reached while reading from input file"); 305 // Read same amount of bytes from diff block 306 if (!ReadBZ2(dpfbz2, new_buf.data(), read_bytes)) 307 errx(1, "Corrupt patch\n"); 308 // new_buf already has data from diff block, adds old data to it. 309 for (size_t k = 0; k < read_bytes; k++) 310 new_buf[k] += old_buf[k]; 311 if (!WriteAll(new_file, new_buf.data(), read_bytes)) 312 err(1, "Error writing new file."); 313 chunk_size -= read_bytes; 314 } 315 316 // Adjust pointers. 317 newpos += ctrl[0]; 318 oldpos += ctrl[0]; 319 320 if (oldpos > static_cast<int64_t>(oldsize)) { 321 // Write diff block directly to new file without adding old data, 322 // because we skipped part where |oldpos| > oldsize. 323 if (!ReadBZ2AndWriteAll(new_file, dpfbz2, oldpos - oldsize, 324 new_buf.data(), new_buf.size())) 325 errx(1, "Error during ReadBZ2AndWriteAll()"); 326 } 327 328 // Sanity-check. 329 if (newpos + ctrl[1] > newsize) 330 errx(1, "Corrupt patch\n"); 331 332 // Read extra block. 333 if (!ReadBZ2AndWriteAll(new_file, epfbz2, ctrl[1], new_buf.data(), 334 new_buf.size())) 335 errx(1, "Error during ReadBZ2AndWriteAll()"); 336 337 // Adjust pointers. 338 newpos += ctrl[1]; 339 oldpos += ctrl[2]; 340 } 341 342 // Close input file. 343 old_file->Close(); 344 345 // Clean up the bzip2 reads. 346 BZ2_bzReadClose(&bz2err, cpfbz2); 347 BZ2_bzReadClose(&bz2err, dpfbz2); 348 BZ2_bzReadClose(&bz2err, epfbz2); 349 if (fclose(cpf) || fclose(dpf) || fclose(epf)) 350 err(1, "fclose(%s)", patch_filename); 351 352 if (!new_file->Close()) 353 err(1, "Error closing new file %s", new_filename); 354 355 return 0; 356 } 357 358 } // namespace bsdiff 359