1 /*- 2 * Copyright 2003-2005 Colin Percival 3 * All rights reserved 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted providing that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #if 0 28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $"); 29 #endif 30 31 #include "bsdiff/bspatch.h" 32 33 #include <bzlib.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <inttypes.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <unistd.h> 40 #include <sys/stat.h> 41 #include <sys/types.h> 42 43 #include <algorithm> 44 #include <memory> 45 #include <limits> 46 #include <vector> 47 48 #include "bsdiff/extents_file.h" 49 #include "bsdiff/file_interface.h" 50 #include "buffer_file.h" 51 #include "extents.h" 52 #include "file.h" 53 #include "memory_file.h" 54 #include "sink_file.h" 55 56 namespace { 57 58 int64_t ParseInt64(const u_char* buf) { 59 int64_t y; 60 61 y = buf[7] & 0x7F; 62 y = y * 256; 63 y += buf[6]; 64 y = y * 256; 65 y += buf[5]; 66 y = y * 256; 67 y += buf[4]; 68 y = y * 256; 69 y += buf[3]; 70 y = y * 256; 71 y += buf[2]; 72 y = y * 256; 73 y += buf[1]; 74 y = y * 256; 75 y += buf[0]; 76 77 if (buf[7] & 0x80) 78 y = -y; 79 80 return y; 81 } 82 83 bool ReadBZ2(bz_stream* stream, uint8_t* data, size_t size) { 84 stream->next_out = (char*)data; 85 while (size > 0) { 86 unsigned int read_size = std::min( 87 static_cast<size_t>(std::numeric_limits<unsigned int>::max()), size); 88 stream->avail_out = read_size; 89 int bz2err = BZ2_bzDecompress(stream); 90 if (bz2err != BZ_OK && bz2err != BZ_STREAM_END) 91 return false; 92 size -= read_size - stream->avail_out; 93 } 94 return true; 95 } 96 97 int ReadBZ2AndWriteAll(const std::unique_ptr<bsdiff::FileInterface>& file, 98 bz_stream* stream, 99 size_t size, 100 uint8_t* buf, 101 size_t buf_size) { 102 while (size > 0) { 103 size_t bytes_to_read = std::min(size, buf_size); 104 if (!ReadBZ2(stream, buf, bytes_to_read)) { 105 fprintf(stderr, "Failed to read bzip stream.\n"); 106 return 2; 107 } 108 if (!WriteAll(file, buf, bytes_to_read)) { 109 perror("WriteAll() failed"); 110 return 1; 111 } 112 size -= bytes_to_read; 113 } 114 return 0; 115 } 116 117 } // namespace 118 119 namespace bsdiff { 120 121 bool ReadAll(const std::unique_ptr<FileInterface>& file, 122 uint8_t* data, 123 size_t size) { 124 size_t offset = 0, read; 125 while (offset < size) { 126 if (!file->Read(data + offset, size - offset, &read) || read == 0) 127 return false; 128 offset += read; 129 } 130 return true; 131 } 132 133 bool WriteAll(const std::unique_ptr<FileInterface>& file, 134 const uint8_t* data, 135 size_t size) { 136 size_t offset = 0, written; 137 while (offset < size) { 138 if (!file->Write(data + offset, size - offset, &written) || written == 0) 139 return false; 140 offset += written; 141 } 142 return true; 143 } 144 145 bool IsOverlapping(const char* old_filename, 146 const char* new_filename, 147 const std::vector<ex_t>& old_extents, 148 const std::vector<ex_t>& new_extents) { 149 struct stat old_stat, new_stat; 150 if (stat(new_filename, &new_stat) == -1) { 151 if (errno == ENOENT) 152 return false; 153 fprintf(stderr, "Error stat the new file %s: %s\n", new_filename, 154 strerror(errno)); 155 return true; 156 } 157 if (stat(old_filename, &old_stat) == -1) { 158 fprintf(stderr, "Error stat the old file %s: %s\n", old_filename, 159 strerror(errno)); 160 return true; 161 } 162 163 if (old_stat.st_dev != new_stat.st_dev || old_stat.st_ino != new_stat.st_ino) 164 return false; 165 166 if (old_extents.empty() && new_extents.empty()) 167 return true; 168 169 for (ex_t old_ex : old_extents) 170 for (ex_t new_ex : new_extents) 171 if (static_cast<uint64_t>(old_ex.off) < new_ex.off + new_ex.len && 172 static_cast<uint64_t>(new_ex.off) < old_ex.off + old_ex.len) 173 return true; 174 175 return false; 176 } 177 178 // Patch |old_filename| with |patch_filename| and save it to |new_filename|. 179 // |old_extents| and |new_extents| are comma-separated lists of "offset:length" 180 // extents of |old_filename| and |new_filename|. 181 // Returns 0 on success, 1 on I/O error and 2 on data error. 182 int bspatch(const char* old_filename, 183 const char* new_filename, 184 const char* patch_filename, 185 const char* old_extents, 186 const char* new_extents) { 187 std::unique_ptr<FileInterface> patch_file = 188 File::FOpen(patch_filename, O_RDONLY); 189 if (!patch_file) { 190 fprintf(stderr, "Error opening the patch file %s: %s\n", patch_filename, 191 strerror(errno)); 192 return 1; 193 } 194 uint64_t patch_size; 195 patch_file->GetSize(&patch_size); 196 std::vector<uint8_t> patch(patch_size); 197 if (!ReadAll(patch_file, patch.data(), patch_size)) { 198 fprintf(stderr, "Error reading the patch file %s: %s\n", patch_filename, 199 strerror(errno)); 200 return 1; 201 } 202 patch_file.reset(); 203 204 return bspatch(old_filename, new_filename, patch.data(), patch_size, 205 old_extents, new_extents); 206 } 207 208 // Patch |old_filename| with |patch_data| and save it to |new_filename|. 209 // |old_extents| and |new_extents| are comma-separated lists of "offset:length" 210 // extents of |old_filename| and |new_filename|. 211 // Returns 0 on success, 1 on I/O error and 2 on data error. 212 int bspatch(const char* old_filename, 213 const char* new_filename, 214 const uint8_t* patch_data, 215 size_t patch_size, 216 const char* old_extents, 217 const char* new_extents) { 218 int using_extents = (old_extents != NULL || new_extents != NULL); 219 220 // Open input file for reading. 221 std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY); 222 if (!old_file) { 223 fprintf(stderr, "Error opening the old file %s: %s\n", old_filename, 224 strerror(errno)); 225 return 1; 226 } 227 228 std::vector<ex_t> parsed_old_extents; 229 if (using_extents) { 230 if (!ParseExtentStr(old_extents, &parsed_old_extents)) { 231 fprintf(stderr, "Error parsing the old extents\n"); 232 return 2; 233 } 234 old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents)); 235 } 236 237 // Open output file for writing. 238 std::unique_ptr<FileInterface> new_file = 239 File::FOpen(new_filename, O_CREAT | O_WRONLY); 240 if (!new_file) { 241 fprintf(stderr, "Error opening the new file %s: %s\n", new_filename, 242 strerror(errno)); 243 return 1; 244 } 245 246 std::vector<ex_t> parsed_new_extents; 247 if (using_extents) { 248 if (!ParseExtentStr(new_extents, &parsed_new_extents)) { 249 fprintf(stderr, "Error parsing the new extents\n"); 250 return 2; 251 } 252 new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents)); 253 } 254 255 if (IsOverlapping(old_filename, new_filename, parsed_old_extents, 256 parsed_new_extents)) { 257 // New and old file is overlapping, we can not stream output to new file, 258 // cache it in a buffer and write to the file at the end. 259 uint64_t newsize = ParseInt64(patch_data + 24); 260 new_file.reset(new BufferFile(std::move(new_file), newsize)); 261 } 262 263 return bspatch(old_file, new_file, patch_data, patch_size); 264 } 265 266 // Patch |old_data| with |patch_data| and save it by calling sink function. 267 // Returns 0 on success, 1 on I/O error and 2 on data error. 268 int bspatch(const uint8_t* old_data, 269 size_t old_size, 270 const uint8_t* patch_data, 271 size_t patch_size, 272 const sink_func& sink) { 273 std::unique_ptr<FileInterface> old_file(new MemoryFile(old_data, old_size)); 274 std::unique_ptr<FileInterface> new_file(new SinkFile(sink)); 275 276 return bspatch(old_file, new_file, patch_data, patch_size); 277 } 278 279 // Patch |old_file| with |patch_data| and save it to |new_file|. 280 // Returns 0 on success, 1 on I/O error and 2 on data error. 281 int bspatch(const std::unique_ptr<FileInterface>& old_file, 282 const std::unique_ptr<FileInterface>& new_file, 283 const uint8_t* patch_data, 284 size_t patch_size) { 285 int bz2err; 286 u_char buf[8]; 287 off_t ctrl[3]; 288 289 // File format: 290 // 0 8 "BSDIFF40" 291 // 8 8 X 292 // 16 8 Y 293 // 24 8 sizeof(new_filename) 294 // 32 X bzip2(control block) 295 // 32+X Y bzip2(diff block) 296 // 32+X+Y ??? bzip2(extra block) 297 // with control block a set of triples (x,y,z) meaning "add x bytes 298 // from oldfile to x bytes from the diff block; copy y bytes from the 299 // extra block; seek forwards in oldfile by z bytes". 300 301 // Check for appropriate magic. 302 if (memcmp(patch_data, "BSDIFF40", 8) != 0) { 303 fprintf(stderr, "Not a bsdiff patch.\n"); 304 return 2; 305 } 306 307 // Read lengths from header. 308 uint64_t oldsize, newsize; 309 int64_t ctrl_len = ParseInt64(patch_data + 8); 310 int64_t data_len = ParseInt64(patch_data + 16); 311 int64_t signed_newsize = ParseInt64(patch_data + 24); 312 newsize = signed_newsize; 313 if ((ctrl_len < 0) || (data_len < 0) || (signed_newsize < 0) || 314 (32 + ctrl_len + data_len > static_cast<int64_t>(patch_size))) { 315 fprintf(stderr, "Corrupt patch.\n"); 316 return 2; 317 } 318 319 bz_stream cstream; 320 cstream.next_in = (char*)patch_data + 32; 321 cstream.avail_in = ctrl_len; 322 cstream.bzalloc = nullptr; 323 cstream.bzfree = nullptr; 324 cstream.opaque = nullptr; 325 if ((bz2err = BZ2_bzDecompressInit(&cstream, 0, 0)) != BZ_OK) { 326 fprintf(stderr, "Failed to bzinit control stream (%d)\n", bz2err); 327 return 2; 328 } 329 330 bz_stream dstream; 331 dstream.next_in = (char*)patch_data + 32 + ctrl_len; 332 dstream.avail_in = data_len; 333 dstream.bzalloc = nullptr; 334 dstream.bzfree = nullptr; 335 dstream.opaque = nullptr; 336 if ((bz2err = BZ2_bzDecompressInit(&dstream, 0, 0)) != BZ_OK) { 337 fprintf(stderr, "Failed to bzinit diff stream (%d)\n", bz2err); 338 return 2; 339 } 340 341 bz_stream estream; 342 estream.next_in = (char*)patch_data + 32 + ctrl_len + data_len; 343 estream.avail_in = patch_size - (32 + ctrl_len + data_len); 344 estream.bzalloc = nullptr; 345 estream.bzfree = nullptr; 346 estream.opaque = nullptr; 347 if ((bz2err = BZ2_bzDecompressInit(&estream, 0, 0)) != BZ_OK) { 348 fprintf(stderr, "Failed to bzinit extra stream (%d)\n", bz2err); 349 return 2; 350 } 351 352 uint64_t old_file_pos = 0; 353 354 if (!old_file->GetSize(&oldsize)) { 355 fprintf(stderr, "Cannot obtain the size of old file.\n"); 356 return 1; 357 } 358 359 // The oldpos can be negative, but the new pos is only incremented linearly. 360 int64_t oldpos = 0; 361 uint64_t newpos = 0; 362 std::vector<uint8_t> old_buf(1024 * 1024), new_buf(1024 * 1024); 363 while (newpos < newsize) { 364 int64_t i; 365 // Read control data. 366 for (i = 0; i <= 2; i++) { 367 if (!ReadBZ2(&cstream, buf, 8)) { 368 fprintf(stderr, "Failed to read control stream.\n"); 369 return 2; 370 } 371 ctrl[i] = ParseInt64(buf); 372 } 373 374 // Sanity-check. 375 if (ctrl[0] < 0 || ctrl[1] < 0) { 376 fprintf(stderr, "Corrupt patch.\n"); 377 return 2; 378 } 379 380 // Sanity-check. 381 if (newpos + ctrl[0] > newsize) { 382 fprintf(stderr, "Corrupt patch.\n"); 383 return 2; 384 } 385 386 int ret = 0; 387 // Add old data to diff string. It is enough to fseek once, at 388 // the beginning of the sequence, to avoid unnecessary overhead. 389 if ((i = oldpos) < 0) { 390 // Write diff block directly to new file without adding old data, 391 // because we will skip part where |oldpos| < 0. 392 ret = ReadBZ2AndWriteAll(new_file, &dstream, -i, new_buf.data(), 393 new_buf.size()); 394 if (ret) 395 return ret; 396 i = 0; 397 } 398 399 // We just checked that |i| is not negative. 400 if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) { 401 fprintf(stderr, "Error seeking input file to offset %" PRId64 ": %s\n", i, 402 strerror(errno)); 403 return 1; 404 } 405 if ((old_file_pos = oldpos + ctrl[0]) > oldsize) 406 old_file_pos = oldsize; 407 408 size_t chunk_size = old_file_pos - i; 409 while (chunk_size > 0) { 410 size_t read_bytes; 411 size_t bytes_to_read = std::min(chunk_size, old_buf.size()); 412 if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes)) { 413 perror("Error reading from input file"); 414 return 1; 415 } 416 if (!read_bytes) { 417 fprintf(stderr, "EOF reached while reading from input file.\n"); 418 return 2; 419 } 420 // Read same amount of bytes from diff block 421 if (!ReadBZ2(&dstream, new_buf.data(), read_bytes)) { 422 fprintf(stderr, "Failed to read diff stream.\n"); 423 return 2; 424 } 425 // new_buf already has data from diff block, adds old data to it. 426 for (size_t k = 0; k < read_bytes; k++) 427 new_buf[k] += old_buf[k]; 428 if (!WriteAll(new_file, new_buf.data(), read_bytes)) { 429 perror("Error writing to new file"); 430 return 1; 431 } 432 chunk_size -= read_bytes; 433 } 434 435 // Adjust pointers. 436 newpos += ctrl[0]; 437 oldpos += ctrl[0]; 438 439 if (oldpos > static_cast<int64_t>(oldsize)) { 440 // Write diff block directly to new file without adding old data, 441 // because we skipped part where |oldpos| > oldsize. 442 ret = ReadBZ2AndWriteAll(new_file, &dstream, oldpos - oldsize, 443 new_buf.data(), new_buf.size()); 444 if (ret) 445 return ret; 446 } 447 448 // Sanity-check. 449 if (newpos + ctrl[1] > newsize) { 450 fprintf(stderr, "Corrupt patch.\n"); 451 return 2; 452 } 453 454 // Read extra block. 455 ret = ReadBZ2AndWriteAll(new_file, &estream, ctrl[1], new_buf.data(), 456 new_buf.size()); 457 if (ret) 458 return ret; 459 460 // Adjust pointers. 461 newpos += ctrl[1]; 462 oldpos += ctrl[2]; 463 } 464 465 // Close input file. 466 old_file->Close(); 467 468 // Clean up the bzip2 reads. 469 BZ2_bzDecompressEnd(&cstream); 470 BZ2_bzDecompressEnd(&dstream); 471 BZ2_bzDecompressEnd(&estream); 472 473 if (!new_file->Close()) { 474 perror("Error closing new file"); 475 return 1; 476 } 477 478 return 0; 479 } 480 481 } // namespace bsdiff 482