1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * This program constructs binary patches for images -- such as boot.img 19 * and recovery.img -- that consist primarily of large chunks of gzipped 20 * data interspersed with uncompressed data. Doing a naive bsdiff of 21 * these files is not useful because small changes in the data lead to 22 * large changes in the compressed bitstream; bsdiff patches of gzipped 23 * data are typically as large as the data itself. 24 * 25 * To patch these usefully, we break the source and target images up into 26 * chunks of two types: "normal" and "gzip". Normal chunks are simply 27 * patched using a plain bsdiff. Gzip chunks are first expanded, then a 28 * bsdiff is applied to the uncompressed data, then the patched data is 29 * gzipped using the same encoder parameters. Patched chunks are 30 * concatenated together to create the output file; the output image 31 * should be *exactly* the same series of bytes as the target image used 32 * originally to generate the patch. 33 * 34 * To work well with this tool, the gzipped sections of the target 35 * image must have been generated using the same deflate encoder that 36 * is available in applypatch, namely, the one in the zlib library. 37 * In practice this means that images should be compressed using the 38 * "minigzip" tool included in the zlib distribution, not the GNU gzip 39 * program. 40 * 41 * An "imgdiff" patch consists of a header describing the chunk structure 42 * of the file and any encoding parameters needed for the gzipped 43 * chunks, followed by N bsdiff patches, one per chunk. 44 * 45 * For a diff to be generated, the source and target images must have the 46 * same "chunk" structure: that is, the same number of gzipped and normal 47 * chunks in the same order. Android boot and recovery images currently 48 * consist of five chunks: a small normal header, a gzipped kernel, a 49 * small normal section, a gzipped ramdisk, and finally a small normal 50 * footer. 51 * 52 * Caveats: we locate gzipped sections within the source and target 53 * images by searching for the byte sequence 1f8b0800: 1f8b is the gzip 54 * magic number; 08 specifies the "deflate" encoding [the only encoding 55 * supported by the gzip standard]; and 00 is the flags byte. We do not 56 * currently support any extra header fields (which would be indicated by 57 * a nonzero flags byte). We also don't handle the case when that byte 58 * sequence appears spuriously in the file. (Note that it would have to 59 * occur spuriously within a normal chunk to be a problem.) 60 * 61 * 62 * The imgdiff patch header looks like this: 63 * 64 * "IMGDIFF1" (8) [magic number and version] 65 * chunk count (4) 66 * for each chunk: 67 * chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}] 68 * if chunk type == CHUNK_NORMAL: 69 * source start (8) 70 * source len (8) 71 * bsdiff patch offset (8) [from start of patch file] 72 * if chunk type == CHUNK_GZIP: (version 1 only) 73 * source start (8) 74 * source len (8) 75 * bsdiff patch offset (8) [from start of patch file] 76 * source expanded len (8) [size of uncompressed source] 77 * target expected len (8) [size of uncompressed target] 78 * gzip level (4) 79 * method (4) 80 * windowBits (4) 81 * memLevel (4) 82 * strategy (4) 83 * gzip header len (4) 84 * gzip header (gzip header len) 85 * gzip footer (8) 86 * if chunk type == CHUNK_DEFLATE: (version 2 only) 87 * source start (8) 88 * source len (8) 89 * bsdiff patch offset (8) [from start of patch file] 90 * source expanded len (8) [size of uncompressed source] 91 * target expected len (8) [size of uncompressed target] 92 * gzip level (4) 93 * method (4) 94 * windowBits (4) 95 * memLevel (4) 96 * strategy (4) 97 * if chunk type == RAW: (version 2 only) 98 * target len (4) 99 * data (target len) 100 * 101 * All integers are little-endian. "source start" and "source len" 102 * specify the section of the input image that comprises this chunk, 103 * including the gzip header and footer for gzip chunks. "source 104 * expanded len" is the size of the uncompressed source data. "target 105 * expected len" is the size of the uncompressed data after applying 106 * the bsdiff patch. The next five parameters specify the zlib 107 * parameters to be used when compressing the patched data, and the 108 * next three specify the header and footer to be wrapped around the 109 * compressed data to create the output chunk (so that header contents 110 * like the timestamp are recreated exactly). 111 * 112 * After the header there are 'chunk count' bsdiff patches; the offset 113 * of each from the beginning of the file is specified in the header. 114 * 115 * This tool can take an optional file of "bonus data". This is an 116 * extra file of data that is appended to chunk #1 after it is 117 * compressed (it must be a CHUNK_DEFLATE chunk). The same file must 118 * be available (and passed to applypatch with -b) when applying the 119 * patch. This is used to reduce the size of recovery-from-boot 120 * patches by combining the boot image with recovery ramdisk 121 * information that is stored on the system partition. 122 */ 123 124 #include <errno.h> 125 #include <stdio.h> 126 #include <stdlib.h> 127 #include <string.h> 128 #include <sys/stat.h> 129 #include <unistd.h> 130 #include <sys/types.h> 131 132 #include "zlib.h" 133 #include "imgdiff.h" 134 #include "utils.h" 135 136 typedef struct { 137 int type; // CHUNK_NORMAL, CHUNK_DEFLATE 138 size_t start; // offset of chunk in original image file 139 140 size_t len; 141 unsigned char* data; // data to be patched (uncompressed, for deflate chunks) 142 143 size_t source_start; 144 size_t source_len; 145 146 off_t* I; // used by bsdiff 147 148 // --- for CHUNK_DEFLATE chunks only: --- 149 150 // original (compressed) deflate data 151 size_t deflate_len; 152 unsigned char* deflate_data; 153 154 char* filename; // used for zip entries 155 156 // deflate encoder parameters 157 int level, method, windowBits, memLevel, strategy; 158 159 size_t source_uncompressed_len; 160 } ImageChunk; 161 162 typedef struct { 163 int data_offset; 164 int deflate_len; 165 int uncomp_len; 166 char* filename; 167 } ZipFileEntry; 168 169 static int fileentry_compare(const void* a, const void* b) { 170 int ao = ((ZipFileEntry*)a)->data_offset; 171 int bo = ((ZipFileEntry*)b)->data_offset; 172 if (ao < bo) { 173 return -1; 174 } else if (ao > bo) { 175 return 1; 176 } else { 177 return 0; 178 } 179 } 180 181 // from bsdiff.c 182 int bsdiff(u_char* old, off_t oldsize, off_t** IP, u_char* new, off_t newsize, 183 const char* patch_filename); 184 185 unsigned char* ReadZip(const char* filename, 186 int* num_chunks, ImageChunk** chunks, 187 int include_pseudo_chunk) { 188 struct stat st; 189 if (stat(filename, &st) != 0) { 190 printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); 191 return NULL; 192 } 193 194 unsigned char* img = malloc(st.st_size); 195 FILE* f = fopen(filename, "rb"); 196 if (fread(img, 1, st.st_size, f) != st.st_size) { 197 printf("failed to read \"%s\" %s\n", filename, strerror(errno)); 198 fclose(f); 199 return NULL; 200 } 201 fclose(f); 202 203 // look for the end-of-central-directory record. 204 205 int i; 206 for (i = st.st_size-20; i >= 0 && i > st.st_size - 65600; --i) { 207 if (img[i] == 0x50 && img[i+1] == 0x4b && 208 img[i+2] == 0x05 && img[i+3] == 0x06) { 209 break; 210 } 211 } 212 // double-check: this archive consists of a single "disk" 213 if (!(img[i+4] == 0 && img[i+5] == 0 && img[i+6] == 0 && img[i+7] == 0)) { 214 printf("can't process multi-disk archive\n"); 215 return NULL; 216 } 217 218 int cdcount = Read2(img+i+8); 219 int cdoffset = Read4(img+i+16); 220 221 ZipFileEntry* temp_entries = malloc(cdcount * sizeof(ZipFileEntry)); 222 int entrycount = 0; 223 224 unsigned char* cd = img+cdoffset; 225 for (i = 0; i < cdcount; ++i) { 226 if (!(cd[0] == 0x50 && cd[1] == 0x4b && cd[2] == 0x01 && cd[3] == 0x02)) { 227 printf("bad central directory entry %d\n", i); 228 return NULL; 229 } 230 231 int clen = Read4(cd+20); // compressed len 232 int ulen = Read4(cd+24); // uncompressed len 233 int nlen = Read2(cd+28); // filename len 234 int xlen = Read2(cd+30); // extra field len 235 int mlen = Read2(cd+32); // file comment len 236 int hoffset = Read4(cd+42); // local header offset 237 238 char* filename = malloc(nlen+1); 239 memcpy(filename, cd+46, nlen); 240 filename[nlen] = '\0'; 241 242 int method = Read2(cd+10); 243 244 cd += 46 + nlen + xlen + mlen; 245 246 if (method != 8) { // 8 == deflate 247 free(filename); 248 continue; 249 } 250 251 unsigned char* lh = img + hoffset; 252 253 if (!(lh[0] == 0x50 && lh[1] == 0x4b && lh[2] == 0x03 && lh[3] == 0x04)) { 254 printf("bad local file header entry %d\n", i); 255 return NULL; 256 } 257 258 if (Read2(lh+26) != nlen || memcmp(lh+30, filename, nlen) != 0) { 259 printf("central dir filename doesn't match local header\n"); 260 return NULL; 261 } 262 263 xlen = Read2(lh+28); // extra field len; might be different from CD entry? 264 265 temp_entries[entrycount].data_offset = hoffset+30+nlen+xlen; 266 temp_entries[entrycount].deflate_len = clen; 267 temp_entries[entrycount].uncomp_len = ulen; 268 temp_entries[entrycount].filename = filename; 269 ++entrycount; 270 } 271 272 qsort(temp_entries, entrycount, sizeof(ZipFileEntry), fileentry_compare); 273 274 #if 0 275 printf("found %d deflated entries\n", entrycount); 276 for (i = 0; i < entrycount; ++i) { 277 printf("off %10d len %10d unlen %10d %p %s\n", 278 temp_entries[i].data_offset, 279 temp_entries[i].deflate_len, 280 temp_entries[i].uncomp_len, 281 temp_entries[i].filename, 282 temp_entries[i].filename); 283 } 284 #endif 285 286 *num_chunks = 0; 287 *chunks = malloc((entrycount*2+2) * sizeof(ImageChunk)); 288 ImageChunk* curr = *chunks; 289 290 if (include_pseudo_chunk) { 291 curr->type = CHUNK_NORMAL; 292 curr->start = 0; 293 curr->len = st.st_size; 294 curr->data = img; 295 curr->filename = NULL; 296 curr->I = NULL; 297 ++curr; 298 ++*num_chunks; 299 } 300 301 int pos = 0; 302 int nextentry = 0; 303 304 while (pos < st.st_size) { 305 if (nextentry < entrycount && pos == temp_entries[nextentry].data_offset) { 306 curr->type = CHUNK_DEFLATE; 307 curr->start = pos; 308 curr->deflate_len = temp_entries[nextentry].deflate_len; 309 curr->deflate_data = img + pos; 310 curr->filename = temp_entries[nextentry].filename; 311 curr->I = NULL; 312 313 curr->len = temp_entries[nextentry].uncomp_len; 314 curr->data = malloc(curr->len); 315 316 z_stream strm; 317 strm.zalloc = Z_NULL; 318 strm.zfree = Z_NULL; 319 strm.opaque = Z_NULL; 320 strm.avail_in = curr->deflate_len; 321 strm.next_in = curr->deflate_data; 322 323 // -15 means we are decoding a 'raw' deflate stream; zlib will 324 // not expect zlib headers. 325 int ret = inflateInit2(&strm, -15); 326 327 strm.avail_out = curr->len; 328 strm.next_out = curr->data; 329 ret = inflate(&strm, Z_NO_FLUSH); 330 if (ret != Z_STREAM_END) { 331 printf("failed to inflate \"%s\"; %d\n", curr->filename, ret); 332 return NULL; 333 } 334 335 inflateEnd(&strm); 336 337 pos += curr->deflate_len; 338 ++nextentry; 339 ++*num_chunks; 340 ++curr; 341 continue; 342 } 343 344 // use a normal chunk to take all the data up to the start of the 345 // next deflate section. 346 347 curr->type = CHUNK_NORMAL; 348 curr->start = pos; 349 if (nextentry < entrycount) { 350 curr->len = temp_entries[nextentry].data_offset - pos; 351 } else { 352 curr->len = st.st_size - pos; 353 } 354 curr->data = img + pos; 355 curr->filename = NULL; 356 curr->I = NULL; 357 pos += curr->len; 358 359 ++*num_chunks; 360 ++curr; 361 } 362 363 free(temp_entries); 364 return img; 365 } 366 367 /* 368 * Read the given file and break it up into chunks, putting the number 369 * of chunks and their info in *num_chunks and **chunks, 370 * respectively. Returns a malloc'd block of memory containing the 371 * contents of the file; various pointers in the output chunk array 372 * will point into this block of memory. The caller should free the 373 * return value when done with all the chunks. Returns NULL on 374 * failure. 375 */ 376 unsigned char* ReadImage(const char* filename, 377 int* num_chunks, ImageChunk** chunks) { 378 struct stat st; 379 if (stat(filename, &st) != 0) { 380 printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); 381 return NULL; 382 } 383 384 unsigned char* img = malloc(st.st_size + 4); 385 FILE* f = fopen(filename, "rb"); 386 if (fread(img, 1, st.st_size, f) != st.st_size) { 387 printf("failed to read \"%s\" %s\n", filename, strerror(errno)); 388 fclose(f); 389 return NULL; 390 } 391 fclose(f); 392 393 // append 4 zero bytes to the data so we can always search for the 394 // four-byte string 1f8b0800 starting at any point in the actual 395 // file data, without special-casing the end of the data. 396 memset(img+st.st_size, 0, 4); 397 398 size_t pos = 0; 399 400 *num_chunks = 0; 401 *chunks = NULL; 402 403 while (pos < st.st_size) { 404 unsigned char* p = img+pos; 405 406 if (st.st_size - pos >= 4 && 407 p[0] == 0x1f && p[1] == 0x8b && 408 p[2] == 0x08 && // deflate compression 409 p[3] == 0x00) { // no header flags 410 // 'pos' is the offset of the start of a gzip chunk. 411 412 *num_chunks += 3; 413 *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk)); 414 ImageChunk* curr = *chunks + (*num_chunks-3); 415 416 // create a normal chunk for the header. 417 curr->start = pos; 418 curr->type = CHUNK_NORMAL; 419 curr->len = GZIP_HEADER_LEN; 420 curr->data = p; 421 curr->I = NULL; 422 423 pos += curr->len; 424 p += curr->len; 425 ++curr; 426 427 curr->type = CHUNK_DEFLATE; 428 curr->filename = NULL; 429 curr->I = NULL; 430 431 // We must decompress this chunk in order to discover where it 432 // ends, and so we can put the uncompressed data and its length 433 // into curr->data and curr->len. 434 435 size_t allocated = 32768; 436 curr->len = 0; 437 curr->data = malloc(allocated); 438 curr->start = pos; 439 curr->deflate_data = p; 440 441 z_stream strm; 442 strm.zalloc = Z_NULL; 443 strm.zfree = Z_NULL; 444 strm.opaque = Z_NULL; 445 strm.avail_in = st.st_size - pos; 446 strm.next_in = p; 447 448 // -15 means we are decoding a 'raw' deflate stream; zlib will 449 // not expect zlib headers. 450 int ret = inflateInit2(&strm, -15); 451 452 do { 453 strm.avail_out = allocated - curr->len; 454 strm.next_out = curr->data + curr->len; 455 ret = inflate(&strm, Z_NO_FLUSH); 456 curr->len = allocated - strm.avail_out; 457 if (strm.avail_out == 0) { 458 allocated *= 2; 459 curr->data = realloc(curr->data, allocated); 460 } 461 } while (ret != Z_STREAM_END); 462 463 curr->deflate_len = st.st_size - strm.avail_in - pos; 464 inflateEnd(&strm); 465 pos += curr->deflate_len; 466 p += curr->deflate_len; 467 ++curr; 468 469 // create a normal chunk for the footer 470 471 curr->type = CHUNK_NORMAL; 472 curr->start = pos; 473 curr->len = GZIP_FOOTER_LEN; 474 curr->data = img+pos; 475 curr->I = NULL; 476 477 pos += curr->len; 478 p += curr->len; 479 ++curr; 480 481 // The footer (that we just skipped over) contains the size of 482 // the uncompressed data. Double-check to make sure that it 483 // matches the size of the data we got when we actually did 484 // the decompression. 485 size_t footer_size = Read4(p-4); 486 if (footer_size != curr[-2].len) { 487 printf("Error: footer size %d != decompressed size %d\n", 488 footer_size, curr[-2].len); 489 free(img); 490 return NULL; 491 } 492 } else { 493 // Reallocate the list for every chunk; we expect the number of 494 // chunks to be small (5 for typical boot and recovery images). 495 ++*num_chunks; 496 *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk)); 497 ImageChunk* curr = *chunks + (*num_chunks-1); 498 curr->start = pos; 499 curr->I = NULL; 500 501 // 'pos' is not the offset of the start of a gzip chunk, so scan 502 // forward until we find a gzip header. 503 curr->type = CHUNK_NORMAL; 504 curr->data = p; 505 506 for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) { 507 if (p[curr->len] == 0x1f && 508 p[curr->len+1] == 0x8b && 509 p[curr->len+2] == 0x08 && 510 p[curr->len+3] == 0x00) { 511 break; 512 } 513 } 514 pos += curr->len; 515 } 516 } 517 518 return img; 519 } 520 521 #define BUFFER_SIZE 32768 522 523 /* 524 * Takes the uncompressed data stored in the chunk, compresses it 525 * using the zlib parameters stored in the chunk, and checks that it 526 * matches exactly the compressed data we started with (also stored in 527 * the chunk). Return 0 on success. 528 */ 529 int TryReconstruction(ImageChunk* chunk, unsigned char* out) { 530 size_t p = 0; 531 532 #if 0 533 printf("trying %d %d %d %d %d\n", 534 chunk->level, chunk->method, chunk->windowBits, 535 chunk->memLevel, chunk->strategy); 536 #endif 537 538 z_stream strm; 539 strm.zalloc = Z_NULL; 540 strm.zfree = Z_NULL; 541 strm.opaque = Z_NULL; 542 strm.avail_in = chunk->len; 543 strm.next_in = chunk->data; 544 int ret; 545 ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits, 546 chunk->memLevel, chunk->strategy); 547 do { 548 strm.avail_out = BUFFER_SIZE; 549 strm.next_out = out; 550 ret = deflate(&strm, Z_FINISH); 551 size_t have = BUFFER_SIZE - strm.avail_out; 552 553 if (memcmp(out, chunk->deflate_data+p, have) != 0) { 554 // mismatch; data isn't the same. 555 deflateEnd(&strm); 556 return -1; 557 } 558 p += have; 559 } while (ret != Z_STREAM_END); 560 deflateEnd(&strm); 561 if (p != chunk->deflate_len) { 562 // mismatch; ran out of data before we should have. 563 return -1; 564 } 565 return 0; 566 } 567 568 /* 569 * Verify that we can reproduce exactly the same compressed data that 570 * we started with. Sets the level, method, windowBits, memLevel, and 571 * strategy fields in the chunk to the encoding parameters needed to 572 * produce the right output. Returns 0 on success. 573 */ 574 int ReconstructDeflateChunk(ImageChunk* chunk) { 575 if (chunk->type != CHUNK_DEFLATE) { 576 printf("attempt to reconstruct non-deflate chunk\n"); 577 return -1; 578 } 579 580 size_t p = 0; 581 unsigned char* out = malloc(BUFFER_SIZE); 582 583 // We only check two combinations of encoder parameters: level 6 584 // (the default) and level 9 (the maximum). 585 for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) { 586 chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream. 587 chunk->memLevel = 8; // the default value. 588 chunk->method = Z_DEFLATED; 589 chunk->strategy = Z_DEFAULT_STRATEGY; 590 591 if (TryReconstruction(chunk, out) == 0) { 592 free(out); 593 return 0; 594 } 595 } 596 597 free(out); 598 return -1; 599 } 600 601 /* 602 * Given source and target chunks, compute a bsdiff patch between them 603 * by running bsdiff in a subprocess. Return the patch data, placing 604 * its length in *size. Return NULL on failure. We expect the bsdiff 605 * program to be in the path. 606 */ 607 unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) { 608 if (tgt->type == CHUNK_NORMAL) { 609 if (tgt->len <= 160) { 610 tgt->type = CHUNK_RAW; 611 *size = tgt->len; 612 return tgt->data; 613 } 614 } 615 616 char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; 617 mkstemp(ptemp); 618 619 int r = bsdiff(src->data, src->len, &(src->I), tgt->data, tgt->len, ptemp); 620 if (r != 0) { 621 printf("bsdiff() failed: %d\n", r); 622 return NULL; 623 } 624 625 struct stat st; 626 if (stat(ptemp, &st) != 0) { 627 printf("failed to stat patch file %s: %s\n", 628 ptemp, strerror(errno)); 629 return NULL; 630 } 631 632 unsigned char* data = malloc(st.st_size); 633 634 if (tgt->type == CHUNK_NORMAL && tgt->len <= st.st_size) { 635 unlink(ptemp); 636 637 tgt->type = CHUNK_RAW; 638 *size = tgt->len; 639 return tgt->data; 640 } 641 642 *size = st.st_size; 643 644 FILE* f = fopen(ptemp, "rb"); 645 if (f == NULL) { 646 printf("failed to open patch %s: %s\n", ptemp, strerror(errno)); 647 return NULL; 648 } 649 if (fread(data, 1, st.st_size, f) != st.st_size) { 650 printf("failed to read patch %s: %s\n", ptemp, strerror(errno)); 651 return NULL; 652 } 653 fclose(f); 654 655 unlink(ptemp); 656 657 tgt->source_start = src->start; 658 switch (tgt->type) { 659 case CHUNK_NORMAL: 660 tgt->source_len = src->len; 661 break; 662 case CHUNK_DEFLATE: 663 tgt->source_len = src->deflate_len; 664 tgt->source_uncompressed_len = src->len; 665 break; 666 } 667 668 return data; 669 } 670 671 /* 672 * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob 673 * of uninterpreted data). The resulting patch will likely be about 674 * as big as the target file, but it lets us handle the case of images 675 * where some gzip chunks are reconstructible but others aren't (by 676 * treating the ones that aren't as normal chunks). 677 */ 678 void ChangeDeflateChunkToNormal(ImageChunk* ch) { 679 if (ch->type != CHUNK_DEFLATE) return; 680 ch->type = CHUNK_NORMAL; 681 free(ch->data); 682 ch->data = ch->deflate_data; 683 ch->len = ch->deflate_len; 684 } 685 686 /* 687 * Return true if the data in the chunk is identical (including the 688 * compressed representation, for gzip chunks). 689 */ 690 int AreChunksEqual(ImageChunk* a, ImageChunk* b) { 691 if (a->type != b->type) return 0; 692 693 switch (a->type) { 694 case CHUNK_NORMAL: 695 return a->len == b->len && memcmp(a->data, b->data, a->len) == 0; 696 697 case CHUNK_DEFLATE: 698 return a->deflate_len == b->deflate_len && 699 memcmp(a->deflate_data, b->deflate_data, a->deflate_len) == 0; 700 701 default: 702 printf("unknown chunk type %d\n", a->type); 703 return 0; 704 } 705 } 706 707 /* 708 * Look for runs of adjacent normal chunks and compress them down into 709 * a single chunk. (Such runs can be produced when deflate chunks are 710 * changed to normal chunks.) 711 */ 712 void MergeAdjacentNormalChunks(ImageChunk* chunks, int* num_chunks) { 713 int out = 0; 714 int in_start = 0, in_end; 715 while (in_start < *num_chunks) { 716 if (chunks[in_start].type != CHUNK_NORMAL) { 717 in_end = in_start+1; 718 } else { 719 // in_start is a normal chunk. Look for a run of normal chunks 720 // that constitute a solid block of data (ie, each chunk begins 721 // where the previous one ended). 722 for (in_end = in_start+1; 723 in_end < *num_chunks && chunks[in_end].type == CHUNK_NORMAL && 724 (chunks[in_end].start == 725 chunks[in_end-1].start + chunks[in_end-1].len && 726 chunks[in_end].data == 727 chunks[in_end-1].data + chunks[in_end-1].len); 728 ++in_end); 729 } 730 731 if (in_end == in_start+1) { 732 #if 0 733 printf("chunk %d is now %d\n", in_start, out); 734 #endif 735 if (out != in_start) { 736 memcpy(chunks+out, chunks+in_start, sizeof(ImageChunk)); 737 } 738 } else { 739 #if 0 740 printf("collapse normal chunks %d-%d into %d\n", in_start, in_end-1, out); 741 #endif 742 743 // Merge chunks [in_start, in_end-1] into one chunk. Since the 744 // data member of each chunk is just a pointer into an in-memory 745 // copy of the file, this can be done without recopying (the 746 // output chunk has the first chunk's start location and data 747 // pointer, and length equal to the sum of the input chunk 748 // lengths). 749 chunks[out].type = CHUNK_NORMAL; 750 chunks[out].start = chunks[in_start].start; 751 chunks[out].data = chunks[in_start].data; 752 chunks[out].len = chunks[in_end-1].len + 753 (chunks[in_end-1].start - chunks[in_start].start); 754 } 755 756 ++out; 757 in_start = in_end; 758 } 759 *num_chunks = out; 760 } 761 762 ImageChunk* FindChunkByName(const char* name, 763 ImageChunk* chunks, int num_chunks) { 764 int i; 765 for (i = 0; i < num_chunks; ++i) { 766 if (chunks[i].type == CHUNK_DEFLATE && chunks[i].filename && 767 strcmp(name, chunks[i].filename) == 0) { 768 return chunks+i; 769 } 770 } 771 return NULL; 772 } 773 774 void DumpChunks(ImageChunk* chunks, int num_chunks) { 775 int i; 776 for (i = 0; i < num_chunks; ++i) { 777 printf("chunk %d: type %d start %d len %d\n", 778 i, chunks[i].type, chunks[i].start, chunks[i].len); 779 } 780 } 781 782 int main(int argc, char** argv) { 783 int zip_mode = 0; 784 785 if (argc >= 2 && strcmp(argv[1], "-z") == 0) { 786 zip_mode = 1; 787 --argc; 788 ++argv; 789 } 790 791 size_t bonus_size = 0; 792 unsigned char* bonus_data = NULL; 793 if (argc >= 3 && strcmp(argv[1], "-b") == 0) { 794 struct stat st; 795 if (stat(argv[2], &st) != 0) { 796 printf("failed to stat bonus file %s: %s\n", argv[2], strerror(errno)); 797 return 1; 798 } 799 bonus_size = st.st_size; 800 bonus_data = malloc(bonus_size); 801 FILE* f = fopen(argv[2], "rb"); 802 if (f == NULL) { 803 printf("failed to open bonus file %s: %s\n", argv[2], strerror(errno)); 804 return 1; 805 } 806 if (fread(bonus_data, 1, bonus_size, f) != bonus_size) { 807 printf("failed to read bonus file %s: %s\n", argv[2], strerror(errno)); 808 return 1; 809 } 810 fclose(f); 811 812 argc -= 2; 813 argv += 2; 814 } 815 816 if (argc != 4) { 817 usage: 818 printf("usage: %s [-z] [-b <bonus-file>] <src-img> <tgt-img> <patch-file>\n", 819 argv[0]); 820 return 2; 821 } 822 823 int num_src_chunks; 824 ImageChunk* src_chunks; 825 int num_tgt_chunks; 826 ImageChunk* tgt_chunks; 827 int i; 828 829 if (zip_mode) { 830 if (ReadZip(argv[1], &num_src_chunks, &src_chunks, 1) == NULL) { 831 printf("failed to break apart source zip file\n"); 832 return 1; 833 } 834 if (ReadZip(argv[2], &num_tgt_chunks, &tgt_chunks, 0) == NULL) { 835 printf("failed to break apart target zip file\n"); 836 return 1; 837 } 838 } else { 839 if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) { 840 printf("failed to break apart source image\n"); 841 return 1; 842 } 843 if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) { 844 printf("failed to break apart target image\n"); 845 return 1; 846 } 847 848 // Verify that the source and target images have the same chunk 849 // structure (ie, the same sequence of deflate and normal chunks). 850 851 if (!zip_mode) { 852 // Merge the gzip header and footer in with any adjacent 853 // normal chunks. 854 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 855 MergeAdjacentNormalChunks(src_chunks, &num_src_chunks); 856 } 857 858 if (num_src_chunks != num_tgt_chunks) { 859 printf("source and target don't have same number of chunks!\n"); 860 printf("source chunks:\n"); 861 DumpChunks(src_chunks, num_src_chunks); 862 printf("target chunks:\n"); 863 DumpChunks(tgt_chunks, num_tgt_chunks); 864 return 1; 865 } 866 for (i = 0; i < num_src_chunks; ++i) { 867 if (src_chunks[i].type != tgt_chunks[i].type) { 868 printf("source and target don't have same chunk " 869 "structure! (chunk %d)\n", i); 870 printf("source chunks:\n"); 871 DumpChunks(src_chunks, num_src_chunks); 872 printf("target chunks:\n"); 873 DumpChunks(tgt_chunks, num_tgt_chunks); 874 return 1; 875 } 876 } 877 } 878 879 for (i = 0; i < num_tgt_chunks; ++i) { 880 if (tgt_chunks[i].type == CHUNK_DEFLATE) { 881 // Confirm that given the uncompressed chunk data in the target, we 882 // can recompress it and get exactly the same bits as are in the 883 // input target image. If this fails, treat the chunk as a normal 884 // non-deflated chunk. 885 if (ReconstructDeflateChunk(tgt_chunks+i) < 0) { 886 printf("failed to reconstruct target deflate chunk %d [%s]; " 887 "treating as normal\n", i, tgt_chunks[i].filename); 888 ChangeDeflateChunkToNormal(tgt_chunks+i); 889 if (zip_mode) { 890 ImageChunk* src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks); 891 if (src) { 892 ChangeDeflateChunkToNormal(src); 893 } 894 } else { 895 ChangeDeflateChunkToNormal(src_chunks+i); 896 } 897 continue; 898 } 899 900 // If two deflate chunks are identical (eg, the kernel has not 901 // changed between two builds), treat them as normal chunks. 902 // This makes applypatch much faster -- it can apply a trivial 903 // patch to the compressed data, rather than uncompressing and 904 // recompressing to apply the trivial patch to the uncompressed 905 // data. 906 ImageChunk* src; 907 if (zip_mode) { 908 src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks); 909 } else { 910 src = src_chunks+i; 911 } 912 913 if (src == NULL || AreChunksEqual(tgt_chunks+i, src)) { 914 ChangeDeflateChunkToNormal(tgt_chunks+i); 915 if (src) { 916 ChangeDeflateChunkToNormal(src); 917 } 918 } 919 } 920 } 921 922 // Merging neighboring normal chunks. 923 if (zip_mode) { 924 // For zips, we only need to do this to the target: deflated 925 // chunks are matched via filename, and normal chunks are patched 926 // using the entire source file as the source. 927 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 928 } else { 929 // For images, we need to maintain the parallel structure of the 930 // chunk lists, so do the merging in both the source and target 931 // lists. 932 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 933 MergeAdjacentNormalChunks(src_chunks, &num_src_chunks); 934 if (num_src_chunks != num_tgt_chunks) { 935 // This shouldn't happen. 936 printf("merging normal chunks went awry\n"); 937 return 1; 938 } 939 } 940 941 // Compute bsdiff patches for each chunk's data (the uncompressed 942 // data, in the case of deflate chunks). 943 944 DumpChunks(src_chunks, num_src_chunks); 945 946 printf("Construct patches for %d chunks...\n", num_tgt_chunks); 947 unsigned char** patch_data = malloc(num_tgt_chunks * sizeof(unsigned char*)); 948 size_t* patch_size = malloc(num_tgt_chunks * sizeof(size_t)); 949 for (i = 0; i < num_tgt_chunks; ++i) { 950 if (zip_mode) { 951 ImageChunk* src; 952 if (tgt_chunks[i].type == CHUNK_DEFLATE && 953 (src = FindChunkByName(tgt_chunks[i].filename, src_chunks, 954 num_src_chunks))) { 955 patch_data[i] = MakePatch(src, tgt_chunks+i, patch_size+i); 956 } else { 957 patch_data[i] = MakePatch(src_chunks, tgt_chunks+i, patch_size+i); 958 } 959 } else { 960 if (i == 1 && bonus_data) { 961 printf(" using %d bytes of bonus data for chunk %d\n", bonus_size, i); 962 src_chunks[i].data = realloc(src_chunks[i].data, src_chunks[i].len + bonus_size); 963 memcpy(src_chunks[i].data+src_chunks[i].len, bonus_data, bonus_size); 964 src_chunks[i].len += bonus_size; 965 } 966 967 patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i); 968 } 969 printf("patch %3d is %d bytes (of %d)\n", 970 i, patch_size[i], tgt_chunks[i].source_len); 971 } 972 973 // Figure out how big the imgdiff file header is going to be, so 974 // that we can correctly compute the offset of each bsdiff patch 975 // within the file. 976 977 size_t total_header_size = 12; 978 for (i = 0; i < num_tgt_chunks; ++i) { 979 total_header_size += 4; 980 switch (tgt_chunks[i].type) { 981 case CHUNK_NORMAL: 982 total_header_size += 8*3; 983 break; 984 case CHUNK_DEFLATE: 985 total_header_size += 8*5 + 4*5; 986 break; 987 case CHUNK_RAW: 988 total_header_size += 4 + patch_size[i]; 989 break; 990 } 991 } 992 993 size_t offset = total_header_size; 994 995 FILE* f = fopen(argv[3], "wb"); 996 997 // Write out the headers. 998 999 fwrite("IMGDIFF2", 1, 8, f); 1000 Write4(num_tgt_chunks, f); 1001 for (i = 0; i < num_tgt_chunks; ++i) { 1002 Write4(tgt_chunks[i].type, f); 1003 1004 switch (tgt_chunks[i].type) { 1005 case CHUNK_NORMAL: 1006 printf("chunk %3d: normal (%10d, %10d) %10d\n", i, 1007 tgt_chunks[i].start, tgt_chunks[i].len, patch_size[i]); 1008 Write8(tgt_chunks[i].source_start, f); 1009 Write8(tgt_chunks[i].source_len, f); 1010 Write8(offset, f); 1011 offset += patch_size[i]; 1012 break; 1013 1014 case CHUNK_DEFLATE: 1015 printf("chunk %3d: deflate (%10d, %10d) %10d %s\n", i, 1016 tgt_chunks[i].start, tgt_chunks[i].deflate_len, patch_size[i], 1017 tgt_chunks[i].filename); 1018 Write8(tgt_chunks[i].source_start, f); 1019 Write8(tgt_chunks[i].source_len, f); 1020 Write8(offset, f); 1021 Write8(tgt_chunks[i].source_uncompressed_len, f); 1022 Write8(tgt_chunks[i].len, f); 1023 Write4(tgt_chunks[i].level, f); 1024 Write4(tgt_chunks[i].method, f); 1025 Write4(tgt_chunks[i].windowBits, f); 1026 Write4(tgt_chunks[i].memLevel, f); 1027 Write4(tgt_chunks[i].strategy, f); 1028 offset += patch_size[i]; 1029 break; 1030 1031 case CHUNK_RAW: 1032 printf("chunk %3d: raw (%10d, %10d)\n", i, 1033 tgt_chunks[i].start, tgt_chunks[i].len); 1034 Write4(patch_size[i], f); 1035 fwrite(patch_data[i], 1, patch_size[i], f); 1036 break; 1037 } 1038 } 1039 1040 // Append each chunk's bsdiff patch, in order. 1041 1042 for (i = 0; i < num_tgt_chunks; ++i) { 1043 if (tgt_chunks[i].type != CHUNK_RAW) { 1044 fwrite(patch_data[i], 1, patch_size[i], f); 1045 } 1046 } 1047 1048 fclose(f); 1049 1050 return 0; 1051 } 1052