1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * This program constructs binary patches for images -- such as boot.img 19 * and recovery.img -- that consist primarily of large chunks of gzipped 20 * data interspersed with uncompressed data. Doing a naive bsdiff of 21 * these files is not useful because small changes in the data lead to 22 * large changes in the compressed bitstream; bsdiff patches of gzipped 23 * data are typically as large as the data itself. 24 * 25 * To patch these usefully, we break the source and target images up into 26 * chunks of two types: "normal" and "gzip". Normal chunks are simply 27 * patched using a plain bsdiff. Gzip chunks are first expanded, then a 28 * bsdiff is applied to the uncompressed data, then the patched data is 29 * gzipped using the same encoder parameters. Patched chunks are 30 * concatenated together to create the output file; the output image 31 * should be *exactly* the same series of bytes as the target image used 32 * originally to generate the patch. 33 * 34 * To work well with this tool, the gzipped sections of the target 35 * image must have been generated using the same deflate encoder that 36 * is available in applypatch, namely, the one in the zlib library. 37 * In practice this means that images should be compressed using the 38 * "minigzip" tool included in the zlib distribution, not the GNU gzip 39 * program. 40 * 41 * An "imgdiff" patch consists of a header describing the chunk structure 42 * of the file and any encoding parameters needed for the gzipped 43 * chunks, followed by N bsdiff patches, one per chunk. 44 * 45 * For a diff to be generated, the source and target images must have the 46 * same "chunk" structure: that is, the same number of gzipped and normal 47 * chunks in the same order. Android boot and recovery images currently 48 * consist of five chunks: a small normal header, a gzipped kernel, a 49 * small normal section, a gzipped ramdisk, and finally a small normal 50 * footer. 51 * 52 * Caveats: we locate gzipped sections within the source and target 53 * images by searching for the byte sequence 1f8b0800: 1f8b is the gzip 54 * magic number; 08 specifies the "deflate" encoding [the only encoding 55 * supported by the gzip standard]; and 00 is the flags byte. We do not 56 * currently support any extra header fields (which would be indicated by 57 * a nonzero flags byte). We also don't handle the case when that byte 58 * sequence appears spuriously in the file. (Note that it would have to 59 * occur spuriously within a normal chunk to be a problem.) 60 * 61 * 62 * The imgdiff patch header looks like this: 63 * 64 * "IMGDIFF1" (8) [magic number and version] 65 * chunk count (4) 66 * for each chunk: 67 * chunk type (4) [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}] 68 * if chunk type == CHUNK_NORMAL: 69 * source start (8) 70 * source len (8) 71 * bsdiff patch offset (8) [from start of patch file] 72 * if chunk type == CHUNK_GZIP: (version 1 only) 73 * source start (8) 74 * source len (8) 75 * bsdiff patch offset (8) [from start of patch file] 76 * source expanded len (8) [size of uncompressed source] 77 * target expected len (8) [size of uncompressed target] 78 * gzip level (4) 79 * method (4) 80 * windowBits (4) 81 * memLevel (4) 82 * strategy (4) 83 * gzip header len (4) 84 * gzip header (gzip header len) 85 * gzip footer (8) 86 * if chunk type == CHUNK_DEFLATE: (version 2 only) 87 * source start (8) 88 * source len (8) 89 * bsdiff patch offset (8) [from start of patch file] 90 * source expanded len (8) [size of uncompressed source] 91 * target expected len (8) [size of uncompressed target] 92 * gzip level (4) 93 * method (4) 94 * windowBits (4) 95 * memLevel (4) 96 * strategy (4) 97 * if chunk type == RAW: (version 2 only) 98 * target len (4) 99 * data (target len) 100 * 101 * All integers are little-endian. "source start" and "source len" 102 * specify the section of the input image that comprises this chunk, 103 * including the gzip header and footer for gzip chunks. "source 104 * expanded len" is the size of the uncompressed source data. "target 105 * expected len" is the size of the uncompressed data after applying 106 * the bsdiff patch. The next five parameters specify the zlib 107 * parameters to be used when compressing the patched data, and the 108 * next three specify the header and footer to be wrapped around the 109 * compressed data to create the output chunk (so that header contents 110 * like the timestamp are recreated exactly). 111 * 112 * After the header there are 'chunk count' bsdiff patches; the offset 113 * of each from the beginning of the file is specified in the header. 114 */ 115 116 #include <errno.h> 117 #include <stdio.h> 118 #include <stdlib.h> 119 #include <string.h> 120 #include <sys/stat.h> 121 #include <unistd.h> 122 #include <sys/types.h> 123 124 #include "zlib.h" 125 #include "imgdiff.h" 126 #include "utils.h" 127 128 typedef struct { 129 int type; // CHUNK_NORMAL, CHUNK_DEFLATE 130 size_t start; // offset of chunk in original image file 131 132 size_t len; 133 unsigned char* data; // data to be patched (uncompressed, for deflate chunks) 134 135 size_t source_start; 136 size_t source_len; 137 138 off_t* I; // used by bsdiff 139 140 // --- for CHUNK_DEFLATE chunks only: --- 141 142 // original (compressed) deflate data 143 size_t deflate_len; 144 unsigned char* deflate_data; 145 146 char* filename; // used for zip entries 147 148 // deflate encoder parameters 149 int level, method, windowBits, memLevel, strategy; 150 151 size_t source_uncompressed_len; 152 } ImageChunk; 153 154 typedef struct { 155 int data_offset; 156 int deflate_len; 157 int uncomp_len; 158 char* filename; 159 } ZipFileEntry; 160 161 static int fileentry_compare(const void* a, const void* b) { 162 int ao = ((ZipFileEntry*)a)->data_offset; 163 int bo = ((ZipFileEntry*)b)->data_offset; 164 if (ao < bo) { 165 return -1; 166 } else if (ao > bo) { 167 return 1; 168 } else { 169 return 0; 170 } 171 } 172 173 // from bsdiff.c 174 int bsdiff(u_char* old, off_t oldsize, off_t** IP, u_char* new, off_t newsize, 175 const char* patch_filename); 176 177 unsigned char* ReadZip(const char* filename, 178 int* num_chunks, ImageChunk** chunks, 179 int include_pseudo_chunk) { 180 struct stat st; 181 if (stat(filename, &st) != 0) { 182 printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); 183 return NULL; 184 } 185 186 unsigned char* img = malloc(st.st_size); 187 FILE* f = fopen(filename, "rb"); 188 if (fread(img, 1, st.st_size, f) != st.st_size) { 189 printf("failed to read \"%s\" %s\n", filename, strerror(errno)); 190 fclose(f); 191 return NULL; 192 } 193 fclose(f); 194 195 // look for the end-of-central-directory record. 196 197 int i; 198 for (i = st.st_size-20; i >= 0 && i > st.st_size - 65600; --i) { 199 if (img[i] == 0x50 && img[i+1] == 0x4b && 200 img[i+2] == 0x05 && img[i+3] == 0x06) { 201 break; 202 } 203 } 204 // double-check: this archive consists of a single "disk" 205 if (!(img[i+4] == 0 && img[i+5] == 0 && img[i+6] == 0 && img[i+7] == 0)) { 206 printf("can't process multi-disk archive\n"); 207 return NULL; 208 } 209 210 int cdcount = Read2(img+i+8); 211 int cdoffset = Read4(img+i+16); 212 213 ZipFileEntry* temp_entries = malloc(cdcount * sizeof(ZipFileEntry)); 214 int entrycount = 0; 215 216 unsigned char* cd = img+cdoffset; 217 for (i = 0; i < cdcount; ++i) { 218 if (!(cd[0] == 0x50 && cd[1] == 0x4b && cd[2] == 0x01 && cd[3] == 0x02)) { 219 printf("bad central directory entry %d\n", i); 220 return NULL; 221 } 222 223 int clen = Read4(cd+20); // compressed len 224 int ulen = Read4(cd+24); // uncompressed len 225 int nlen = Read2(cd+28); // filename len 226 int xlen = Read2(cd+30); // extra field len 227 int mlen = Read2(cd+32); // file comment len 228 int hoffset = Read4(cd+42); // local header offset 229 230 char* filename = malloc(nlen+1); 231 memcpy(filename, cd+46, nlen); 232 filename[nlen] = '\0'; 233 234 int method = Read2(cd+10); 235 236 cd += 46 + nlen + xlen + mlen; 237 238 if (method != 8) { // 8 == deflate 239 free(filename); 240 continue; 241 } 242 243 unsigned char* lh = img + hoffset; 244 245 if (!(lh[0] == 0x50 && lh[1] == 0x4b && lh[2] == 0x03 && lh[3] == 0x04)) { 246 printf("bad local file header entry %d\n", i); 247 return NULL; 248 } 249 250 if (Read2(lh+26) != nlen || memcmp(lh+30, filename, nlen) != 0) { 251 printf("central dir filename doesn't match local header\n"); 252 return NULL; 253 } 254 255 xlen = Read2(lh+28); // extra field len; might be different from CD entry? 256 257 temp_entries[entrycount].data_offset = hoffset+30+nlen+xlen; 258 temp_entries[entrycount].deflate_len = clen; 259 temp_entries[entrycount].uncomp_len = ulen; 260 temp_entries[entrycount].filename = filename; 261 ++entrycount; 262 } 263 264 qsort(temp_entries, entrycount, sizeof(ZipFileEntry), fileentry_compare); 265 266 #if 0 267 printf("found %d deflated entries\n", entrycount); 268 for (i = 0; i < entrycount; ++i) { 269 printf("off %10d len %10d unlen %10d %p %s\n", 270 temp_entries[i].data_offset, 271 temp_entries[i].deflate_len, 272 temp_entries[i].uncomp_len, 273 temp_entries[i].filename, 274 temp_entries[i].filename); 275 } 276 #endif 277 278 *num_chunks = 0; 279 *chunks = malloc((entrycount*2+2) * sizeof(ImageChunk)); 280 ImageChunk* curr = *chunks; 281 282 if (include_pseudo_chunk) { 283 curr->type = CHUNK_NORMAL; 284 curr->start = 0; 285 curr->len = st.st_size; 286 curr->data = img; 287 curr->filename = NULL; 288 curr->I = NULL; 289 ++curr; 290 ++*num_chunks; 291 } 292 293 int pos = 0; 294 int nextentry = 0; 295 296 while (pos < st.st_size) { 297 if (nextentry < entrycount && pos == temp_entries[nextentry].data_offset) { 298 curr->type = CHUNK_DEFLATE; 299 curr->start = pos; 300 curr->deflate_len = temp_entries[nextentry].deflate_len; 301 curr->deflate_data = img + pos; 302 curr->filename = temp_entries[nextentry].filename; 303 curr->I = NULL; 304 305 curr->len = temp_entries[nextentry].uncomp_len; 306 curr->data = malloc(curr->len); 307 308 z_stream strm; 309 strm.zalloc = Z_NULL; 310 strm.zfree = Z_NULL; 311 strm.opaque = Z_NULL; 312 strm.avail_in = curr->deflate_len; 313 strm.next_in = curr->deflate_data; 314 315 // -15 means we are decoding a 'raw' deflate stream; zlib will 316 // not expect zlib headers. 317 int ret = inflateInit2(&strm, -15); 318 319 strm.avail_out = curr->len; 320 strm.next_out = curr->data; 321 ret = inflate(&strm, Z_NO_FLUSH); 322 if (ret != Z_STREAM_END) { 323 printf("failed to inflate \"%s\"; %d\n", curr->filename, ret); 324 return NULL; 325 } 326 327 inflateEnd(&strm); 328 329 pos += curr->deflate_len; 330 ++nextentry; 331 ++*num_chunks; 332 ++curr; 333 continue; 334 } 335 336 // use a normal chunk to take all the data up to the start of the 337 // next deflate section. 338 339 curr->type = CHUNK_NORMAL; 340 curr->start = pos; 341 if (nextentry < entrycount) { 342 curr->len = temp_entries[nextentry].data_offset - pos; 343 } else { 344 curr->len = st.st_size - pos; 345 } 346 curr->data = img + pos; 347 curr->filename = NULL; 348 curr->I = NULL; 349 pos += curr->len; 350 351 ++*num_chunks; 352 ++curr; 353 } 354 355 free(temp_entries); 356 return img; 357 } 358 359 /* 360 * Read the given file and break it up into chunks, putting the number 361 * of chunks and their info in *num_chunks and **chunks, 362 * respectively. Returns a malloc'd block of memory containing the 363 * contents of the file; various pointers in the output chunk array 364 * will point into this block of memory. The caller should free the 365 * return value when done with all the chunks. Returns NULL on 366 * failure. 367 */ 368 unsigned char* ReadImage(const char* filename, 369 int* num_chunks, ImageChunk** chunks) { 370 struct stat st; 371 if (stat(filename, &st) != 0) { 372 printf("failed to stat \"%s\": %s\n", filename, strerror(errno)); 373 return NULL; 374 } 375 376 unsigned char* img = malloc(st.st_size + 4); 377 FILE* f = fopen(filename, "rb"); 378 if (fread(img, 1, st.st_size, f) != st.st_size) { 379 printf("failed to read \"%s\" %s\n", filename, strerror(errno)); 380 fclose(f); 381 return NULL; 382 } 383 fclose(f); 384 385 // append 4 zero bytes to the data so we can always search for the 386 // four-byte string 1f8b0800 starting at any point in the actual 387 // file data, without special-casing the end of the data. 388 memset(img+st.st_size, 0, 4); 389 390 size_t pos = 0; 391 392 *num_chunks = 0; 393 *chunks = NULL; 394 395 while (pos < st.st_size) { 396 unsigned char* p = img+pos; 397 398 if (st.st_size - pos >= 4 && 399 p[0] == 0x1f && p[1] == 0x8b && 400 p[2] == 0x08 && // deflate compression 401 p[3] == 0x00) { // no header flags 402 // 'pos' is the offset of the start of a gzip chunk. 403 404 *num_chunks += 3; 405 *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk)); 406 ImageChunk* curr = *chunks + (*num_chunks-3); 407 408 // create a normal chunk for the header. 409 curr->start = pos; 410 curr->type = CHUNK_NORMAL; 411 curr->len = GZIP_HEADER_LEN; 412 curr->data = p; 413 curr->I = NULL; 414 415 pos += curr->len; 416 p += curr->len; 417 ++curr; 418 419 curr->type = CHUNK_DEFLATE; 420 curr->filename = NULL; 421 curr->I = NULL; 422 423 // We must decompress this chunk in order to discover where it 424 // ends, and so we can put the uncompressed data and its length 425 // into curr->data and curr->len. 426 427 size_t allocated = 32768; 428 curr->len = 0; 429 curr->data = malloc(allocated); 430 curr->start = pos; 431 curr->deflate_data = p; 432 433 z_stream strm; 434 strm.zalloc = Z_NULL; 435 strm.zfree = Z_NULL; 436 strm.opaque = Z_NULL; 437 strm.avail_in = st.st_size - pos; 438 strm.next_in = p; 439 440 // -15 means we are decoding a 'raw' deflate stream; zlib will 441 // not expect zlib headers. 442 int ret = inflateInit2(&strm, -15); 443 444 do { 445 strm.avail_out = allocated - curr->len; 446 strm.next_out = curr->data + curr->len; 447 ret = inflate(&strm, Z_NO_FLUSH); 448 curr->len = allocated - strm.avail_out; 449 if (strm.avail_out == 0) { 450 allocated *= 2; 451 curr->data = realloc(curr->data, allocated); 452 } 453 } while (ret != Z_STREAM_END); 454 455 curr->deflate_len = st.st_size - strm.avail_in - pos; 456 inflateEnd(&strm); 457 pos += curr->deflate_len; 458 p += curr->deflate_len; 459 ++curr; 460 461 // create a normal chunk for the footer 462 463 curr->type = CHUNK_NORMAL; 464 curr->start = pos; 465 curr->len = GZIP_FOOTER_LEN; 466 curr->data = img+pos; 467 curr->I = NULL; 468 469 pos += curr->len; 470 p += curr->len; 471 ++curr; 472 473 // The footer (that we just skipped over) contains the size of 474 // the uncompressed data. Double-check to make sure that it 475 // matches the size of the data we got when we actually did 476 // the decompression. 477 size_t footer_size = Read4(p-4); 478 if (footer_size != curr[-2].len) { 479 printf("Error: footer size %d != decompressed size %d\n", 480 footer_size, curr[-2].len); 481 free(img); 482 return NULL; 483 } 484 } else { 485 // Reallocate the list for every chunk; we expect the number of 486 // chunks to be small (5 for typical boot and recovery images). 487 ++*num_chunks; 488 *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk)); 489 ImageChunk* curr = *chunks + (*num_chunks-1); 490 curr->start = pos; 491 curr->I = NULL; 492 493 // 'pos' is not the offset of the start of a gzip chunk, so scan 494 // forward until we find a gzip header. 495 curr->type = CHUNK_NORMAL; 496 curr->data = p; 497 498 for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) { 499 if (p[curr->len] == 0x1f && 500 p[curr->len+1] == 0x8b && 501 p[curr->len+2] == 0x08 && 502 p[curr->len+3] == 0x00) { 503 break; 504 } 505 } 506 pos += curr->len; 507 } 508 } 509 510 return img; 511 } 512 513 #define BUFFER_SIZE 32768 514 515 /* 516 * Takes the uncompressed data stored in the chunk, compresses it 517 * using the zlib parameters stored in the chunk, and checks that it 518 * matches exactly the compressed data we started with (also stored in 519 * the chunk). Return 0 on success. 520 */ 521 int TryReconstruction(ImageChunk* chunk, unsigned char* out) { 522 size_t p = 0; 523 524 #if 0 525 printf("trying %d %d %d %d %d\n", 526 chunk->level, chunk->method, chunk->windowBits, 527 chunk->memLevel, chunk->strategy); 528 #endif 529 530 z_stream strm; 531 strm.zalloc = Z_NULL; 532 strm.zfree = Z_NULL; 533 strm.opaque = Z_NULL; 534 strm.avail_in = chunk->len; 535 strm.next_in = chunk->data; 536 int ret; 537 ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits, 538 chunk->memLevel, chunk->strategy); 539 do { 540 strm.avail_out = BUFFER_SIZE; 541 strm.next_out = out; 542 ret = deflate(&strm, Z_FINISH); 543 size_t have = BUFFER_SIZE - strm.avail_out; 544 545 if (memcmp(out, chunk->deflate_data+p, have) != 0) { 546 // mismatch; data isn't the same. 547 deflateEnd(&strm); 548 return -1; 549 } 550 p += have; 551 } while (ret != Z_STREAM_END); 552 deflateEnd(&strm); 553 if (p != chunk->deflate_len) { 554 // mismatch; ran out of data before we should have. 555 return -1; 556 } 557 return 0; 558 } 559 560 /* 561 * Verify that we can reproduce exactly the same compressed data that 562 * we started with. Sets the level, method, windowBits, memLevel, and 563 * strategy fields in the chunk to the encoding parameters needed to 564 * produce the right output. Returns 0 on success. 565 */ 566 int ReconstructDeflateChunk(ImageChunk* chunk) { 567 if (chunk->type != CHUNK_DEFLATE) { 568 printf("attempt to reconstruct non-deflate chunk\n"); 569 return -1; 570 } 571 572 size_t p = 0; 573 unsigned char* out = malloc(BUFFER_SIZE); 574 575 // We only check two combinations of encoder parameters: level 6 576 // (the default) and level 9 (the maximum). 577 for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) { 578 chunk->windowBits = -15; // 32kb window; negative to indicate a raw stream. 579 chunk->memLevel = 8; // the default value. 580 chunk->method = Z_DEFLATED; 581 chunk->strategy = Z_DEFAULT_STRATEGY; 582 583 if (TryReconstruction(chunk, out) == 0) { 584 free(out); 585 return 0; 586 } 587 } 588 589 free(out); 590 return -1; 591 } 592 593 /* 594 * Given source and target chunks, compute a bsdiff patch between them 595 * by running bsdiff in a subprocess. Return the patch data, placing 596 * its length in *size. Return NULL on failure. We expect the bsdiff 597 * program to be in the path. 598 */ 599 unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) { 600 if (tgt->type == CHUNK_NORMAL) { 601 if (tgt->len <= 160) { 602 tgt->type = CHUNK_RAW; 603 *size = tgt->len; 604 return tgt->data; 605 } 606 } 607 608 char ptemp[] = "/tmp/imgdiff-patch-XXXXXX"; 609 mkstemp(ptemp); 610 611 int r = bsdiff(src->data, src->len, &(src->I), tgt->data, tgt->len, ptemp); 612 if (r != 0) { 613 printf("bsdiff() failed: %d\n", r); 614 return NULL; 615 } 616 617 struct stat st; 618 if (stat(ptemp, &st) != 0) { 619 printf("failed to stat patch file %s: %s\n", 620 ptemp, strerror(errno)); 621 return NULL; 622 } 623 624 unsigned char* data = malloc(st.st_size); 625 626 if (tgt->type == CHUNK_NORMAL && tgt->len <= st.st_size) { 627 unlink(ptemp); 628 629 tgt->type = CHUNK_RAW; 630 *size = tgt->len; 631 return tgt->data; 632 } 633 634 *size = st.st_size; 635 636 FILE* f = fopen(ptemp, "rb"); 637 if (f == NULL) { 638 printf("failed to open patch %s: %s\n", ptemp, strerror(errno)); 639 return NULL; 640 } 641 if (fread(data, 1, st.st_size, f) != st.st_size) { 642 printf("failed to read patch %s: %s\n", ptemp, strerror(errno)); 643 return NULL; 644 } 645 fclose(f); 646 647 unlink(ptemp); 648 649 tgt->source_start = src->start; 650 switch (tgt->type) { 651 case CHUNK_NORMAL: 652 tgt->source_len = src->len; 653 break; 654 case CHUNK_DEFLATE: 655 tgt->source_len = src->deflate_len; 656 tgt->source_uncompressed_len = src->len; 657 break; 658 } 659 660 return data; 661 } 662 663 /* 664 * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob 665 * of uninterpreted data). The resulting patch will likely be about 666 * as big as the target file, but it lets us handle the case of images 667 * where some gzip chunks are reconstructible but others aren't (by 668 * treating the ones that aren't as normal chunks). 669 */ 670 void ChangeDeflateChunkToNormal(ImageChunk* ch) { 671 if (ch->type != CHUNK_DEFLATE) return; 672 ch->type = CHUNK_NORMAL; 673 free(ch->data); 674 ch->data = ch->deflate_data; 675 ch->len = ch->deflate_len; 676 } 677 678 /* 679 * Return true if the data in the chunk is identical (including the 680 * compressed representation, for gzip chunks). 681 */ 682 int AreChunksEqual(ImageChunk* a, ImageChunk* b) { 683 if (a->type != b->type) return 0; 684 685 switch (a->type) { 686 case CHUNK_NORMAL: 687 return a->len == b->len && memcmp(a->data, b->data, a->len) == 0; 688 689 case CHUNK_DEFLATE: 690 return a->deflate_len == b->deflate_len && 691 memcmp(a->deflate_data, b->deflate_data, a->deflate_len) == 0; 692 693 default: 694 printf("unknown chunk type %d\n", a->type); 695 return 0; 696 } 697 } 698 699 /* 700 * Look for runs of adjacent normal chunks and compress them down into 701 * a single chunk. (Such runs can be produced when deflate chunks are 702 * changed to normal chunks.) 703 */ 704 void MergeAdjacentNormalChunks(ImageChunk* chunks, int* num_chunks) { 705 int out = 0; 706 int in_start = 0, in_end; 707 while (in_start < *num_chunks) { 708 if (chunks[in_start].type != CHUNK_NORMAL) { 709 in_end = in_start+1; 710 } else { 711 // in_start is a normal chunk. Look for a run of normal chunks 712 // that constitute a solid block of data (ie, each chunk begins 713 // where the previous one ended). 714 for (in_end = in_start+1; 715 in_end < *num_chunks && chunks[in_end].type == CHUNK_NORMAL && 716 (chunks[in_end].start == 717 chunks[in_end-1].start + chunks[in_end-1].len && 718 chunks[in_end].data == 719 chunks[in_end-1].data + chunks[in_end-1].len); 720 ++in_end); 721 } 722 723 if (in_end == in_start+1) { 724 #if 0 725 printf("chunk %d is now %d\n", in_start, out); 726 #endif 727 if (out != in_start) { 728 memcpy(chunks+out, chunks+in_start, sizeof(ImageChunk)); 729 } 730 } else { 731 #if 0 732 printf("collapse normal chunks %d-%d into %d\n", in_start, in_end-1, out); 733 #endif 734 735 // Merge chunks [in_start, in_end-1] into one chunk. Since the 736 // data member of each chunk is just a pointer into an in-memory 737 // copy of the file, this can be done without recopying (the 738 // output chunk has the first chunk's start location and data 739 // pointer, and length equal to the sum of the input chunk 740 // lengths). 741 chunks[out].type = CHUNK_NORMAL; 742 chunks[out].start = chunks[in_start].start; 743 chunks[out].data = chunks[in_start].data; 744 chunks[out].len = chunks[in_end-1].len + 745 (chunks[in_end-1].start - chunks[in_start].start); 746 } 747 748 ++out; 749 in_start = in_end; 750 } 751 *num_chunks = out; 752 } 753 754 ImageChunk* FindChunkByName(const char* name, 755 ImageChunk* chunks, int num_chunks) { 756 int i; 757 for (i = 0; i < num_chunks; ++i) { 758 if (chunks[i].type == CHUNK_DEFLATE && chunks[i].filename && 759 strcmp(name, chunks[i].filename) == 0) { 760 return chunks+i; 761 } 762 } 763 return NULL; 764 } 765 766 void DumpChunks(ImageChunk* chunks, int num_chunks) { 767 int i; 768 for (i = 0; i < num_chunks; ++i) { 769 printf("chunk %d: type %d start %d len %d\n", 770 i, chunks[i].type, chunks[i].start, chunks[i].len); 771 } 772 } 773 774 int main(int argc, char** argv) { 775 if (argc != 4 && argc != 5) { 776 usage: 777 printf("usage: %s [-z] <src-img> <tgt-img> <patch-file>\n", 778 argv[0]); 779 return 2; 780 } 781 782 int zip_mode = 0; 783 784 if (strcmp(argv[1], "-z") == 0) { 785 zip_mode = 1; 786 --argc; 787 ++argv; 788 } 789 790 791 int num_src_chunks; 792 ImageChunk* src_chunks; 793 int num_tgt_chunks; 794 ImageChunk* tgt_chunks; 795 int i; 796 797 if (zip_mode) { 798 if (ReadZip(argv[1], &num_src_chunks, &src_chunks, 1) == NULL) { 799 printf("failed to break apart source zip file\n"); 800 return 1; 801 } 802 if (ReadZip(argv[2], &num_tgt_chunks, &tgt_chunks, 0) == NULL) { 803 printf("failed to break apart target zip file\n"); 804 return 1; 805 } 806 } else { 807 if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) { 808 printf("failed to break apart source image\n"); 809 return 1; 810 } 811 if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) { 812 printf("failed to break apart target image\n"); 813 return 1; 814 } 815 816 // Verify that the source and target images have the same chunk 817 // structure (ie, the same sequence of deflate and normal chunks). 818 819 if (!zip_mode) { 820 // Merge the gzip header and footer in with any adjacent 821 // normal chunks. 822 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 823 MergeAdjacentNormalChunks(src_chunks, &num_src_chunks); 824 } 825 826 if (num_src_chunks != num_tgt_chunks) { 827 printf("source and target don't have same number of chunks!\n"); 828 printf("source chunks:\n"); 829 DumpChunks(src_chunks, num_src_chunks); 830 printf("target chunks:\n"); 831 DumpChunks(tgt_chunks, num_tgt_chunks); 832 return 1; 833 } 834 for (i = 0; i < num_src_chunks; ++i) { 835 if (src_chunks[i].type != tgt_chunks[i].type) { 836 printf("source and target don't have same chunk " 837 "structure! (chunk %d)\n", i); 838 printf("source chunks:\n"); 839 DumpChunks(src_chunks, num_src_chunks); 840 printf("target chunks:\n"); 841 DumpChunks(tgt_chunks, num_tgt_chunks); 842 return 1; 843 } 844 } 845 } 846 847 for (i = 0; i < num_tgt_chunks; ++i) { 848 if (tgt_chunks[i].type == CHUNK_DEFLATE) { 849 // Confirm that given the uncompressed chunk data in the target, we 850 // can recompress it and get exactly the same bits as are in the 851 // input target image. If this fails, treat the chunk as a normal 852 // non-deflated chunk. 853 if (ReconstructDeflateChunk(tgt_chunks+i) < 0) { 854 printf("failed to reconstruct target deflate chunk %d [%s]; " 855 "treating as normal\n", i, tgt_chunks[i].filename); 856 ChangeDeflateChunkToNormal(tgt_chunks+i); 857 if (zip_mode) { 858 ImageChunk* src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks); 859 if (src) { 860 ChangeDeflateChunkToNormal(src); 861 } 862 } else { 863 ChangeDeflateChunkToNormal(src_chunks+i); 864 } 865 continue; 866 } 867 868 // If two deflate chunks are identical (eg, the kernel has not 869 // changed between two builds), treat them as normal chunks. 870 // This makes applypatch much faster -- it can apply a trivial 871 // patch to the compressed data, rather than uncompressing and 872 // recompressing to apply the trivial patch to the uncompressed 873 // data. 874 ImageChunk* src; 875 if (zip_mode) { 876 src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks); 877 } else { 878 src = src_chunks+i; 879 } 880 881 if (src == NULL || AreChunksEqual(tgt_chunks+i, src)) { 882 ChangeDeflateChunkToNormal(tgt_chunks+i); 883 if (src) { 884 ChangeDeflateChunkToNormal(src); 885 } 886 } 887 } 888 } 889 890 // Merging neighboring normal chunks. 891 if (zip_mode) { 892 // For zips, we only need to do this to the target: deflated 893 // chunks are matched via filename, and normal chunks are patched 894 // using the entire source file as the source. 895 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 896 } else { 897 // For images, we need to maintain the parallel structure of the 898 // chunk lists, so do the merging in both the source and target 899 // lists. 900 MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks); 901 MergeAdjacentNormalChunks(src_chunks, &num_src_chunks); 902 if (num_src_chunks != num_tgt_chunks) { 903 // This shouldn't happen. 904 printf("merging normal chunks went awry\n"); 905 return 1; 906 } 907 } 908 909 // Compute bsdiff patches for each chunk's data (the uncompressed 910 // data, in the case of deflate chunks). 911 912 printf("Construct patches for %d chunks...\n", num_tgt_chunks); 913 unsigned char** patch_data = malloc(num_tgt_chunks * sizeof(unsigned char*)); 914 size_t* patch_size = malloc(num_tgt_chunks * sizeof(size_t)); 915 for (i = 0; i < num_tgt_chunks; ++i) { 916 if (zip_mode) { 917 ImageChunk* src; 918 if (tgt_chunks[i].type == CHUNK_DEFLATE && 919 (src = FindChunkByName(tgt_chunks[i].filename, src_chunks, 920 num_src_chunks))) { 921 patch_data[i] = MakePatch(src, tgt_chunks+i, patch_size+i); 922 } else { 923 patch_data[i] = MakePatch(src_chunks, tgt_chunks+i, patch_size+i); 924 } 925 } else { 926 patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i); 927 } 928 printf("patch %3d is %d bytes (of %d)\n", 929 i, patch_size[i], tgt_chunks[i].source_len); 930 } 931 932 // Figure out how big the imgdiff file header is going to be, so 933 // that we can correctly compute the offset of each bsdiff patch 934 // within the file. 935 936 size_t total_header_size = 12; 937 for (i = 0; i < num_tgt_chunks; ++i) { 938 total_header_size += 4; 939 switch (tgt_chunks[i].type) { 940 case CHUNK_NORMAL: 941 total_header_size += 8*3; 942 break; 943 case CHUNK_DEFLATE: 944 total_header_size += 8*5 + 4*5; 945 break; 946 case CHUNK_RAW: 947 total_header_size += 4 + patch_size[i]; 948 break; 949 } 950 } 951 952 size_t offset = total_header_size; 953 954 FILE* f = fopen(argv[3], "wb"); 955 956 // Write out the headers. 957 958 fwrite("IMGDIFF2", 1, 8, f); 959 Write4(num_tgt_chunks, f); 960 for (i = 0; i < num_tgt_chunks; ++i) { 961 Write4(tgt_chunks[i].type, f); 962 963 switch (tgt_chunks[i].type) { 964 case CHUNK_NORMAL: 965 printf("chunk %3d: normal (%10d, %10d) %10d\n", i, 966 tgt_chunks[i].start, tgt_chunks[i].len, patch_size[i]); 967 Write8(tgt_chunks[i].source_start, f); 968 Write8(tgt_chunks[i].source_len, f); 969 Write8(offset, f); 970 offset += patch_size[i]; 971 break; 972 973 case CHUNK_DEFLATE: 974 printf("chunk %3d: deflate (%10d, %10d) %10d %s\n", i, 975 tgt_chunks[i].start, tgt_chunks[i].deflate_len, patch_size[i], 976 tgt_chunks[i].filename); 977 Write8(tgt_chunks[i].source_start, f); 978 Write8(tgt_chunks[i].source_len, f); 979 Write8(offset, f); 980 Write8(tgt_chunks[i].source_uncompressed_len, f); 981 Write8(tgt_chunks[i].len, f); 982 Write4(tgt_chunks[i].level, f); 983 Write4(tgt_chunks[i].method, f); 984 Write4(tgt_chunks[i].windowBits, f); 985 Write4(tgt_chunks[i].memLevel, f); 986 Write4(tgt_chunks[i].strategy, f); 987 offset += patch_size[i]; 988 break; 989 990 case CHUNK_RAW: 991 printf("chunk %3d: raw (%10d, %10d)\n", i, 992 tgt_chunks[i].start, tgt_chunks[i].len); 993 Write4(patch_size[i], f); 994 fwrite(patch_data[i], 1, patch_size[i], f); 995 break; 996 } 997 } 998 999 // Append each chunk's bsdiff patch, in order. 1000 1001 for (i = 0; i < num_tgt_chunks; ++i) { 1002 if (tgt_chunks[i].type != CHUNK_RAW) { 1003 fwrite(patch_data[i], 1, patch_size[i], f); 1004 } 1005 } 1006 1007 fclose(f); 1008 1009 return 0; 1010 } 1011