Home | History | Annotate | Download | only in applypatch
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * This program constructs binary patches for images -- such as boot.img
     19  * and recovery.img -- that consist primarily of large chunks of gzipped
     20  * data interspersed with uncompressed data.  Doing a naive bsdiff of
     21  * these files is not useful because small changes in the data lead to
     22  * large changes in the compressed bitstream; bsdiff patches of gzipped
     23  * data are typically as large as the data itself.
     24  *
     25  * To patch these usefully, we break the source and target images up into
     26  * chunks of two types: "normal" and "gzip".  Normal chunks are simply
     27  * patched using a plain bsdiff.  Gzip chunks are first expanded, then a
     28  * bsdiff is applied to the uncompressed data, then the patched data is
     29  * gzipped using the same encoder parameters.  Patched chunks are
     30  * concatenated together to create the output file; the output image
     31  * should be *exactly* the same series of bytes as the target image used
     32  * originally to generate the patch.
     33  *
     34  * To work well with this tool, the gzipped sections of the target
     35  * image must have been generated using the same deflate encoder that
     36  * is available in applypatch, namely, the one in the zlib library.
     37  * In practice this means that images should be compressed using the
     38  * "minigzip" tool included in the zlib distribution, not the GNU gzip
     39  * program.
     40  *
     41  * An "imgdiff" patch consists of a header describing the chunk structure
     42  * of the file and any encoding parameters needed for the gzipped
     43  * chunks, followed by N bsdiff patches, one per chunk.
     44  *
     45  * For a diff to be generated, the source and target images must have the
     46  * same "chunk" structure: that is, the same number of gzipped and normal
     47  * chunks in the same order.  Android boot and recovery images currently
     48  * consist of five chunks:  a small normal header, a gzipped kernel, a
     49  * small normal section, a gzipped ramdisk, and finally a small normal
     50  * footer.
     51  *
     52  * Caveats:  we locate gzipped sections within the source and target
     53  * images by searching for the byte sequence 1f8b0800:  1f8b is the gzip
     54  * magic number; 08 specifies the "deflate" encoding [the only encoding
     55  * supported by the gzip standard]; and 00 is the flags byte.  We do not
     56  * currently support any extra header fields (which would be indicated by
     57  * a nonzero flags byte).  We also don't handle the case when that byte
     58  * sequence appears spuriously in the file.  (Note that it would have to
     59  * occur spuriously within a normal chunk to be a problem.)
     60  *
     61  *
     62  * The imgdiff patch header looks like this:
     63  *
     64  *    "IMGDIFF1"                  (8)   [magic number and version]
     65  *    chunk count                 (4)
     66  *    for each chunk:
     67  *        chunk type              (4)   [CHUNK_{NORMAL, GZIP, DEFLATE, RAW}]
     68  *        if chunk type == CHUNK_NORMAL:
     69  *           source start         (8)
     70  *           source len           (8)
     71  *           bsdiff patch offset  (8)   [from start of patch file]
     72  *        if chunk type == CHUNK_GZIP:      (version 1 only)
     73  *           source start         (8)
     74  *           source len           (8)
     75  *           bsdiff patch offset  (8)   [from start of patch file]
     76  *           source expanded len  (8)   [size of uncompressed source]
     77  *           target expected len  (8)   [size of uncompressed target]
     78  *           gzip level           (4)
     79  *                method          (4)
     80  *                windowBits      (4)
     81  *                memLevel        (4)
     82  *                strategy        (4)
     83  *           gzip header len      (4)
     84  *           gzip header          (gzip header len)
     85  *           gzip footer          (8)
     86  *        if chunk type == CHUNK_DEFLATE:   (version 2 only)
     87  *           source start         (8)
     88  *           source len           (8)
     89  *           bsdiff patch offset  (8)   [from start of patch file]
     90  *           source expanded len  (8)   [size of uncompressed source]
     91  *           target expected len  (8)   [size of uncompressed target]
     92  *           gzip level           (4)
     93  *                method          (4)
     94  *                windowBits      (4)
     95  *                memLevel        (4)
     96  *                strategy        (4)
     97  *        if chunk type == RAW:             (version 2 only)
     98  *           target len           (4)
     99  *           data                 (target len)
    100  *
    101  * All integers are little-endian.  "source start" and "source len"
    102  * specify the section of the input image that comprises this chunk,
    103  * including the gzip header and footer for gzip chunks.  "source
    104  * expanded len" is the size of the uncompressed source data.  "target
    105  * expected len" is the size of the uncompressed data after applying
    106  * the bsdiff patch.  The next five parameters specify the zlib
    107  * parameters to be used when compressing the patched data, and the
    108  * next three specify the header and footer to be wrapped around the
    109  * compressed data to create the output chunk (so that header contents
    110  * like the timestamp are recreated exactly).
    111  *
    112  * After the header there are 'chunk count' bsdiff patches; the offset
    113  * of each from the beginning of the file is specified in the header.
    114  *
    115  * This tool can take an optional file of "bonus data".  This is an
    116  * extra file of data that is appended to chunk #1 after it is
    117  * compressed (it must be a CHUNK_DEFLATE chunk).  The same file must
    118  * be available (and passed to applypatch with -b) when applying the
    119  * patch.  This is used to reduce the size of recovery-from-boot
    120  * patches by combining the boot image with recovery ramdisk
    121  * information that is stored on the system partition.
    122  */
    123 
    124 #include <errno.h>
    125 #include <stdio.h>
    126 #include <stdlib.h>
    127 #include <string.h>
    128 #include <sys/stat.h>
    129 #include <unistd.h>
    130 #include <sys/types.h>
    131 
    132 #include "zlib.h"
    133 #include "imgdiff.h"
    134 #include "utils.h"
    135 
    136 typedef struct {
    137   int type;             // CHUNK_NORMAL, CHUNK_DEFLATE
    138   size_t start;         // offset of chunk in original image file
    139 
    140   size_t len;
    141   unsigned char* data;  // data to be patched (uncompressed, for deflate chunks)
    142 
    143   size_t source_start;
    144   size_t source_len;
    145 
    146   off_t* I;             // used by bsdiff
    147 
    148   // --- for CHUNK_DEFLATE chunks only: ---
    149 
    150   // original (compressed) deflate data
    151   size_t deflate_len;
    152   unsigned char* deflate_data;
    153 
    154   char* filename;       // used for zip entries
    155 
    156   // deflate encoder parameters
    157   int level, method, windowBits, memLevel, strategy;
    158 
    159   size_t source_uncompressed_len;
    160 } ImageChunk;
    161 
    162 typedef struct {
    163   int data_offset;
    164   int deflate_len;
    165   int uncomp_len;
    166   char* filename;
    167 } ZipFileEntry;
    168 
    169 static int fileentry_compare(const void* a, const void* b) {
    170   int ao = ((ZipFileEntry*)a)->data_offset;
    171   int bo = ((ZipFileEntry*)b)->data_offset;
    172   if (ao < bo) {
    173     return -1;
    174   } else if (ao > bo) {
    175     return 1;
    176   } else {
    177     return 0;
    178   }
    179 }
    180 
    181 // from bsdiff.c
    182 int bsdiff(u_char* old, off_t oldsize, off_t** IP, u_char* new, off_t newsize,
    183            const char* patch_filename);
    184 
    185 unsigned char* ReadZip(const char* filename,
    186                        int* num_chunks, ImageChunk** chunks,
    187                        int include_pseudo_chunk) {
    188   struct stat st;
    189   if (stat(filename, &st) != 0) {
    190     printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
    191     return NULL;
    192   }
    193 
    194   unsigned char* img = malloc(st.st_size);
    195   FILE* f = fopen(filename, "rb");
    196   if (fread(img, 1, st.st_size, f) != st.st_size) {
    197     printf("failed to read \"%s\" %s\n", filename, strerror(errno));
    198     fclose(f);
    199     return NULL;
    200   }
    201   fclose(f);
    202 
    203   // look for the end-of-central-directory record.
    204 
    205   int i;
    206   for (i = st.st_size-20; i >= 0 && i > st.st_size - 65600; --i) {
    207     if (img[i] == 0x50 && img[i+1] == 0x4b &&
    208         img[i+2] == 0x05 && img[i+3] == 0x06) {
    209       break;
    210     }
    211   }
    212   // double-check: this archive consists of a single "disk"
    213   if (!(img[i+4] == 0 && img[i+5] == 0 && img[i+6] == 0 && img[i+7] == 0)) {
    214     printf("can't process multi-disk archive\n");
    215     return NULL;
    216   }
    217 
    218   int cdcount = Read2(img+i+8);
    219   int cdoffset = Read4(img+i+16);
    220 
    221   ZipFileEntry* temp_entries = malloc(cdcount * sizeof(ZipFileEntry));
    222   int entrycount = 0;
    223 
    224   unsigned char* cd = img+cdoffset;
    225   for (i = 0; i < cdcount; ++i) {
    226     if (!(cd[0] == 0x50 && cd[1] == 0x4b && cd[2] == 0x01 && cd[3] == 0x02)) {
    227       printf("bad central directory entry %d\n", i);
    228       return NULL;
    229     }
    230 
    231     int clen = Read4(cd+20);   // compressed len
    232     int ulen = Read4(cd+24);   // uncompressed len
    233     int nlen = Read2(cd+28);   // filename len
    234     int xlen = Read2(cd+30);   // extra field len
    235     int mlen = Read2(cd+32);   // file comment len
    236     int hoffset = Read4(cd+42);   // local header offset
    237 
    238     char* filename = malloc(nlen+1);
    239     memcpy(filename, cd+46, nlen);
    240     filename[nlen] = '\0';
    241 
    242     int method = Read2(cd+10);
    243 
    244     cd += 46 + nlen + xlen + mlen;
    245 
    246     if (method != 8) {  // 8 == deflate
    247       free(filename);
    248       continue;
    249     }
    250 
    251     unsigned char* lh = img + hoffset;
    252 
    253     if (!(lh[0] == 0x50 && lh[1] == 0x4b && lh[2] == 0x03 && lh[3] == 0x04)) {
    254       printf("bad local file header entry %d\n", i);
    255       return NULL;
    256     }
    257 
    258     if (Read2(lh+26) != nlen || memcmp(lh+30, filename, nlen) != 0) {
    259       printf("central dir filename doesn't match local header\n");
    260       return NULL;
    261     }
    262 
    263     xlen = Read2(lh+28);   // extra field len; might be different from CD entry?
    264 
    265     temp_entries[entrycount].data_offset = hoffset+30+nlen+xlen;
    266     temp_entries[entrycount].deflate_len = clen;
    267     temp_entries[entrycount].uncomp_len = ulen;
    268     temp_entries[entrycount].filename = filename;
    269     ++entrycount;
    270   }
    271 
    272   qsort(temp_entries, entrycount, sizeof(ZipFileEntry), fileentry_compare);
    273 
    274 #if 0
    275   printf("found %d deflated entries\n", entrycount);
    276   for (i = 0; i < entrycount; ++i) {
    277     printf("off %10d  len %10d unlen %10d   %p %s\n",
    278            temp_entries[i].data_offset,
    279            temp_entries[i].deflate_len,
    280            temp_entries[i].uncomp_len,
    281            temp_entries[i].filename,
    282            temp_entries[i].filename);
    283   }
    284 #endif
    285 
    286   *num_chunks = 0;
    287   *chunks = malloc((entrycount*2+2) * sizeof(ImageChunk));
    288   ImageChunk* curr = *chunks;
    289 
    290   if (include_pseudo_chunk) {
    291     curr->type = CHUNK_NORMAL;
    292     curr->start = 0;
    293     curr->len = st.st_size;
    294     curr->data = img;
    295     curr->filename = NULL;
    296     curr->I = NULL;
    297     ++curr;
    298     ++*num_chunks;
    299   }
    300 
    301   int pos = 0;
    302   int nextentry = 0;
    303 
    304   while (pos < st.st_size) {
    305     if (nextentry < entrycount && pos == temp_entries[nextentry].data_offset) {
    306       curr->type = CHUNK_DEFLATE;
    307       curr->start = pos;
    308       curr->deflate_len = temp_entries[nextentry].deflate_len;
    309       curr->deflate_data = img + pos;
    310       curr->filename = temp_entries[nextentry].filename;
    311       curr->I = NULL;
    312 
    313       curr->len = temp_entries[nextentry].uncomp_len;
    314       curr->data = malloc(curr->len);
    315 
    316       z_stream strm;
    317       strm.zalloc = Z_NULL;
    318       strm.zfree = Z_NULL;
    319       strm.opaque = Z_NULL;
    320       strm.avail_in = curr->deflate_len;
    321       strm.next_in = curr->deflate_data;
    322 
    323       // -15 means we are decoding a 'raw' deflate stream; zlib will
    324       // not expect zlib headers.
    325       int ret = inflateInit2(&strm, -15);
    326 
    327       strm.avail_out = curr->len;
    328       strm.next_out = curr->data;
    329       ret = inflate(&strm, Z_NO_FLUSH);
    330       if (ret != Z_STREAM_END) {
    331         printf("failed to inflate \"%s\"; %d\n", curr->filename, ret);
    332         return NULL;
    333       }
    334 
    335       inflateEnd(&strm);
    336 
    337       pos += curr->deflate_len;
    338       ++nextentry;
    339       ++*num_chunks;
    340       ++curr;
    341       continue;
    342     }
    343 
    344     // use a normal chunk to take all the data up to the start of the
    345     // next deflate section.
    346 
    347     curr->type = CHUNK_NORMAL;
    348     curr->start = pos;
    349     if (nextentry < entrycount) {
    350       curr->len = temp_entries[nextentry].data_offset - pos;
    351     } else {
    352       curr->len = st.st_size - pos;
    353     }
    354     curr->data = img + pos;
    355     curr->filename = NULL;
    356     curr->I = NULL;
    357     pos += curr->len;
    358 
    359     ++*num_chunks;
    360     ++curr;
    361   }
    362 
    363   free(temp_entries);
    364   return img;
    365 }
    366 
    367 /*
    368  * Read the given file and break it up into chunks, putting the number
    369  * of chunks and their info in *num_chunks and **chunks,
    370  * respectively.  Returns a malloc'd block of memory containing the
    371  * contents of the file; various pointers in the output chunk array
    372  * will point into this block of memory.  The caller should free the
    373  * return value when done with all the chunks.  Returns NULL on
    374  * failure.
    375  */
    376 unsigned char* ReadImage(const char* filename,
    377                          int* num_chunks, ImageChunk** chunks) {
    378   struct stat st;
    379   if (stat(filename, &st) != 0) {
    380     printf("failed to stat \"%s\": %s\n", filename, strerror(errno));
    381     return NULL;
    382   }
    383 
    384   unsigned char* img = malloc(st.st_size + 4);
    385   FILE* f = fopen(filename, "rb");
    386   if (fread(img, 1, st.st_size, f) != st.st_size) {
    387     printf("failed to read \"%s\" %s\n", filename, strerror(errno));
    388     fclose(f);
    389     return NULL;
    390   }
    391   fclose(f);
    392 
    393   // append 4 zero bytes to the data so we can always search for the
    394   // four-byte string 1f8b0800 starting at any point in the actual
    395   // file data, without special-casing the end of the data.
    396   memset(img+st.st_size, 0, 4);
    397 
    398   size_t pos = 0;
    399 
    400   *num_chunks = 0;
    401   *chunks = NULL;
    402 
    403   while (pos < st.st_size) {
    404     unsigned char* p = img+pos;
    405 
    406     if (st.st_size - pos >= 4 &&
    407         p[0] == 0x1f && p[1] == 0x8b &&
    408         p[2] == 0x08 &&    // deflate compression
    409         p[3] == 0x00) {    // no header flags
    410       // 'pos' is the offset of the start of a gzip chunk.
    411 
    412       *num_chunks += 3;
    413       *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk));
    414       ImageChunk* curr = *chunks + (*num_chunks-3);
    415 
    416       // create a normal chunk for the header.
    417       curr->start = pos;
    418       curr->type = CHUNK_NORMAL;
    419       curr->len = GZIP_HEADER_LEN;
    420       curr->data = p;
    421       curr->I = NULL;
    422 
    423       pos += curr->len;
    424       p += curr->len;
    425       ++curr;
    426 
    427       curr->type = CHUNK_DEFLATE;
    428       curr->filename = NULL;
    429       curr->I = NULL;
    430 
    431       // We must decompress this chunk in order to discover where it
    432       // ends, and so we can put the uncompressed data and its length
    433       // into curr->data and curr->len.
    434 
    435       size_t allocated = 32768;
    436       curr->len = 0;
    437       curr->data = malloc(allocated);
    438       curr->start = pos;
    439       curr->deflate_data = p;
    440 
    441       z_stream strm;
    442       strm.zalloc = Z_NULL;
    443       strm.zfree = Z_NULL;
    444       strm.opaque = Z_NULL;
    445       strm.avail_in = st.st_size - pos;
    446       strm.next_in = p;
    447 
    448       // -15 means we are decoding a 'raw' deflate stream; zlib will
    449       // not expect zlib headers.
    450       int ret = inflateInit2(&strm, -15);
    451 
    452       do {
    453         strm.avail_out = allocated - curr->len;
    454         strm.next_out = curr->data + curr->len;
    455         ret = inflate(&strm, Z_NO_FLUSH);
    456         curr->len = allocated - strm.avail_out;
    457         if (strm.avail_out == 0) {
    458           allocated *= 2;
    459           curr->data = realloc(curr->data, allocated);
    460         }
    461       } while (ret != Z_STREAM_END);
    462 
    463       curr->deflate_len = st.st_size - strm.avail_in - pos;
    464       inflateEnd(&strm);
    465       pos += curr->deflate_len;
    466       p += curr->deflate_len;
    467       ++curr;
    468 
    469       // create a normal chunk for the footer
    470 
    471       curr->type = CHUNK_NORMAL;
    472       curr->start = pos;
    473       curr->len = GZIP_FOOTER_LEN;
    474       curr->data = img+pos;
    475       curr->I = NULL;
    476 
    477       pos += curr->len;
    478       p += curr->len;
    479       ++curr;
    480 
    481       // The footer (that we just skipped over) contains the size of
    482       // the uncompressed data.  Double-check to make sure that it
    483       // matches the size of the data we got when we actually did
    484       // the decompression.
    485       size_t footer_size = Read4(p-4);
    486       if (footer_size != curr[-2].len) {
    487         printf("Error: footer size %d != decompressed size %d\n",
    488                 footer_size, curr[-2].len);
    489         free(img);
    490         return NULL;
    491       }
    492     } else {
    493       // Reallocate the list for every chunk; we expect the number of
    494       // chunks to be small (5 for typical boot and recovery images).
    495       ++*num_chunks;
    496       *chunks = realloc(*chunks, *num_chunks * sizeof(ImageChunk));
    497       ImageChunk* curr = *chunks + (*num_chunks-1);
    498       curr->start = pos;
    499       curr->I = NULL;
    500 
    501       // 'pos' is not the offset of the start of a gzip chunk, so scan
    502       // forward until we find a gzip header.
    503       curr->type = CHUNK_NORMAL;
    504       curr->data = p;
    505 
    506       for (curr->len = 0; curr->len < (st.st_size - pos); ++curr->len) {
    507         if (p[curr->len] == 0x1f &&
    508             p[curr->len+1] == 0x8b &&
    509             p[curr->len+2] == 0x08 &&
    510             p[curr->len+3] == 0x00) {
    511           break;
    512         }
    513       }
    514       pos += curr->len;
    515     }
    516   }
    517 
    518   return img;
    519 }
    520 
    521 #define BUFFER_SIZE 32768
    522 
    523 /*
    524  * Takes the uncompressed data stored in the chunk, compresses it
    525  * using the zlib parameters stored in the chunk, and checks that it
    526  * matches exactly the compressed data we started with (also stored in
    527  * the chunk).  Return 0 on success.
    528  */
    529 int TryReconstruction(ImageChunk* chunk, unsigned char* out) {
    530   size_t p = 0;
    531 
    532 #if 0
    533   printf("trying %d %d %d %d %d\n",
    534           chunk->level, chunk->method, chunk->windowBits,
    535           chunk->memLevel, chunk->strategy);
    536 #endif
    537 
    538   z_stream strm;
    539   strm.zalloc = Z_NULL;
    540   strm.zfree = Z_NULL;
    541   strm.opaque = Z_NULL;
    542   strm.avail_in = chunk->len;
    543   strm.next_in = chunk->data;
    544   int ret;
    545   ret = deflateInit2(&strm, chunk->level, chunk->method, chunk->windowBits,
    546                      chunk->memLevel, chunk->strategy);
    547   do {
    548     strm.avail_out = BUFFER_SIZE;
    549     strm.next_out = out;
    550     ret = deflate(&strm, Z_FINISH);
    551     size_t have = BUFFER_SIZE - strm.avail_out;
    552 
    553     if (memcmp(out, chunk->deflate_data+p, have) != 0) {
    554       // mismatch; data isn't the same.
    555       deflateEnd(&strm);
    556       return -1;
    557     }
    558     p += have;
    559   } while (ret != Z_STREAM_END);
    560   deflateEnd(&strm);
    561   if (p != chunk->deflate_len) {
    562     // mismatch; ran out of data before we should have.
    563     return -1;
    564   }
    565   return 0;
    566 }
    567 
    568 /*
    569  * Verify that we can reproduce exactly the same compressed data that
    570  * we started with.  Sets the level, method, windowBits, memLevel, and
    571  * strategy fields in the chunk to the encoding parameters needed to
    572  * produce the right output.  Returns 0 on success.
    573  */
    574 int ReconstructDeflateChunk(ImageChunk* chunk) {
    575   if (chunk->type != CHUNK_DEFLATE) {
    576     printf("attempt to reconstruct non-deflate chunk\n");
    577     return -1;
    578   }
    579 
    580   size_t p = 0;
    581   unsigned char* out = malloc(BUFFER_SIZE);
    582 
    583   // We only check two combinations of encoder parameters:  level 6
    584   // (the default) and level 9 (the maximum).
    585   for (chunk->level = 6; chunk->level <= 9; chunk->level += 3) {
    586     chunk->windowBits = -15;  // 32kb window; negative to indicate a raw stream.
    587     chunk->memLevel = 8;      // the default value.
    588     chunk->method = Z_DEFLATED;
    589     chunk->strategy = Z_DEFAULT_STRATEGY;
    590 
    591     if (TryReconstruction(chunk, out) == 0) {
    592       free(out);
    593       return 0;
    594     }
    595   }
    596 
    597   free(out);
    598   return -1;
    599 }
    600 
    601 /*
    602  * Given source and target chunks, compute a bsdiff patch between them
    603  * by running bsdiff in a subprocess.  Return the patch data, placing
    604  * its length in *size.  Return NULL on failure.  We expect the bsdiff
    605  * program to be in the path.
    606  */
    607 unsigned char* MakePatch(ImageChunk* src, ImageChunk* tgt, size_t* size) {
    608   if (tgt->type == CHUNK_NORMAL) {
    609     if (tgt->len <= 160) {
    610       tgt->type = CHUNK_RAW;
    611       *size = tgt->len;
    612       return tgt->data;
    613     }
    614   }
    615 
    616   char ptemp[] = "/tmp/imgdiff-patch-XXXXXX";
    617   mkstemp(ptemp);
    618 
    619   int r = bsdiff(src->data, src->len, &(src->I), tgt->data, tgt->len, ptemp);
    620   if (r != 0) {
    621     printf("bsdiff() failed: %d\n", r);
    622     return NULL;
    623   }
    624 
    625   struct stat st;
    626   if (stat(ptemp, &st) != 0) {
    627     printf("failed to stat patch file %s: %s\n",
    628             ptemp, strerror(errno));
    629     return NULL;
    630   }
    631 
    632   unsigned char* data = malloc(st.st_size);
    633 
    634   if (tgt->type == CHUNK_NORMAL && tgt->len <= st.st_size) {
    635     unlink(ptemp);
    636 
    637     tgt->type = CHUNK_RAW;
    638     *size = tgt->len;
    639     return tgt->data;
    640   }
    641 
    642   *size = st.st_size;
    643 
    644   FILE* f = fopen(ptemp, "rb");
    645   if (f == NULL) {
    646     printf("failed to open patch %s: %s\n", ptemp, strerror(errno));
    647     return NULL;
    648   }
    649   if (fread(data, 1, st.st_size, f) != st.st_size) {
    650     printf("failed to read patch %s: %s\n", ptemp, strerror(errno));
    651     return NULL;
    652   }
    653   fclose(f);
    654 
    655   unlink(ptemp);
    656 
    657   tgt->source_start = src->start;
    658   switch (tgt->type) {
    659     case CHUNK_NORMAL:
    660       tgt->source_len = src->len;
    661       break;
    662     case CHUNK_DEFLATE:
    663       tgt->source_len = src->deflate_len;
    664       tgt->source_uncompressed_len = src->len;
    665       break;
    666   }
    667 
    668   return data;
    669 }
    670 
    671 /*
    672  * Cause a gzip chunk to be treated as a normal chunk (ie, as a blob
    673  * of uninterpreted data).  The resulting patch will likely be about
    674  * as big as the target file, but it lets us handle the case of images
    675  * where some gzip chunks are reconstructible but others aren't (by
    676  * treating the ones that aren't as normal chunks).
    677  */
    678 void ChangeDeflateChunkToNormal(ImageChunk* ch) {
    679   if (ch->type != CHUNK_DEFLATE) return;
    680   ch->type = CHUNK_NORMAL;
    681   free(ch->data);
    682   ch->data = ch->deflate_data;
    683   ch->len = ch->deflate_len;
    684 }
    685 
    686 /*
    687  * Return true if the data in the chunk is identical (including the
    688  * compressed representation, for gzip chunks).
    689  */
    690 int AreChunksEqual(ImageChunk* a, ImageChunk* b) {
    691     if (a->type != b->type) return 0;
    692 
    693     switch (a->type) {
    694         case CHUNK_NORMAL:
    695             return a->len == b->len && memcmp(a->data, b->data, a->len) == 0;
    696 
    697         case CHUNK_DEFLATE:
    698             return a->deflate_len == b->deflate_len &&
    699                 memcmp(a->deflate_data, b->deflate_data, a->deflate_len) == 0;
    700 
    701         default:
    702             printf("unknown chunk type %d\n", a->type);
    703             return 0;
    704     }
    705 }
    706 
    707 /*
    708  * Look for runs of adjacent normal chunks and compress them down into
    709  * a single chunk.  (Such runs can be produced when deflate chunks are
    710  * changed to normal chunks.)
    711  */
    712 void MergeAdjacentNormalChunks(ImageChunk* chunks, int* num_chunks) {
    713   int out = 0;
    714   int in_start = 0, in_end;
    715   while (in_start < *num_chunks) {
    716     if (chunks[in_start].type != CHUNK_NORMAL) {
    717       in_end = in_start+1;
    718     } else {
    719       // in_start is a normal chunk.  Look for a run of normal chunks
    720       // that constitute a solid block of data (ie, each chunk begins
    721       // where the previous one ended).
    722       for (in_end = in_start+1;
    723            in_end < *num_chunks && chunks[in_end].type == CHUNK_NORMAL &&
    724              (chunks[in_end].start ==
    725               chunks[in_end-1].start + chunks[in_end-1].len &&
    726               chunks[in_end].data ==
    727               chunks[in_end-1].data + chunks[in_end-1].len);
    728            ++in_end);
    729     }
    730 
    731     if (in_end == in_start+1) {
    732 #if 0
    733       printf("chunk %d is now %d\n", in_start, out);
    734 #endif
    735       if (out != in_start) {
    736         memcpy(chunks+out, chunks+in_start, sizeof(ImageChunk));
    737       }
    738     } else {
    739 #if 0
    740       printf("collapse normal chunks %d-%d into %d\n", in_start, in_end-1, out);
    741 #endif
    742 
    743       // Merge chunks [in_start, in_end-1] into one chunk.  Since the
    744       // data member of each chunk is just a pointer into an in-memory
    745       // copy of the file, this can be done without recopying (the
    746       // output chunk has the first chunk's start location and data
    747       // pointer, and length equal to the sum of the input chunk
    748       // lengths).
    749       chunks[out].type = CHUNK_NORMAL;
    750       chunks[out].start = chunks[in_start].start;
    751       chunks[out].data = chunks[in_start].data;
    752       chunks[out].len = chunks[in_end-1].len +
    753         (chunks[in_end-1].start - chunks[in_start].start);
    754     }
    755 
    756     ++out;
    757     in_start = in_end;
    758   }
    759   *num_chunks = out;
    760 }
    761 
    762 ImageChunk* FindChunkByName(const char* name,
    763                             ImageChunk* chunks, int num_chunks) {
    764   int i;
    765   for (i = 0; i < num_chunks; ++i) {
    766     if (chunks[i].type == CHUNK_DEFLATE && chunks[i].filename &&
    767         strcmp(name, chunks[i].filename) == 0) {
    768       return chunks+i;
    769     }
    770   }
    771   return NULL;
    772 }
    773 
    774 void DumpChunks(ImageChunk* chunks, int num_chunks) {
    775     int i;
    776     for (i = 0; i < num_chunks; ++i) {
    777         printf("chunk %d: type %d start %d len %d\n",
    778                i, chunks[i].type, chunks[i].start, chunks[i].len);
    779     }
    780 }
    781 
    782 int main(int argc, char** argv) {
    783   int zip_mode = 0;
    784 
    785   if (argc >= 2 && strcmp(argv[1], "-z") == 0) {
    786     zip_mode = 1;
    787     --argc;
    788     ++argv;
    789   }
    790 
    791   size_t bonus_size = 0;
    792   unsigned char* bonus_data = NULL;
    793   if (argc >= 3 && strcmp(argv[1], "-b") == 0) {
    794     struct stat st;
    795     if (stat(argv[2], &st) != 0) {
    796       printf("failed to stat bonus file %s: %s\n", argv[2], strerror(errno));
    797       return 1;
    798     }
    799     bonus_size = st.st_size;
    800     bonus_data = malloc(bonus_size);
    801     FILE* f = fopen(argv[2], "rb");
    802     if (f == NULL) {
    803       printf("failed to open bonus file %s: %s\n", argv[2], strerror(errno));
    804       return 1;
    805     }
    806     if (fread(bonus_data, 1, bonus_size, f) != bonus_size) {
    807       printf("failed to read bonus file %s: %s\n", argv[2], strerror(errno));
    808       return 1;
    809     }
    810     fclose(f);
    811 
    812     argc -= 2;
    813     argv += 2;
    814   }
    815 
    816   if (argc != 4) {
    817     usage:
    818     printf("usage: %s [-z] [-b <bonus-file>] <src-img> <tgt-img> <patch-file>\n",
    819             argv[0]);
    820     return 2;
    821   }
    822 
    823   int num_src_chunks;
    824   ImageChunk* src_chunks;
    825   int num_tgt_chunks;
    826   ImageChunk* tgt_chunks;
    827   int i;
    828 
    829   if (zip_mode) {
    830     if (ReadZip(argv[1], &num_src_chunks, &src_chunks, 1) == NULL) {
    831       printf("failed to break apart source zip file\n");
    832       return 1;
    833     }
    834     if (ReadZip(argv[2], &num_tgt_chunks, &tgt_chunks, 0) == NULL) {
    835       printf("failed to break apart target zip file\n");
    836       return 1;
    837     }
    838   } else {
    839     if (ReadImage(argv[1], &num_src_chunks, &src_chunks) == NULL) {
    840       printf("failed to break apart source image\n");
    841       return 1;
    842     }
    843     if (ReadImage(argv[2], &num_tgt_chunks, &tgt_chunks) == NULL) {
    844       printf("failed to break apart target image\n");
    845       return 1;
    846     }
    847 
    848     // Verify that the source and target images have the same chunk
    849     // structure (ie, the same sequence of deflate and normal chunks).
    850 
    851     if (!zip_mode) {
    852         // Merge the gzip header and footer in with any adjacent
    853         // normal chunks.
    854         MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
    855         MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
    856     }
    857 
    858     if (num_src_chunks != num_tgt_chunks) {
    859       printf("source and target don't have same number of chunks!\n");
    860       printf("source chunks:\n");
    861       DumpChunks(src_chunks, num_src_chunks);
    862       printf("target chunks:\n");
    863       DumpChunks(tgt_chunks, num_tgt_chunks);
    864       return 1;
    865     }
    866     for (i = 0; i < num_src_chunks; ++i) {
    867       if (src_chunks[i].type != tgt_chunks[i].type) {
    868         printf("source and target don't have same chunk "
    869                 "structure! (chunk %d)\n", i);
    870         printf("source chunks:\n");
    871         DumpChunks(src_chunks, num_src_chunks);
    872         printf("target chunks:\n");
    873         DumpChunks(tgt_chunks, num_tgt_chunks);
    874         return 1;
    875       }
    876     }
    877   }
    878 
    879   for (i = 0; i < num_tgt_chunks; ++i) {
    880     if (tgt_chunks[i].type == CHUNK_DEFLATE) {
    881       // Confirm that given the uncompressed chunk data in the target, we
    882       // can recompress it and get exactly the same bits as are in the
    883       // input target image.  If this fails, treat the chunk as a normal
    884       // non-deflated chunk.
    885       if (ReconstructDeflateChunk(tgt_chunks+i) < 0) {
    886         printf("failed to reconstruct target deflate chunk %d [%s]; "
    887                "treating as normal\n", i, tgt_chunks[i].filename);
    888         ChangeDeflateChunkToNormal(tgt_chunks+i);
    889         if (zip_mode) {
    890           ImageChunk* src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
    891           if (src) {
    892             ChangeDeflateChunkToNormal(src);
    893           }
    894         } else {
    895           ChangeDeflateChunkToNormal(src_chunks+i);
    896         }
    897         continue;
    898       }
    899 
    900       // If two deflate chunks are identical (eg, the kernel has not
    901       // changed between two builds), treat them as normal chunks.
    902       // This makes applypatch much faster -- it can apply a trivial
    903       // patch to the compressed data, rather than uncompressing and
    904       // recompressing to apply the trivial patch to the uncompressed
    905       // data.
    906       ImageChunk* src;
    907       if (zip_mode) {
    908         src = FindChunkByName(tgt_chunks[i].filename, src_chunks, num_src_chunks);
    909       } else {
    910         src = src_chunks+i;
    911       }
    912 
    913       if (src == NULL || AreChunksEqual(tgt_chunks+i, src)) {
    914         ChangeDeflateChunkToNormal(tgt_chunks+i);
    915         if (src) {
    916           ChangeDeflateChunkToNormal(src);
    917         }
    918       }
    919     }
    920   }
    921 
    922   // Merging neighboring normal chunks.
    923   if (zip_mode) {
    924     // For zips, we only need to do this to the target:  deflated
    925     // chunks are matched via filename, and normal chunks are patched
    926     // using the entire source file as the source.
    927     MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
    928   } else {
    929     // For images, we need to maintain the parallel structure of the
    930     // chunk lists, so do the merging in both the source and target
    931     // lists.
    932     MergeAdjacentNormalChunks(tgt_chunks, &num_tgt_chunks);
    933     MergeAdjacentNormalChunks(src_chunks, &num_src_chunks);
    934     if (num_src_chunks != num_tgt_chunks) {
    935       // This shouldn't happen.
    936       printf("merging normal chunks went awry\n");
    937       return 1;
    938     }
    939   }
    940 
    941   // Compute bsdiff patches for each chunk's data (the uncompressed
    942   // data, in the case of deflate chunks).
    943 
    944   DumpChunks(src_chunks, num_src_chunks);
    945 
    946   printf("Construct patches for %d chunks...\n", num_tgt_chunks);
    947   unsigned char** patch_data = malloc(num_tgt_chunks * sizeof(unsigned char*));
    948   size_t* patch_size = malloc(num_tgt_chunks * sizeof(size_t));
    949   for (i = 0; i < num_tgt_chunks; ++i) {
    950     if (zip_mode) {
    951       ImageChunk* src;
    952       if (tgt_chunks[i].type == CHUNK_DEFLATE &&
    953           (src = FindChunkByName(tgt_chunks[i].filename, src_chunks,
    954                                  num_src_chunks))) {
    955         patch_data[i] = MakePatch(src, tgt_chunks+i, patch_size+i);
    956       } else {
    957         patch_data[i] = MakePatch(src_chunks, tgt_chunks+i, patch_size+i);
    958       }
    959     } else {
    960       if (i == 1 && bonus_data) {
    961         printf("  using %d bytes of bonus data for chunk %d\n", bonus_size, i);
    962         src_chunks[i].data = realloc(src_chunks[i].data, src_chunks[i].len + bonus_size);
    963         memcpy(src_chunks[i].data+src_chunks[i].len, bonus_data, bonus_size);
    964         src_chunks[i].len += bonus_size;
    965      }
    966 
    967       patch_data[i] = MakePatch(src_chunks+i, tgt_chunks+i, patch_size+i);
    968     }
    969     printf("patch %3d is %d bytes (of %d)\n",
    970            i, patch_size[i], tgt_chunks[i].source_len);
    971   }
    972 
    973   // Figure out how big the imgdiff file header is going to be, so
    974   // that we can correctly compute the offset of each bsdiff patch
    975   // within the file.
    976 
    977   size_t total_header_size = 12;
    978   for (i = 0; i < num_tgt_chunks; ++i) {
    979     total_header_size += 4;
    980     switch (tgt_chunks[i].type) {
    981       case CHUNK_NORMAL:
    982         total_header_size += 8*3;
    983         break;
    984       case CHUNK_DEFLATE:
    985         total_header_size += 8*5 + 4*5;
    986         break;
    987       case CHUNK_RAW:
    988         total_header_size += 4 + patch_size[i];
    989         break;
    990     }
    991   }
    992 
    993   size_t offset = total_header_size;
    994 
    995   FILE* f = fopen(argv[3], "wb");
    996 
    997   // Write out the headers.
    998 
    999   fwrite("IMGDIFF2", 1, 8, f);
   1000   Write4(num_tgt_chunks, f);
   1001   for (i = 0; i < num_tgt_chunks; ++i) {
   1002     Write4(tgt_chunks[i].type, f);
   1003 
   1004     switch (tgt_chunks[i].type) {
   1005       case CHUNK_NORMAL:
   1006         printf("chunk %3d: normal   (%10d, %10d)  %10d\n", i,
   1007                tgt_chunks[i].start, tgt_chunks[i].len, patch_size[i]);
   1008         Write8(tgt_chunks[i].source_start, f);
   1009         Write8(tgt_chunks[i].source_len, f);
   1010         Write8(offset, f);
   1011         offset += patch_size[i];
   1012         break;
   1013 
   1014       case CHUNK_DEFLATE:
   1015         printf("chunk %3d: deflate  (%10d, %10d)  %10d  %s\n", i,
   1016                tgt_chunks[i].start, tgt_chunks[i].deflate_len, patch_size[i],
   1017                tgt_chunks[i].filename);
   1018         Write8(tgt_chunks[i].source_start, f);
   1019         Write8(tgt_chunks[i].source_len, f);
   1020         Write8(offset, f);
   1021         Write8(tgt_chunks[i].source_uncompressed_len, f);
   1022         Write8(tgt_chunks[i].len, f);
   1023         Write4(tgt_chunks[i].level, f);
   1024         Write4(tgt_chunks[i].method, f);
   1025         Write4(tgt_chunks[i].windowBits, f);
   1026         Write4(tgt_chunks[i].memLevel, f);
   1027         Write4(tgt_chunks[i].strategy, f);
   1028         offset += patch_size[i];
   1029         break;
   1030 
   1031       case CHUNK_RAW:
   1032         printf("chunk %3d: raw      (%10d, %10d)\n", i,
   1033                tgt_chunks[i].start, tgt_chunks[i].len);
   1034         Write4(patch_size[i], f);
   1035         fwrite(patch_data[i], 1, patch_size[i], f);
   1036         break;
   1037     }
   1038   }
   1039 
   1040   // Append each chunk's bsdiff patch, in order.
   1041 
   1042   for (i = 0; i < num_tgt_chunks; ++i) {
   1043     if (tgt_chunks[i].type != CHUNK_RAW) {
   1044       fwrite(patch_data[i], 1, patch_size[i], f);
   1045     }
   1046   }
   1047 
   1048   fclose(f);
   1049 
   1050   return 0;
   1051 }
   1052