Home | History | Annotate | Download | only in bsdiff
      1 /*-
      2  * Copyright 2003-2005 Colin Percival
      3  * All rights reserved
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted providing that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  * POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #if 0
     28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
     29 #endif
     30 
     31 #include <sys/types.h>
     32 
     33 #include <bzlib.h>
     34 #include <err.h>
     35 #include <fcntl.h>
     36 #include <lzma.h>
     37 #include <stdlib.h>
     38 #include <stdio.h>
     39 #include <string.h>
     40 #include <unistd.h>
     41 #include <zlib.h>
     42 
     43 #if defined(__APPLE__)
     44 #include <libkern/OSByteOrder.h>
     45 #define le64toh(x) OSSwapLittleToHostInt64(x)
     46 #elif defined(__linux__)
     47 #include <endian.h>
     48 #elif defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
     49 #define le64toh(x) (x)
     50 #else
     51 #error Provide le64toh for this platform
     52 #endif
     53 
     54 #include "chrome/installer/mac/third_party/bsdiff/sha1_adapter.h"
     55 
     56 static inline off_t offtin(u_char *buf)
     57 {
     58 	return le64toh(*((off_t*)buf));
     59 }
     60 
     61 static void sha1tostr(const u_char *sha1, char *sha1str)
     62 {
     63 	int i;
     64 	for (i = 0; i < SHA1_DIGEST_LENGTH; ++i)
     65 		sprintf(&sha1str[i * 2], "%02x", sha1[i]);
     66 }
     67 
     68 /* xzfile is a provisional stdio-like interface to xz/lzma2-compressed data.
     69  * liblzma does not currently include this functionality. The interface is
     70  * read-only and only supports sequential access. */
     71 
     72 typedef struct {
     73 	/* in and out are the underlying buffers to be used with lzma_stream. */
     74 	u_char in[BUFSIZ];
     75 	u_char out[BUFSIZ];
     76 
     77 	lzma_stream ls;
     78 	FILE *f;
     79 
     80 	/* read_out points to the first byte in out not yet consumed by an
     81 	 * xzread call. read_out_len tracks the amount of data available in
     82 	 * out beginning at read_out. */
     83 	u_char *read_out;
     84 	size_t read_out_len;
     85 
     86 	/* Error and end-of-file indicators. */
     87 	lzma_ret err;
     88 	int eof;
     89 } xzfile;
     90 
     91 /* Initializes and returns a new xzfile pointer that will read from f. On
     92  * failure, returns NULL. If err is non-NULL, it will be set to indicate any
     93  * error that may have occurred. */
     94 static xzfile *xzdopen(FILE *f, lzma_ret *err)
     95 {
     96 	xzfile *xzf;
     97 	lzma_stream ls = LZMA_STREAM_INIT;
     98 	uint64_t physmem, memlimit;
     99 
    100 	if (!(xzf = malloc(sizeof(xzfile)))) {
    101 		if (err) *err = LZMA_MEM_ERROR;
    102 		return NULL;
    103 	}
    104 
    105 	xzf->ls = ls;
    106 	xzf->f = f;
    107 
    108 	xzf->read_out = xzf->out;
    109 	xzf->read_out_len = 0;
    110 
    111 	xzf->err = LZMA_OK;
    112 	xzf->eof = 0;
    113 
    114 	/* Use the same memory limits used by xzdec and xz. Use 40% of
    115 	 * physical memory if 80MB or more, otherwise use 80% of physical
    116 	 * memory if 80MB or less, otherwise use 80MB. If physical memory
    117 	 * can't be determined, use 128MB. These limits should be sufficient
    118 	 * for any decompression on any general-purpose system. */
    119 	physmem = lzma_physmem();
    120 	if (physmem == 0)
    121 		physmem = 128 * 1024 * 1024;
    122 	memlimit = 40 * physmem / 100;
    123 	if (memlimit < 80 * 1024 * 1024) {
    124 		memlimit = 80 * physmem / 100;
    125 		if (memlimit > 80 * 1024 * 1024)
    126 			memlimit = 80 * 1024 * 1024;
    127 	}
    128 
    129 	xzf->err = lzma_stream_decoder(&xzf->ls, memlimit,
    130 	                               LZMA_TELL_NO_CHECK |
    131 	                                   LZMA_TELL_UNSUPPORTED_CHECK);
    132 	if (xzf->err != LZMA_OK) {
    133 		if (err) *err = xzf->err;
    134 		free(xzf);
    135 		return NULL;
    136 	}
    137 
    138 	if (err) *err = xzf->err;
    139 	return xzf;
    140 }
    141 
    142 /* Closes an xzfile opened by xzopen, freeing all memory and closing all
    143  * files. Returns LZMA_OK normally, or LZMA_STREAM_END if fclose fails. */
    144 static lzma_ret xzclose(xzfile *xzf)
    145 {
    146 	lzma_ret lzma_err = LZMA_OK;
    147 
    148 	lzma_end(&xzf->ls);
    149 	if (fclose(xzf->f) != 0)
    150 		lzma_err = LZMA_STREAM_END;
    151 	free(xzf);
    152 
    153 	return lzma_err;
    154 }
    155 
    156 /* Reads len uncompressed bytes from xzf into buf. Returns the number of bytes
    157  * read, which may be less than len at the end of the file. Upon error, if
    158  * err is non-NULL, it will be set to an appropriate value, which will either
    159  * be a return value from lzma_code (with the exception of LZMA_STREAM_END,
    160  * which is remapped to LZMA_OK), or LZMA_STREAM_END to indicate an I/O error.
    161  */
    162 static size_t xzread(xzfile *xzf, u_char *buf, size_t len, lzma_ret *err)
    163 {
    164 	lzma_action action = LZMA_RUN;
    165 	size_t copylen;
    166 	size_t nread = 0;
    167 
    168 	while (xzf->err == LZMA_OK && len > 0) {
    169 		if (xzf->read_out_len == 0) {
    170 			/* No unconsumed data is available, need to run
    171 			 * lzma_code to decompress. */
    172 			if (xzf->ls.avail_in == 0 && !xzf->eof) {
    173 				/* No input data available, need to read. */
    174 				xzf->ls.next_in = xzf->in;
    175 				xzf->ls.avail_in = fread(xzf->in, 1, BUFSIZ,
    176 				                         xzf->f);
    177 				if (ferror(xzf->f)) {
    178 					/* Map I/O errors to LZMA_STREAM_END. */
    179 					xzf->err = LZMA_STREAM_END;
    180 					if (err) *err = xzf->err;
    181 					return 0;
    182 				} else if (feof(xzf->f)) {
    183 					xzf->eof = 1;
    184 				}
    185 			}
    186 
    187 			/* Use the full output buffer. */
    188 			xzf->ls.next_out = xzf->out;
    189 			xzf->ls.avail_out = BUFSIZ;
    190 
    191 			/* There must be something to decode. */
    192 			if (xzf->ls.avail_in == 0) {
    193 				xzf->err = LZMA_BUF_ERROR;
    194 				if (err) *err = xzf->err;
    195 				return 0;
    196 			}
    197 
    198 			/* LZMA_FINISH is not critical because
    199 			 * LZMA_CONCATENATED is not in use. */
    200 			if (xzf->eof)
    201 				action = LZMA_FINISH;
    202 
    203 			/* Run the decoder. */
    204 			xzf->err = lzma_code(&xzf->ls, action);
    205 			if (xzf->err == LZMA_STREAM_END) {
    206 				xzf->eof = 1;
    207 				xzf->err = LZMA_OK;
    208 			} else if (xzf->err != LZMA_OK) {
    209 				if (err) *err = xzf->err;
    210 				return 0;
    211 			}
    212 
    213 			/* Everything that was decoded is now available for
    214 			 * reading into buf. */
    215 			xzf->read_out = xzf->out;
    216 			xzf->read_out_len = BUFSIZ - xzf->ls.avail_out;
    217 		}
    218 
    219 		/* Copy everything available up to len, and push some
    220 		 * pointers. */
    221 		copylen = xzf->read_out_len;
    222 		if (copylen > len)
    223 			copylen = len;
    224 		memcpy(buf, xzf->read_out, copylen);
    225 		nread += copylen;
    226 		buf += copylen;
    227 		len -= copylen;
    228 		xzf->read_out += copylen;
    229 		xzf->read_out_len -= copylen;
    230 	}
    231 
    232 	if (err) *err = xzf->err;
    233 	return nread;
    234 }
    235 
    236 /* cfile is a uniform interface to read from maybe-compressed files. */
    237 
    238 typedef struct {
    239 	FILE *f;              /* method = 1, 2 */
    240 	union {
    241 		BZFILE *bz2;  /* method = 2 */
    242 		gzFile gz;    /* method = 3 */
    243 		xzfile *xz;   /* method = 4 */
    244 	} u;
    245 	const char *tag;
    246 	unsigned char method;
    247 } cfile;
    248 
    249 /* Opens a file at path, seeks to offset off, and prepares for reading using
    250  * the specified method. Supported methods are plain uncompressed (1), bzip2
    251  * (2), gzip (3), and xz/lzma2 (4). tag is used as an identifier for error
    252  * reporting. */
    253 static void cfopen(cfile *cf, const char *path, off_t off,
    254                    const char *tag, unsigned char method)
    255 {
    256 	int fd;
    257 	int bz2_err, gz_err;
    258 	lzma_ret lzma_err;
    259 
    260 	if (method == 1 || method == 2 || method == 4) {
    261 		/* Use stdio for uncompressed files. The bzip interface also
    262 		 * sits on top of a stdio FILE* but does not take "ownership"
    263 		 * of the FILE*. The xz/lzma2 interface sits on top of a FILE*
    264 		 * and does take ownership of the FILE*. */
    265 		if ((cf->f = fopen(path, "rb")) == NULL)
    266 			err(1, "fdopen(%s)", tag);
    267 		if ((fseeko(cf->f, off, SEEK_SET)) != 0)
    268 			err(1, "fseeko(%s, %lld)", tag, off);
    269 		if (method == 2) {
    270 			if ((cf->u.bz2 = BZ2_bzReadOpen(&bz2_err, cf->f, 0, 0,
    271 			                                NULL, 0)) == NULL)
    272 				errx(1, "BZ2_bzReadOpen(%s): %d", tag, bz2_err);
    273 		} else if (method == 4) {
    274 			if ((cf->u.xz = xzdopen(cf->f, &lzma_err)) == NULL)
    275 				errx(1, "xzdopen(%s): %d", tag, lzma_err);
    276 			/* cf->f belongs to the xzfile now, don't access it
    277 			 * from here. */
    278 			cf->f = NULL;
    279 		}
    280 	} else if (method == 3) {
    281 		if ((fd = open(path, O_RDONLY)) < 0)
    282 			err(1, "open(%s)", tag);
    283 		if (lseek(fd, off, SEEK_SET) != off)
    284 			err(1, "lseek(%s, %lld)", tag, off);
    285 		if ((cf->u.gz = gzdopen(fd, "rb")) == NULL)
    286 			errx(1, "gzdopen(%s)", tag);
    287 	} else {
    288 		errx(1, "cfopen(%s): unknown method %d", tag, method);
    289 	}
    290 
    291 	cf->tag = tag;
    292 	cf->method = method;
    293 }
    294 
    295 static void cfclose(cfile *cf)
    296 {
    297 	int bz2_err, gz_err;
    298 	lzma_ret lzma_err;
    299 
    300 	if (cf->method == 1 || cf->method == 2) {
    301 		if (cf->method == 2) {
    302 			bz2_err = BZ_OK;
    303 			BZ2_bzReadClose(&bz2_err, cf->u.bz2);
    304 			if (bz2_err != BZ_OK)
    305 				errx(1, "BZ2_bzReadClose(%s): %d\n",
    306 				     cf->tag, bz2_err);
    307 		}
    308 		if (fclose(cf->f) != 0)
    309 			err(1, "fclose(%s)", cf->tag);
    310 	} else if (cf->method == 3) {
    311 		if ((gz_err = gzclose(cf->u.gz)) != Z_OK)
    312 			errx(1, "gzclose(%s): %d", cf->tag, gz_err);
    313 	} else if (cf->method == 4) {
    314 		if ((lzma_err = xzclose(cf->u.xz)) != LZMA_OK)
    315 			errx(1, "xzclose(%s): %d", cf->tag, lzma_err);
    316 	} else {
    317 		errx(1, "cfclose(%s): unknown method %d", cf->tag, cf->method);
    318 	}
    319 }
    320 
    321 static void cfread(cfile *cf, u_char *buf, size_t len)
    322 {
    323 	size_t nread;
    324 	int bz2_err, gz_err;
    325 	lzma_ret lzma_err;
    326 
    327 	if (cf->method == 1) {
    328 		if ((nread = fread(buf, 1, len, cf->f)) != len) {
    329 			if (!ferror(cf->f))
    330 				errx(1, "fread(%s, %zd): short read %zd",
    331 				     cf->tag, len, nread);
    332 			err(1, "fread(%s, %zd)", cf->tag, len);
    333 		}
    334 	} else if (cf->method == 2) {
    335 		bz2_err = BZ_OK;
    336 		if ((nread = BZ2_bzRead(&bz2_err, cf->u.bz2, buf, len)) !=
    337 		    len) {
    338 			if (bz2_err == BZ_OK)
    339 				errx(1, "BZ2_bzRead(%s, %zd): short read %zd",
    340 				     cf->tag, len, nread);
    341 			errx(1, "BZ2_bzRead(%s, %zd): %d",
    342 			     cf->tag, len, bz2_err);
    343 		}
    344 	} else if (cf->method == 3) {
    345 		if ((nread = gzread(cf->u.gz, buf, len)) != len) {
    346 			gz_err = Z_OK;
    347 			gzerror(cf->u.gz, &gz_err);
    348 			if (gz_err == Z_OK)
    349 				errx(1, "gzread(%s, %zd): short read %zd",
    350 				     cf->tag, len, nread);
    351 			errx(1, "gzread(%s, %zd): %d", cf->tag, len, gz_err);
    352 		}
    353 	} else if (cf->method == 4) {
    354 		if ((nread = xzread(cf->u.xz, buf, len, &lzma_err)) != len) {
    355 			if (lzma_err == LZMA_OK)
    356 				errx(1, "xzread(%s, %zd): short read %zd",
    357 				     cf->tag, len, nread);
    358 			errx(1, "xzread(%s, %zd): %d", cf->tag, len, lzma_err);
    359 		}
    360 	} else {
    361 		errx(1, "cfread(%s, %zd): unknown method %d",
    362 		     cf->tag, len, cf->method);
    363 	}
    364 }
    365 
    366 int main(int argc,char * argv[])
    367 {
    368 	FILE * f;
    369 	cfile cf, df, ef;
    370 	int fd;
    371 	off_t expect_oldsize, oldsize, newsize, patchsize;
    372 	off_t zctrllen, zdatalen, zextralen;
    373 	u_char header[96], buf[8];
    374 	u_char *old, *new;
    375 	off_t oldpos,newpos;
    376 	off_t ctrl[3];
    377 	off_t i;
    378 	u_char sha1[SHA1_DIGEST_LENGTH];
    379 	char sha1str[SHA1_DIGEST_LENGTH * 2 + 1];
    380 	char expected_sha1str[SHA1_DIGEST_LENGTH * 2 + 1];
    381 
    382 	if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile",argv[0]);
    383 
    384 	/* Open patch file */
    385 	if ((f = fopen(argv[3], "rb")) == NULL)
    386 		err(1, "fopen(%s)", argv[3]);
    387 
    388 	/*
    389 	File format:
    390 		0	8	"BSDIFF4G"
    391 		8	8	length of compressed control block (x)
    392 		16	8	length of compressed diff block (y)
    393 		24	8	length of compressed extra block (z)
    394 		32	8	length of old file
    395 		40	8	length of new file
    396 		48	20	SHA1 of old file
    397 		68	20	SHA1 of new file
    398 		88	1	encoding of control block
    399 		89	1	encoding of diff block
    400 		90	1	encoding of extra block
    401 		91	5	unused
    402 		96	x	compressed control block
    403 		96+x	y	compressed diff block
    404 		96+x+y	z	compressed extra block
    405 	Encodings are 1 (uncompressed), 2 (bzip2), 3 (gzip), and 4 (xz/lzma2).
    406 	The control block is a set of triples (x,y,z) meaning "add x bytes
    407 	from oldfile to x bytes from the diff block; copy y bytes from the
    408 	extra block; seek forwards in oldfile by z bytes".
    409 	*/
    410 
    411 	/* Read header */
    412 	if (fread(header, 1, sizeof(header), f) < sizeof(header)) {
    413 		if (feof(f))
    414 			errx(1, "corrupt patch (header size)");
    415 		err(1, "fread(%s)", argv[3]);
    416 	}
    417 
    418 	/* Check for appropriate magic */
    419 	if (memcmp(header, "BSDIFF4G", 8) != 0)
    420 		errx(1, "corrupt patch (magic)");
    421 
    422 	/* Read lengths from header */
    423 	zctrllen = offtin(header + 8);
    424 	zdatalen = offtin(header + 16);
    425 	zextralen = offtin(header + 24);
    426 	expect_oldsize = offtin(header + 32);
    427 	newsize = offtin(header + 40);
    428 	if (zctrllen < 0 || zdatalen < 0 || zextralen < 0)
    429 		errx(1, "corrupt patch (stream sizes)");
    430 	if (expect_oldsize < 0 || newsize < 0)
    431 		errx(1, "corrupt patch (file sizes)");
    432 
    433 	if (fseeko(f, 0, SEEK_END) != 0 || (patchsize = ftello(f)) < 0)
    434 		err(1, "fseeko/ftello(%s)", argv[3]);
    435 	if (patchsize != sizeof(header) + zctrllen + zdatalen + zextralen)
    436 		errx(1, "corrupt patch (patch size)");
    437 
    438 	cfopen(&cf, argv[3], sizeof(header), "control", header[88]);
    439 	cfopen(&df, argv[3], sizeof(header) + zctrllen, "diff", header[89]);
    440 	cfopen(&ef, argv[3], sizeof(header) + zctrllen + zdatalen, "extra",
    441 	       header[90]);
    442 
    443 	if (fclose(f))
    444 		err(1, "fclose(%s)", argv[3]);
    445 
    446 	if(((fd=open(argv[1],O_RDONLY,0))<0) ||
    447 		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
    448 		((old=malloc(oldsize+1))==NULL) ||
    449 		(lseek(fd,0,SEEK_SET)!=0) ||
    450 		(read(fd,old,oldsize)!=oldsize) ||
    451 		(close(fd)==-1)) err(1,"%s",argv[1]);
    452 	if (expect_oldsize != oldsize)
    453 		errx(1, "old size mismatch: %lld != %lld",
    454 		     oldsize, expect_oldsize);
    455 	SHA1(old, oldsize, sha1);
    456 	if (memcmp(sha1, header + 48, sizeof(sha1)) != 0) {
    457 		sha1tostr(sha1, sha1str);
    458 		sha1tostr(header + 48, expected_sha1str);
    459 		errx(1, "old hash mismatch: %s != %s",
    460 	             sha1str, expected_sha1str);
    461 	}
    462 	if((new=malloc(newsize+1))==NULL) err(1,NULL);
    463 
    464 	oldpos=0;newpos=0;
    465 	while(newpos<newsize) {
    466 		/* Read control data */
    467 		for(i=0;i<=2;i++) {
    468 			cfread(&cf, buf, 8);
    469 			ctrl[i]=offtin(buf);
    470 		};
    471 
    472 		/* Sanity-check */
    473 		if((ctrl[0]<0) || (ctrl[1]<0))
    474 			errx(1,"corrupt patch (diff): negative chunk size");
    475 
    476 		/* Sanity-check */
    477 		if(newpos+ctrl[0]>newsize)
    478 			errx(1,"corrupt patch (diff): overrun");
    479 
    480 		/* Read diff string */
    481 		cfread(&df, new + newpos, ctrl[0]);
    482 
    483 		/* Add old data to diff string */
    484 		for(i=0;i<ctrl[0];i++)
    485 			if((oldpos+i>=0) && (oldpos+i<oldsize))
    486 				new[newpos+i]+=old[oldpos+i];
    487 
    488 		/* Adjust pointers */
    489 		newpos+=ctrl[0];
    490 		oldpos+=ctrl[0];
    491 
    492 		/* Sanity-check */
    493 		if(newpos+ctrl[1]>newsize)
    494 			errx(1,"corrupt patch (extra): overrun");
    495 
    496 		/* Read extra string */
    497 		cfread(&ef, new + newpos, ctrl[1]);
    498 
    499 		/* Adjust pointers */
    500 		newpos+=ctrl[1];
    501 		oldpos+=ctrl[2];
    502 	};
    503 
    504 	/* Clean up the readers */
    505 	cfclose(&cf);
    506 	cfclose(&df);
    507 	cfclose(&ef);
    508 
    509 	SHA1(new, newsize, sha1);
    510 	if (memcmp(sha1, header + 68, sizeof(sha1)) != 0) {
    511 		sha1tostr(sha1, sha1str);
    512 		sha1tostr(header + 68, expected_sha1str);
    513 		errx(1, "new hash mismatch: %s != %s",
    514 		     sha1str, expected_sha1str);
    515 	}
    516 
    517 	/* Write the new file */
    518 	if(((fd=open(argv[2],O_CREAT|O_TRUNC|O_WRONLY,0644))<0) ||
    519 		(write(fd,new,newsize)!=newsize) || (close(fd)==-1))
    520 		err(1,"open/write/close(%s)",argv[2]);
    521 
    522 	free(new);
    523 	free(old);
    524 
    525 	return 0;
    526 }
    527