Home | History | Annotate | Download | only in bsdiff
      1 /*-
      2  * Copyright 2003-2005 Colin Percival
      3  * All rights reserved
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted providing that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  * POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #if 0
     28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
     29 #endif
     30 
     31 #include <sys/types.h>
     32 
     33 #include <bzlib.h>
     34 #include <err.h>
     35 #include <fcntl.h>
     36 #include <lzma.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <unistd.h>
     41 #include <zlib.h>
     42 
     43 #if defined(__APPLE__)
     44 #include <libkern/OSByteOrder.h>
     45 #define htole64(x) OSSwapHostToLittleInt64(x)
     46 #elif defined(__linux__)
     47 #include <endian.h>
     48 #elif defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
     49 #define htole64(x) (x)
     50 #else
     51 #error Provide htole64 for this platform
     52 #endif
     53 
     54 #include "chrome/installer/mac/third_party/bsdiff/sha1_adapter.h"
     55 
     56 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
     57 
     58 static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
     59 {
     60 	off_t i,j,k,x,tmp,jj,kk;
     61 
     62 	if(len<16) {
     63 		for(k=start;k<start+len;k+=j) {
     64 			j=1;x=V[I[k]+h];
     65 			for(i=1;k+i<start+len;i++) {
     66 				if(V[I[k+i]+h]<x) {
     67 					x=V[I[k+i]+h];
     68 					j=0;
     69 				};
     70 				if(V[I[k+i]+h]==x) {
     71 					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
     72 					j++;
     73 				};
     74 			};
     75 			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
     76 			if(j==1) I[k]=-1;
     77 		};
     78 		return;
     79 	};
     80 
     81 	x=V[I[start+len/2]+h];
     82 	jj=0;kk=0;
     83 	for(i=start;i<start+len;i++) {
     84 		if(V[I[i]+h]<x) jj++;
     85 		if(V[I[i]+h]==x) kk++;
     86 	};
     87 	jj+=start;kk+=jj;
     88 
     89 	i=start;j=0;k=0;
     90 	while(i<jj) {
     91 		if(V[I[i]+h]<x) {
     92 			i++;
     93 		} else if(V[I[i]+h]==x) {
     94 			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
     95 			j++;
     96 		} else {
     97 			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
     98 			k++;
     99 		};
    100 	};
    101 
    102 	while(jj+j<kk) {
    103 		if(V[I[jj+j]+h]==x) {
    104 			j++;
    105 		} else {
    106 			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
    107 			k++;
    108 		};
    109 	};
    110 
    111 	if(jj>start) split(I,V,start,jj-start,h);
    112 
    113 	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
    114 	if(jj==kk-1) I[jj]=-1;
    115 
    116 	if(start+len>kk) split(I,V,kk,start+len-kk,h);
    117 }
    118 
    119 static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
    120 {
    121 	off_t buckets[256];
    122 	off_t i,h,len;
    123 
    124 	for(i=0;i<256;i++) buckets[i]=0;
    125 	for(i=0;i<oldsize;i++) buckets[old[i]]++;
    126 	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
    127 	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
    128 	buckets[0]=0;
    129 
    130 	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
    131 	I[0]=oldsize;
    132 	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
    133 	V[oldsize]=0;
    134 	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
    135 	I[0]=-1;
    136 
    137 	for(h=1;I[0]!=-(oldsize+1);h+=h) {
    138 		len=0;
    139 		for(i=0;i<oldsize+1;) {
    140 			if(I[i]<0) {
    141 				len-=I[i];
    142 				i-=I[i];
    143 			} else {
    144 				if(len) I[i-len]=-len;
    145 				len=V[I[i]]+1-i;
    146 				split(I,V,i,len,h);
    147 				i+=len;
    148 				len=0;
    149 			};
    150 		};
    151 		if(len) I[i-len]=-len;
    152 	};
    153 
    154 	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
    155 }
    156 
    157 static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
    158 {
    159 	off_t i;
    160 
    161 	for(i=0;(i<oldsize)&&(i<newsize);i++)
    162 		if(old[i]!=new[i]) break;
    163 
    164 	return i;
    165 }
    166 
    167 static off_t search(off_t *I,u_char *old,off_t oldsize,
    168 		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
    169 {
    170 	off_t x,y;
    171 
    172 	if(en-st<2) {
    173 		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
    174 		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
    175 
    176 		if(x>y) {
    177 			*pos=I[st];
    178 			return x;
    179 		} else {
    180 			*pos=I[en];
    181 			return y;
    182 		}
    183 	};
    184 
    185 	x=st+(en-st)/2;
    186 	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
    187 		return search(I,old,oldsize,new,newsize,x,en,pos);
    188 	} else {
    189 		return search(I,old,oldsize,new,newsize,st,x,pos);
    190 	};
    191 }
    192 
    193 static inline void offtout(off_t x,u_char *buf)
    194 {
    195 	*((off_t*)buf) = htole64(x);
    196 }
    197 
    198 /* zlib provides compress2, which deflates to deflate (zlib) format. This is
    199  * unfortunately distinct from gzip format in that the headers wrapping the
    200  * decompressed data are different. gbspatch reads gzip-compressed data using
    201  * the file-oriented gzread interface, which only supports gzip format.
    202  * compress2gzip is identical to zlib's compress2 except that it produces gzip
    203  * output compatible with gzread. This change is achieved by calling
    204  * deflateInit2 instead of deflateInit and specifying 31 for windowBits;
    205  * numbers greater than 15 cause the addition of a gzip wrapper. */
    206 static int compress2gzip(Bytef *dest, uLongf *destLen,
    207                          const Bytef *source, uLong sourceLen, int level)
    208 {
    209 	z_stream stream;
    210 	int err;
    211 
    212 	stream.next_in = (Bytef*)source;
    213 	stream.avail_in = (uInt)sourceLen;
    214 
    215 	stream.next_out = dest;
    216 	stream.avail_out = (uInt)*destLen;
    217 	if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
    218 
    219 	stream.zalloc = (alloc_func)0;
    220 	stream.zfree = (free_func)0;
    221 	stream.opaque = (voidpf)0;
    222 
    223 	err = deflateInit2(&stream,
    224 	                   level, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY);
    225 	if (err != Z_OK) return err;
    226 
    227 	err = deflate(&stream, Z_FINISH);
    228 	if (err != Z_STREAM_END) {
    229 		deflateEnd(&stream);
    230 		return err == Z_OK ? Z_BUF_ERROR : err;
    231 	}
    232 	*destLen = stream.total_out;
    233 
    234 	err = deflateEnd(&stream);
    235 	return err;
    236 }
    237 
    238 /* Recompress buf of size buf_len using bzip2 or gzip. The smallest version is
    239  * used. The original uncompressed variant may be the smallest. Returns a
    240  * number identifying the encoding, 1 for uncompressed, 2 for bzip2, 3 for
    241  * gzip, and 4 for xz/lzma2. If the original uncompressed variant is not
    242  * smallest, it is freed. The caller must free any buf after this function
    243  * returns. */
    244 static char make_small(u_char **buf, off_t *buf_len)
    245 {
    246 	u_char *source = *buf;
    247 	off_t source_len = *buf_len;
    248 	u_char *bz2, *gz, *lzma;
    249 	unsigned int bz2_len;
    250 	size_t gz_len, lzma_len, lzma_pos;
    251 	int bz2_err, gz_err;
    252 	lzma_ret lzma_err;
    253 	lzma_check lzma_ck;
    254 	char smallest;
    255 
    256 	smallest = 1;
    257 
    258 	bz2_len = source_len + 1;
    259 	bz2 = malloc(bz2_len);
    260 	bz2_err = BZ2_bzBuffToBuffCompress((char*)bz2, &bz2_len, (char*)source,
    261 	                                   source_len, 9, 0, 0);
    262 	if (bz2_err == BZ_OK) {
    263 		if (bz2_len < *buf_len) {
    264 			smallest = 2;
    265 			*buf = bz2;
    266 			*buf_len = bz2_len;
    267 		} else {
    268 			free(bz2);
    269 			bz2 = NULL;
    270 		}
    271 	} else if (bz2_err == BZ_OUTBUFF_FULL) {
    272 		free(bz2);
    273 		bz2 = NULL;
    274 	} else {
    275 		errx(1, "BZ2_bzBuffToBuffCompress: %d", bz2_err);
    276 	}
    277 
    278 	gz_len = source_len + 1;
    279 	gz = malloc(gz_len);
    280 	gz_err = compress2gzip(gz, &gz_len, source, source_len, 9);
    281 	if (gz_err == Z_OK) {
    282 		if (gz_len < *buf_len) {
    283 			smallest = 3;
    284 			*buf = gz;
    285 			*buf_len = gz_len;
    286 		} else {
    287 			free(gz);
    288 			gz = NULL;
    289 		}
    290 	} else if (gz_err == Z_BUF_ERROR) {
    291 		free(gz);
    292 		gz = NULL;
    293 	} else {
    294 		errx(1, "compress2gzip: %d", gz_err);
    295 	}
    296 
    297 	lzma_len = source_len + 1;
    298 	lzma = malloc(lzma_len);
    299 	lzma_pos = 0;
    300 
    301 	/* Equivalent to the options used by xz -9 -e. */
    302 	lzma_ck = LZMA_CHECK_CRC64;
    303 	if (!lzma_check_is_supported(lzma_ck))
    304 		lzma_ck = LZMA_CHECK_CRC32;
    305 	lzma_err = lzma_easy_buffer_encode(9 | LZMA_PRESET_EXTREME,
    306 	                                   lzma_ck, NULL,
    307 	                                   source, source_len,
    308 	                                   lzma, &lzma_pos, lzma_len);
    309 	if (lzma_err == LZMA_OK) {
    310 		if (lzma_pos < *buf_len) {
    311 			smallest = 4;
    312 			*buf = lzma;
    313 			*buf_len = lzma_pos;
    314 		} else {
    315 			free(lzma);
    316 			lzma = NULL;
    317 		}
    318 	} else if (lzma_err == LZMA_BUF_ERROR) {
    319 		free(lzma);
    320 		lzma = NULL;
    321 	} else {
    322 		errx(1, "lzma_easy_buffer_encode: %d", lzma_err);
    323 	}
    324 
    325 	if (smallest != 1) {
    326 		free(source);
    327 	}
    328 
    329 	return smallest;
    330 }
    331 
    332 int main(int argc,char *argv[])
    333 {
    334 	int fd;
    335 	u_char *old,*new;
    336 	off_t oldsize,newsize;
    337 	off_t *I,*V;
    338 	off_t scan,pos,len;
    339 	off_t lastscan,lastpos,lastoffset;
    340 	off_t oldscore,scsc;
    341 	off_t s,Sf,lenf,Sb,lenb;
    342 	off_t overlap,Ss,lens;
    343 	off_t i;
    344 	off_t cblen, dblen, eblen;
    345 	u_char *cb, *db, *eb;
    346 	u_char header[96];
    347 	FILE * pf;
    348 
    349 	if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile",argv[0]);
    350 
    351 	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
    352 		that we never try to malloc(0) and get a NULL pointer */
    353 	if(((fd=open(argv[1],O_RDONLY,0))<0) ||
    354 		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
    355 		((old=malloc(oldsize+1))==NULL) ||
    356 		(lseek(fd,0,SEEK_SET)!=0) ||
    357 		(read(fd,old,oldsize)!=oldsize) ||
    358 		(close(fd)==-1)) err(1,"%s",argv[1]);
    359 
    360 	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
    361 		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
    362 
    363 	qsufsort(I,V,old,oldsize);
    364 
    365 	free(V);
    366 
    367 	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
    368 		that we never try to malloc(0) and get a NULL pointer */
    369 	if(((fd=open(argv[2],O_RDONLY,0))<0) ||
    370 		((newsize=lseek(fd,0,SEEK_END))==-1) ||
    371 		((new=malloc(newsize+1))==NULL) ||
    372 		(lseek(fd,0,SEEK_SET)!=0) ||
    373 		(read(fd,new,newsize)!=newsize) ||
    374 		(close(fd)==-1)) err(1,"%s",argv[2]);
    375 
    376 	if(((cb=malloc(newsize+1))==NULL) ||
    377 		((db=malloc(newsize+1))==NULL) ||
    378 		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
    379 	cblen=0;
    380 	dblen=0;
    381 	eblen=0;
    382 
    383 	/* Create the patch file */
    384 	if ((pf = fopen(argv[3], "wb")) == NULL)
    385 		err(1, "%s", argv[3]);
    386 
    387 	/* File format:
    388 		0	8	"BSDIFF4G"
    389 		8	8	length of compressed control block (x)
    390 		16	8	length of compressed diff block (y)
    391 		24	8	length of compressed extra block (z)
    392 		32	8	length of old file
    393 		40	8	length of new file
    394 		48	20	SHA1 of old file
    395 		68	20	SHA1 of new file
    396 		88	1	encoding of control block
    397 		89	1	encoding of diff block
    398 		90	1	encoding of extra block
    399 		91	5	unused
    400 		96	x	compressed control block
    401 		96+x	y	compressed diff block
    402 		96+x+y	z	compressed extra block
    403 	Encodings are 1 (uncompressed), 2 (bzip2), 3 (gzip), and 4 (xz/lzma2).
    404 	*/
    405 
    406 	memset(header, 0, sizeof(header));
    407 	if (fwrite(header, sizeof(header), 1, pf) != 1)
    408 		err(1, "fwrite(%s)", argv[3]);
    409 	memcpy(header, "BSDIFF4G", 8);
    410 	offtout(oldsize, header + 32);
    411 	offtout(newsize, header + 40);
    412 	SHA1(old, oldsize, header + 48);
    413 	SHA1(new, newsize, header + 68);
    414 
    415 	/* Compute the differences */
    416 	scan=0;len=0;
    417 	lastscan=0;lastpos=0;lastoffset=0;
    418 	while(scan<newsize) {
    419 		oldscore=0;
    420 
    421 		for(scsc=scan+=len;scan<newsize;scan++) {
    422 			len=search(I,old,oldsize,new+scan,newsize-scan,
    423 					0,oldsize,&pos);
    424 
    425 			for(;scsc<scan+len;scsc++)
    426 			if((scsc+lastoffset<oldsize) &&
    427 				(old[scsc+lastoffset] == new[scsc]))
    428 				oldscore++;
    429 
    430 			if(((len==oldscore) && (len!=0)) ||
    431 				(len>oldscore+8)) break;
    432 
    433 			if((scan+lastoffset<oldsize) &&
    434 				(old[scan+lastoffset] == new[scan]))
    435 				oldscore--;
    436 		};
    437 
    438 		if((len!=oldscore) || (scan==newsize)) {
    439 			s=0;Sf=0;lenf=0;
    440 			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
    441 				if(old[lastpos+i]==new[lastscan+i]) s++;
    442 				i++;
    443 				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
    444 			};
    445 
    446 			lenb=0;
    447 			if(scan<newsize) {
    448 				s=0;Sb=0;
    449 				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
    450 					if(old[pos-i]==new[scan-i]) s++;
    451 					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
    452 				};
    453 			};
    454 
    455 			if(lastscan+lenf>scan-lenb) {
    456 				overlap=(lastscan+lenf)-(scan-lenb);
    457 				s=0;Ss=0;lens=0;
    458 				for(i=0;i<overlap;i++) {
    459 					if(new[lastscan+lenf-overlap+i]==
    460 					   old[lastpos+lenf-overlap+i]) s++;
    461 					if(new[scan-lenb+i]==
    462 					   old[pos-lenb+i]) s--;
    463 					if(s>Ss) { Ss=s; lens=i+1; };
    464 				};
    465 
    466 				lenf+=lens-overlap;
    467 				lenb-=lens;
    468 			};
    469 
    470 			for(i=0;i<lenf;i++)
    471 				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
    472 			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
    473 				eb[eblen+i]=new[lastscan+lenf+i];
    474 
    475 			dblen+=lenf;
    476 			eblen+=(scan-lenb)-(lastscan+lenf);
    477 
    478 			offtout(lenf, cb + cblen);
    479 			cblen += 8;
    480 
    481 			offtout((scan - lenb) - (lastscan + lenf), cb + cblen);
    482 			cblen += 8;
    483 
    484 			offtout((pos - lenb) - (lastpos + lenf), cb + cblen);
    485 			cblen += 8;
    486 
    487 			lastscan=scan-lenb;
    488 			lastpos=pos-lenb;
    489 			lastoffset=pos-scan;
    490 		};
    491 	};
    492 
    493 	header[88] = make_small(&cb, &cblen);
    494 	header[89] = make_small(&db, &dblen);
    495 	header[90] = make_small(&eb, &eblen);
    496 
    497 	if (fwrite(cb, 1, cblen, pf) != cblen)
    498 		err(1, "fwrite");
    499 	if (fwrite(db, 1, dblen, pf) != dblen)
    500 		err(1, "fwrite");
    501 	if (fwrite(eb, 1, eblen, pf) != eblen)
    502 		err(1, "fwrite");
    503 
    504 	offtout(cblen, header + 8);
    505 	offtout(dblen, header + 16);
    506 	offtout(eblen, header + 24);
    507 
    508 	/* Seek to the beginning, write the header, and close the file */
    509 	if (fseeko(pf, 0, SEEK_SET))
    510 		err(1, "fseeko");
    511 	if (fwrite(header, sizeof(header), 1, pf) != 1)
    512 		err(1, "fwrite(%s)", argv[3]);
    513 	if (fclose(pf))
    514 		err(1, "fclose");
    515 
    516 	/* Free the memory we used */
    517 	free(cb);
    518 	free(db);
    519 	free(eb);
    520 	free(I);
    521 	free(old);
    522 	free(new);
    523 
    524 	return 0;
    525 }
    526