Home | History | Annotate | Download | only in lib
      1 /*
      2  * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 /*
     11  * Important notes about in-place decompression
     12  *
     13  * At least on x86, the kernel is decompressed in place: the compressed data
     14  * is placed to the end of the output buffer, and the decompressor overwrites
     15  * most of the compressed data. There must be enough safety margin to
     16  * guarantee that the write position is always behind the read position.
     17  *
     18  * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
     19  * Note that the margin with XZ is bigger than with Deflate (gzip)!
     20  *
     21  * The worst case for in-place decompression is that the beginning of
     22  * the file is compressed extremely well, and the rest of the file is
     23  * uncompressible. Thus, we must look for worst-case expansion when the
     24  * compressor is encoding uncompressible data.
     25  *
     26  * The structure of the .xz file in case of a compresed kernel is as follows.
     27  * Sizes (as bytes) of the fields are in parenthesis.
     28  *
     29  *    Stream Header (12)
     30  *    Block Header:
     31  *      Block Header (8-12)
     32  *      Compressed Data (N)
     33  *      Block Padding (0-3)
     34  *      CRC32 (4)
     35  *    Index (8-20)
     36  *    Stream Footer (12)
     37  *
     38  * Normally there is exactly one Block, but let's assume that there are
     39  * 2-4 Blocks just in case. Because Stream Header and also Block Header
     40  * of the first Block don't make the decompressor produce any uncompressed
     41  * data, we can ignore them from our calculations. Block Headers of possible
     42  * additional Blocks have to be taken into account still. With these
     43  * assumptions, it is safe to assume that the total header overhead is
     44  * less than 128 bytes.
     45  *
     46  * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
     47  * doesn't change the size of the data, it is enough to calculate the
     48  * safety margin for LZMA2.
     49  *
     50  * LZMA2 stores the data in chunks. Each chunk has a header whose size is
     51  * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
     52  * the maximum chunk header size is 8 bytes. After the chunk header, there
     53  * may be up to 64 KiB of actual payload in the chunk. Often the payload is
     54  * quite a bit smaller though; to be safe, let's assume that an average
     55  * chunk has only 32 KiB of payload.
     56  *
     57  * The maximum uncompressed size of the payload is 2 MiB. The minimum
     58  * uncompressed size of the payload is in practice never less than the
     59  * payload size itself. The LZMA2 format would allow uncompressed size
     60  * to be less than the payload size, but no sane compressor creates such
     61  * files. LZMA2 supports storing uncompressible data in uncompressed form,
     62  * so there's never a need to create payloads whose uncompressed size is
     63  * smaller than the compressed size.
     64  *
     65  * The assumption, that the uncompressed size of the payload is never
     66  * smaller than the payload itself, is valid only when talking about
     67  * the payload as a whole. It is possible that the payload has parts where
     68  * the decompressor consumes more input than it produces output. Calculating
     69  * the worst case for this would be tricky. Instead of trying to do that,
     70  * let's simply make sure that the decompressor never overwrites any bytes
     71  * of the payload which it is currently reading.
     72  *
     73  * Now we have enough information to calculate the safety margin. We need
     74  *   - 128 bytes for the .xz file format headers;
     75  *   - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
     76  *     per chunk, each chunk having average payload size of 32 KiB); and
     77  *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
     78  *     the decompressor never overwrites anything from the LZMA2 chunk
     79  *     payload it is currently reading.
     80  *
     81  * We get the following formula:
     82  *
     83  *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
     84  *                  = 128 + (uncompressed_size >> 12) + 65536
     85  *
     86  * For comparison, according to arch/x86/boot/compressed/misc.c, the
     87  * equivalent formula for Deflate is this:
     88  *
     89  *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
     90  *
     91  * Thus, when updating Deflate-only in-place kernel decompressor to
     92  * support XZ, the fixed overhead has to be increased from 18+32768 bytes
     93  * to 128+65536 bytes.
     94  */
     95 
     96 /*
     97  * STATIC is defined to "static" if we are being built for kernel
     98  * decompression (pre-boot code). <linux/decompress/mm.h> will define
     99  * STATIC to empty if it wasn't already defined. Since we will need to
    100  * know later if we are being used for kernel decompression, we define
    101  * XZ_PREBOOT here.
    102  */
    103 #ifdef STATIC
    104 #	define XZ_PREBOOT
    105 #endif
    106 #ifdef __KERNEL__
    107 #	include <linux/decompress/mm.h>
    108 #endif
    109 #define XZ_EXTERN STATIC
    110 
    111 #ifndef XZ_PREBOOT
    112 #	include <linux/slab.h>
    113 #	include <linux/xz.h>
    114 #else
    115 /*
    116  * Use the internal CRC32 code instead of kernel's CRC32 module, which
    117  * is not available in early phase of booting.
    118  */
    119 #define XZ_INTERNAL_CRC32 1
    120 
    121 /*
    122  * For boot time use, we enable only the BCJ filter of the current
    123  * architecture or none if no BCJ filter is available for the architecture.
    124  */
    125 #ifdef CONFIG_X86
    126 #	define XZ_DEC_X86
    127 #endif
    128 #ifdef CONFIG_PPC
    129 #	define XZ_DEC_POWERPC
    130 #endif
    131 #ifdef CONFIG_ARM
    132 #	define XZ_DEC_ARM
    133 #endif
    134 #ifdef CONFIG_IA64
    135 #	define XZ_DEC_IA64
    136 #endif
    137 #ifdef CONFIG_SPARC
    138 #	define XZ_DEC_SPARC
    139 #endif
    140 
    141 /*
    142  * This will get the basic headers so that memeq() and others
    143  * can be defined.
    144  */
    145 #include "xz/xz_private.h"
    146 
    147 /*
    148  * Replace the normal allocation functions with the versions from
    149  * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
    150  * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
    151  * Workaround it here because the other decompressors don't need it.
    152  */
    153 #undef kmalloc
    154 #undef kfree
    155 #undef vmalloc
    156 #undef vfree
    157 #define kmalloc(size, flags) malloc(size)
    158 #define kfree(ptr) free(ptr)
    159 #define vmalloc(size) malloc(size)
    160 #define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)
    161 
    162 /*
    163  * FIXME: Not all basic memory functions are provided in architecture-specific
    164  * files (yet). We define our own versions here for now, but this should be
    165  * only a temporary solution.
    166  *
    167  * memeq and memzero are not used much and any remotely sane implementation
    168  * is fast enough. memcpy/memmove speed matters in multi-call mode, but
    169  * the kernel image is decompressed in single-call mode, in which only
    170  * memcpy speed can matter and only if there is a lot of uncompressible data
    171  * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
    172  * functions below should just be kept small; it's probably not worth
    173  * optimizing for speed.
    174  */
    175 
    176 #ifndef memeq
    177 static bool memeq(const void *a, const void *b, size_t size)
    178 {
    179 	const uint8_t *x = a;
    180 	const uint8_t *y = b;
    181 	size_t i;
    182 
    183 	for (i = 0; i < size; ++i)
    184 		if (x[i] != y[i])
    185 			return false;
    186 
    187 	return true;
    188 }
    189 #endif
    190 
    191 #ifndef memzero
    192 static void memzero(void *buf, size_t size)
    193 {
    194 	uint8_t *b = buf;
    195 	uint8_t *e = b + size;
    196 
    197 	while (b != e)
    198 		*b++ = '\0';
    199 }
    200 #endif
    201 
    202 #ifndef memmove
    203 /* Not static to avoid a conflict with the prototype in the Linux headers. */
    204 void *memmove(void *dest, const void *src, size_t size)
    205 {
    206 	uint8_t *d = dest;
    207 	const uint8_t *s = src;
    208 	size_t i;
    209 
    210 	if (d < s) {
    211 		for (i = 0; i < size; ++i)
    212 			d[i] = s[i];
    213 	} else if (d > s) {
    214 		i = size;
    215 		while (i-- > 0)
    216 			d[i] = s[i];
    217 	}
    218 
    219 	return dest;
    220 }
    221 #endif
    222 
    223 /*
    224  * Since we need memmove anyway, would use it as memcpy too.
    225  * Commented out for now to avoid breaking things.
    226  */
    227 /*
    228 #ifndef memcpy
    229 #	define memcpy memmove
    230 #endif
    231 */
    232 
    233 #include "xz/xz_crc32.c"
    234 #include "xz/xz_dec_stream.c"
    235 #include "xz/xz_dec_lzma2.c"
    236 #include "xz/xz_dec_bcj.c"
    237 
    238 #endif /* XZ_PREBOOT */
    239 
    240 /* Size of the input and output buffers in multi-call mode */
    241 #define XZ_IOBUF_SIZE 4096
    242 
    243 /*
    244  * This function implements the API defined in <linux/decompress/generic.h>.
    245  *
    246  * This wrapper will automatically choose single-call or multi-call mode
    247  * of the native XZ decoder API. The single-call mode can be used only when
    248  * both input and output buffers are available as a single chunk, i.e. when
    249  * fill() and flush() won't be used.
    250  */
    251 STATIC int INIT unxz(unsigned char *in, int in_size,
    252 		     int (*fill)(void *dest, unsigned int size),
    253 		     int (*flush)(void *src, unsigned int size),
    254 		     unsigned char *out, int *in_used,
    255 		     void (*error)(char *x))
    256 {
    257 	struct xz_buf b;
    258 	struct xz_dec *s;
    259 	enum xz_ret ret;
    260 	bool must_free_in = false;
    261 
    262 #if XZ_INTERNAL_CRC32
    263 	xz_crc32_init();
    264 #endif
    265 
    266 	if (in_used != NULL)
    267 		*in_used = 0;
    268 
    269 	if (fill == NULL && flush == NULL)
    270 		s = xz_dec_init(XZ_SINGLE, 0);
    271 	else
    272 		s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
    273 
    274 	if (s == NULL)
    275 		goto error_alloc_state;
    276 
    277 	if (flush == NULL) {
    278 		b.out = out;
    279 		b.out_size = (size_t)-1;
    280 	} else {
    281 		b.out_size = XZ_IOBUF_SIZE;
    282 		b.out = malloc(XZ_IOBUF_SIZE);
    283 		if (b.out == NULL)
    284 			goto error_alloc_out;
    285 	}
    286 
    287 	if (in == NULL) {
    288 		must_free_in = true;
    289 		in = malloc(XZ_IOBUF_SIZE);
    290 		if (in == NULL)
    291 			goto error_alloc_in;
    292 	}
    293 
    294 	b.in = in;
    295 	b.in_pos = 0;
    296 	b.in_size = in_size;
    297 	b.out_pos = 0;
    298 
    299 	if (fill == NULL && flush == NULL) {
    300 		ret = xz_dec_run(s, &b);
    301 	} else {
    302 		do {
    303 			if (b.in_pos == b.in_size && fill != NULL) {
    304 				if (in_used != NULL)
    305 					*in_used += b.in_pos;
    306 
    307 				b.in_pos = 0;
    308 
    309 				in_size = fill(in, XZ_IOBUF_SIZE);
    310 				if (in_size < 0) {
    311 					/*
    312 					 * This isn't an optimal error code
    313 					 * but it probably isn't worth making
    314 					 * a new one either.
    315 					 */
    316 					ret = XZ_BUF_ERROR;
    317 					break;
    318 				}
    319 
    320 				b.in_size = in_size;
    321 			}
    322 
    323 			ret = xz_dec_run(s, &b);
    324 
    325 			if (flush != NULL && (b.out_pos == b.out_size
    326 					|| (ret != XZ_OK && b.out_pos > 0))) {
    327 				/*
    328 				 * Setting ret here may hide an error
    329 				 * returned by xz_dec_run(), but probably
    330 				 * it's not too bad.
    331 				 */
    332 				if (flush(b.out, b.out_pos) != (int)b.out_pos)
    333 					ret = XZ_BUF_ERROR;
    334 
    335 				b.out_pos = 0;
    336 			}
    337 		} while (ret == XZ_OK);
    338 
    339 		if (must_free_in)
    340 			free(in);
    341 
    342 		if (flush != NULL)
    343 			free(b.out);
    344 	}
    345 
    346 	if (in_used != NULL)
    347 		*in_used += b.in_pos;
    348 
    349 	xz_dec_end(s);
    350 
    351 	switch (ret) {
    352 	case XZ_STREAM_END:
    353 		return 0;
    354 
    355 	case XZ_MEM_ERROR:
    356 		/* This can occur only in multi-call mode. */
    357 		error("XZ decompressor ran out of memory");
    358 		break;
    359 
    360 	case XZ_FORMAT_ERROR:
    361 		error("Input is not in the XZ format (wrong magic bytes)");
    362 		break;
    363 
    364 	case XZ_OPTIONS_ERROR:
    365 		error("Input was encoded with settings that are not "
    366 				"supported by this XZ decoder");
    367 		break;
    368 
    369 	case XZ_DATA_ERROR:
    370 	case XZ_BUF_ERROR:
    371 		error("XZ-compressed data is corrupt");
    372 		break;
    373 
    374 	default:
    375 		error("Bug in the XZ decompressor");
    376 		break;
    377 	}
    378 
    379 	return -1;
    380 
    381 error_alloc_in:
    382 	if (flush != NULL)
    383 		free(b.out);
    384 
    385 error_alloc_out:
    386 	xz_dec_end(s);
    387 
    388 error_alloc_state:
    389 	error("XZ decompressor ran out of memory");
    390 	return -1;
    391 }
    392 
    393 /*
    394  * This macro is used by architecture-specific files to decompress
    395  * the kernel image.
    396  */
    397 #define decompress unxz
    398