Home | History | Annotate | Download | only in pending
      1 /*
      2  * Simple XZ decoder command line tool
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  * Modified for toybox by Isaac Dunham
      9 USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))
     10 
     11 config XZCAT
     12   bool "xzcat"
     13   default n
     14   help
     15     usage: xzcat [filename...]
     16 
     17     Decompress listed files to stdout. Use stdin if no files listed.
     18 
     19 */
     20 #define FOR_xzcat
     21 #include "toys.h"
     22 
     23 // BEGIN xz.h
     24 
     25 /**
     26  * enum xz_ret - Return codes
     27  * @XZ_OK:                  Everything is OK so far. More input or more
     28  *                          output space is required to continue.
     29  * @XZ_STREAM_END:          Operation finished successfully.
     30  * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
     31  *                          is still possible in multi-call mode by simply
     32  *                          calling xz_dec_run() again.
     33  *                          Note that this return value is used only if
     34  *                          XZ_DEC_ANY_CHECK was defined at build time,
     35  *                          which is not used in the kernel. Unsupported
     36  *                          check types return XZ_OPTIONS_ERROR if
     37  *                          XZ_DEC_ANY_CHECK was not defined at build time.
     38  * @XZ_MEM_ERROR:           Allocating memory failed. The amount of memory
     39  *                          that was tried to be allocated was no more than the
     40  *                          dict_max argument given to xz_dec_init().
     41  * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
     42  *                          allowed by the dict_max argument given to
     43  *                          xz_dec_init().
     44  * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
     45  *                          bytes).
     46  * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
     47  *                          compression options. In the decoder this means
     48  *                          that the header CRC32 matches, but the header
     49  *                          itself specifies something that we don't support.
     50  * @XZ_DATA_ERROR:          Compressed data is corrupt.
     51  * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
     52  *                          different between multi-call and single-call
     53  *                          mode; more information below.
     54  *
     55  * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot
     56  * consume any input and cannot produce any new output. This happens when
     57  * there is no new input available, or the output buffer is full while at
     58  * least one output byte is still pending. Assuming your code is not buggy,
     59  * you can get this error only when decoding a compressed stream that is
     60  * truncated or otherwise corrupt.
     61  */
     62 enum xz_ret {
     63   XZ_OK,
     64   XZ_STREAM_END,
     65   XZ_UNSUPPORTED_CHECK,
     66   XZ_MEM_ERROR,
     67   XZ_MEMLIMIT_ERROR,
     68   XZ_FORMAT_ERROR,
     69   XZ_OPTIONS_ERROR,
     70   XZ_DATA_ERROR,
     71   XZ_BUF_ERROR
     72 };
     73 
     74 /**
     75  * struct xz_buf - Passing input and output buffers to XZ code
     76  * @in:         Beginning of the input buffer. This may be NULL if and only
     77  *              if in_pos is equal to in_size.
     78  * @in_pos:     Current position in the input buffer. This must not exceed
     79  *              in_size.
     80  * @in_size:    Size of the input buffer
     81  * @out:        Beginning of the output buffer. This may be NULL if and only
     82  *              if out_pos is equal to out_size.
     83  * @out_pos:    Current position in the output buffer. This must not exceed
     84  *              out_size.
     85  * @out_size:   Size of the output buffer
     86  *
     87  * Only the contents of the output buffer from out[out_pos] onward, and
     88  * the variables in_pos and out_pos are modified by the XZ code.
     89  */
     90 struct xz_buf {
     91   const uint8_t *in;
     92   size_t in_pos;
     93   size_t in_size;
     94 
     95   uint8_t *out;
     96   size_t out_pos;
     97   size_t out_size;
     98 };
     99 
    100 /**
    101  * struct xz_dec - Opaque type to hold the XZ decoder state
    102  */
    103 struct xz_dec;
    104 
    105 /**
    106  * xz_dec_init() - Allocate and initialize a XZ decoder state
    107  * @mode:       Operation mode
    108  * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
    109  *              multi-call decoding. LZMA2 dictionary is always 2^n bytes
    110  *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
    111  *              in practice), so other values for dict_max don't make sense.
    112  *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
    113  *              512 KiB, and 1 MiB are probably the only reasonable values,
    114  *              except for kernel and initramfs images where a bigger
    115  *              dictionary can be fine and useful.
    116  *
    117  * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
    118  * may allocate once it has parsed the dictionary size from the stream
    119  * headers. This way excessive allocations can be avoided while still
    120  * limiting the maximum memory usage to a sane value to prevent running the
    121  * system out of memory when decompressing streams from untrusted sources.
    122  *
    123  * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
    124  * ready to be used with xz_dec_run(). If memory allocation fails,
    125  * xz_dec_init() returns NULL.
    126  */
    127 struct xz_dec *xz_dec_init(uint32_t dict_max);
    128 
    129 /**
    130  * xz_dec_run() - Run the XZ decoder
    131  * @s:          Decoder state allocated using xz_dec_init()
    132  * @b:          Input and output buffers
    133  *
    134  * The possible return values depend on build options and operation mode.
    135  * See enum xz_ret for details.
    136  *
    137  * Note that if an error occurs in single-call mode (return value is not
    138  * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
    139  * contents of the output buffer from b->out[b->out_pos] onward are
    140  * undefined. This is true even after XZ_BUF_ERROR, because with some filter
    141  * chains, there may be a second pass over the output buffer, and this pass
    142  * cannot be properly done if the output buffer is truncated. Thus, you
    143  * cannot give the single-call decoder a too small buffer and then expect to
    144  * get that amount valid data from the beginning of the stream. You must use
    145  * the multi-call decoder if you don't want to uncompress the whole stream.
    146  */
    147 enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
    148 
    149 /**
    150  * xz_dec_reset() - Reset an already allocated decoder state
    151  * @s:          Decoder state allocated using xz_dec_init()
    152  *
    153  * This function can be used to reset the multi-call decoder state without
    154  * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
    155  *
    156  * In single-call mode, xz_dec_reset() is always called in the beginning of
    157  * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
    158  * multi-call mode.
    159  */
    160 void xz_dec_reset(struct xz_dec *s);
    161 
    162 /**
    163  * xz_dec_end() - Free the memory allocated for the decoder state
    164  * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
    165  *              this function does nothing.
    166  */
    167 void xz_dec_end(struct xz_dec *s);
    168 
    169 /*
    170  * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
    171  * calculation, the third argument must be zero. To continue the calculation,
    172  * the previously returned value is passed as the third argument.
    173  */
    174 static uint32_t xz_crc32_table[256];
    175 
    176 uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
    177 {
    178   crc = ~crc;
    179 
    180   while (size != 0) {
    181     crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
    182     --size;
    183   }
    184 
    185   return ~crc;
    186 }
    187 
    188 static uint64_t xz_crc64_table[256];
    189 
    190 
    191 // END xz.h
    192 
    193 static uint8_t in[BUFSIZ];
    194 static uint8_t out[BUFSIZ];
    195 
    196 void do_xzcat(int fd, char *name)
    197 {
    198   struct xz_buf b;
    199   struct xz_dec *s;
    200   enum xz_ret ret;
    201   const char *msg;
    202 
    203   crc_init(xz_crc32_table, 1);
    204   const uint64_t poly = 0xC96C5795D7870F42ULL;
    205   uint32_t i;
    206   uint32_t j;
    207   uint64_t r;
    208 
    209   /* initialize CRC64 table*/
    210   for (i = 0; i < 256; ++i) {
    211     r = i;
    212     for (j = 0; j < 8; ++j)
    213       r = (r >> 1) ^ (poly & ~((r & 1) - 1));
    214 
    215     xz_crc64_table[i] = r;
    216   }
    217 
    218   /*
    219    * Support up to 64 MiB dictionary. The actually needed memory
    220    * is allocated once the headers have been parsed.
    221    */
    222   s = xz_dec_init(1 << 26);
    223   if (s == NULL) {
    224     msg = "Memory allocation failed\n";
    225     goto error;
    226   }
    227 
    228   b.in = in;
    229   b.in_pos = 0;
    230   b.in_size = 0;
    231   b.out = out;
    232   b.out_pos = 0;
    233   b.out_size = BUFSIZ;
    234 
    235   for (;;) {
    236     if (b.in_pos == b.in_size) {
    237       b.in_size = read(fd, in, sizeof(in));
    238       b.in_pos = 0;
    239     }
    240 
    241     ret = xz_dec_run(s, &b);
    242 
    243     if (b.out_pos == sizeof(out)) {
    244       if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
    245         msg = "Write error\n";
    246         goto error;
    247       }
    248 
    249       b.out_pos = 0;
    250     }
    251 
    252     if (ret == XZ_OK)
    253       continue;
    254 
    255     if (ret == XZ_UNSUPPORTED_CHECK)
    256       continue;
    257 
    258     if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
    259       msg = "Write error\n";
    260       goto error;
    261     }
    262 
    263     switch (ret) {
    264     case XZ_STREAM_END:
    265       xz_dec_end(s);
    266       return;
    267 
    268     case XZ_MEM_ERROR:
    269       msg = "Memory allocation failed\n";
    270       goto error;
    271 
    272     case XZ_MEMLIMIT_ERROR:
    273       msg = "Memory usage limit reached\n";
    274       goto error;
    275 
    276     case XZ_FORMAT_ERROR:
    277       msg = "Not a .xz file\n";
    278       goto error;
    279 
    280     case XZ_OPTIONS_ERROR:
    281       msg = "Unsupported options in the .xz headers\n";
    282       goto error;
    283 
    284     case XZ_DATA_ERROR:
    285     case XZ_BUF_ERROR:
    286       msg = "File is corrupt\n";
    287       goto error;
    288 
    289     default:
    290       msg = "Bug!\n";
    291       goto error;
    292     }
    293   }
    294 
    295 error:
    296   xz_dec_end(s);
    297   error_exit("%s", msg);
    298 }
    299 
    300 void xzcat_main(void)
    301 {
    302   loopfiles(toys.optargs, do_xzcat);
    303 }
    304 
    305 // BEGIN xz_private.h
    306 
    307 
    308 /* Uncomment as needed to enable BCJ filter decoders.
    309  * These cost about 2.5 k when all are enabled; SPARC and IA64 make 0.7 k
    310  * */
    311 
    312 #define XZ_DEC_X86
    313 #define XZ_DEC_POWERPC
    314 #define XZ_DEC_IA64
    315 #define XZ_DEC_ARM
    316 #define XZ_DEC_ARMTHUMB
    317 #define XZ_DEC_SPARC
    318 
    319 
    320 #define memeq(a, b, size) (memcmp(a, b, size) == 0)
    321 
    322 /* Inline functions to access unaligned unsigned 32-bit integers */
    323 #ifndef get_unaligned_le32
    324 static inline uint32_t get_unaligned_le32(const uint8_t *buf)
    325 {
    326   return (uint32_t)buf[0]
    327       | ((uint32_t)buf[1] << 8)
    328       | ((uint32_t)buf[2] << 16)
    329       | ((uint32_t)buf[3] << 24);
    330 }
    331 #endif
    332 
    333 #ifndef get_unaligned_be32
    334 static inline uint32_t get_unaligned_be32(const uint8_t *buf)
    335 {
    336   return (uint32_t)(buf[0] << 24)
    337       | ((uint32_t)buf[1] << 16)
    338       | ((uint32_t)buf[2] << 8)
    339       | (uint32_t)buf[3];
    340 }
    341 #endif
    342 
    343 #ifndef put_unaligned_le32
    344 static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
    345 {
    346   buf[0] = (uint8_t)val;
    347   buf[1] = (uint8_t)(val >> 8);
    348   buf[2] = (uint8_t)(val >> 16);
    349   buf[3] = (uint8_t)(val >> 24);
    350 }
    351 #endif
    352 
    353 #ifndef put_unaligned_be32
    354 static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
    355 {
    356   buf[0] = (uint8_t)(val >> 24);
    357   buf[1] = (uint8_t)(val >> 16);
    358   buf[2] = (uint8_t)(val >> 8);
    359   buf[3] = (uint8_t)val;
    360 }
    361 #endif
    362 
    363 /*
    364  * Use get_unaligned_le32() also for aligned access for simplicity. On
    365  * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
    366  * could save a few bytes in code size.
    367  */
    368 #ifndef get_le32
    369 #	define get_le32 get_unaligned_le32
    370 #endif
    371 
    372 /*
    373  * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
    374  * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
    375  */
    376 #ifndef XZ_DEC_BCJ
    377 #	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
    378       || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
    379       || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
    380       || defined(XZ_DEC_SPARC)
    381 #		define XZ_DEC_BCJ
    382 #	endif
    383 #endif
    384 
    385 /*
    386  * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
    387  * before calling xz_dec_lzma2_run().
    388  */
    389 struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);
    390 
    391 /*
    392  * Decode the LZMA2 properties (one byte) and reset the decoder. Return
    393  * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
    394  * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
    395  * decoder doesn't support.
    396  */
    397 enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
    398            uint8_t props);
    399 
    400 /* Decode raw LZMA2 stream from b->in to b->out. */
    401 enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
    402                struct xz_buf *b);
    403 
    404 // END "xz_private.h"
    405 
    406 
    407 
    408 
    409 /*
    410  * Branch/Call/Jump (BCJ) filter decoders
    411  * The rest of the code is inside this ifdef. It makes things a little more
    412  * convenient when building without support for any BCJ filters.
    413  */
    414 #ifdef XZ_DEC_BCJ
    415 
    416 struct xz_dec_bcj {
    417   /* Type of the BCJ filter being used */
    418   enum {
    419     BCJ_X86 = 4,        /* x86 or x86-64 */
    420     BCJ_POWERPC = 5,    /* Big endian only */
    421     BCJ_IA64 = 6,       /* Big or little endian */
    422     BCJ_ARM = 7,        /* Little endian only */
    423     BCJ_ARMTHUMB = 8,   /* Little endian only */
    424     BCJ_SPARC = 9       /* Big or little endian */
    425   } type;
    426 
    427   /*
    428    * Return value of the next filter in the chain. We need to preserve
    429    * this information across calls, because we must not call the next
    430    * filter anymore once it has returned XZ_STREAM_END.
    431    */
    432   enum xz_ret ret;
    433 
    434   /*
    435    * Absolute position relative to the beginning of the uncompressed
    436    * data (in a single .xz Block). We care only about the lowest 32
    437    * bits so this doesn't need to be uint64_t even with big files.
    438    */
    439   uint32_t pos;
    440 
    441   /* x86 filter state */
    442   uint32_t x86_prev_mask;
    443 
    444   /* Temporary space to hold the variables from struct xz_buf */
    445   uint8_t *out;
    446   size_t out_pos;
    447   size_t out_size;
    448 
    449   struct {
    450     /* Amount of already filtered data in the beginning of buf */
    451     size_t filtered;
    452 
    453     /* Total amount of data currently stored in buf  */
    454     size_t size;
    455 
    456     /*
    457      * Buffer to hold a mix of filtered and unfiltered data. This
    458      * needs to be big enough to hold Alignment + 2 * Look-ahead:
    459      *
    460      * Type         Alignment   Look-ahead
    461      * x86              1           4
    462      * PowerPC          4           0
    463      * IA-64           16           0
    464      * ARM              4           0
    465      * ARM-Thumb        2           2
    466      * SPARC            4           0
    467      */
    468     uint8_t buf[16];
    469   } temp;
    470 };
    471 
    472 /*
    473  * Decode the Filter ID of a BCJ filter. This implementation doesn't
    474  * support custom start offsets, so no decoding of Filter Properties
    475  * is needed. Returns XZ_OK if the given Filter ID is supported.
    476  * Otherwise XZ_OPTIONS_ERROR is returned.
    477  */
    478 enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
    479 
    480 /*
    481  * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
    482  * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
    483  * must be called directly.
    484  */
    485 enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
    486              struct xz_dec_lzma2 *lzma2,
    487              struct xz_buf *b);
    488 
    489 #ifdef XZ_DEC_X86
    490 /*
    491  * This is used to test the most significant byte of a memory address
    492  * in an x86 instruction.
    493  */
    494 static inline int bcj_x86_test_msbyte(uint8_t b)
    495 {
    496   return b == 0x00 || b == 0xFF;
    497 }
    498 
    499 static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    500 {
    501   static const int mask_to_allowed_status[8]
    502     = { 1,1,1,0,1,0,0,0 };
    503 
    504   static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
    505 
    506   size_t i;
    507   size_t prev_pos = (size_t)-1;
    508   uint32_t prev_mask = s->x86_prev_mask;
    509   uint32_t src;
    510   uint32_t dest;
    511   uint32_t j;
    512   uint8_t b;
    513 
    514   if (size <= 4)
    515     return 0;
    516 
    517   size -= 4;
    518   for (i = 0; i < size; ++i) {
    519     if ((buf[i] & 0xFE) != 0xE8)
    520       continue;
    521 
    522     prev_pos = i - prev_pos;
    523     if (prev_pos > 3) {
    524       prev_mask = 0;
    525     } else {
    526       prev_mask = (prev_mask << (prev_pos - 1)) & 7;
    527       if (prev_mask != 0) {
    528         b = buf[i + 4 - mask_to_bit_num[prev_mask]];
    529         if (!mask_to_allowed_status[prev_mask]
    530             || bcj_x86_test_msbyte(b)) {
    531           prev_pos = i;
    532           prev_mask = (prev_mask << 1) | 1;
    533           continue;
    534         }
    535       }
    536     }
    537 
    538     prev_pos = i;
    539 
    540     if (bcj_x86_test_msbyte(buf[i + 4])) {
    541       src = get_unaligned_le32(buf + i + 1);
    542       for (;;) {
    543         dest = src - (s->pos + (uint32_t)i + 5);
    544         if (prev_mask == 0)
    545           break;
    546 
    547         j = mask_to_bit_num[prev_mask] * 8;
    548         b = (uint8_t)(dest >> (24 - j));
    549         if (!bcj_x86_test_msbyte(b))
    550           break;
    551 
    552         src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
    553       }
    554 
    555       dest &= 0x01FFFFFF;
    556       dest |= (uint32_t)0 - (dest & 0x01000000);
    557       put_unaligned_le32(dest, buf + i + 1);
    558       i += 4;
    559     } else {
    560       prev_mask = (prev_mask << 1) | 1;
    561     }
    562   }
    563 
    564   prev_pos = i - prev_pos;
    565   s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
    566   return i;
    567 }
    568 #endif
    569 
    570 #ifdef XZ_DEC_POWERPC
    571 static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    572 {
    573   size_t i;
    574   uint32_t instr;
    575 
    576   for (i = 0; i + 4 <= size; i += 4) {
    577     instr = get_unaligned_be32(buf + i);
    578     if ((instr & 0xFC000003) == 0x48000001) {
    579       instr &= 0x03FFFFFC;
    580       instr -= s->pos + (uint32_t)i;
    581       instr &= 0x03FFFFFC;
    582       instr |= 0x48000001;
    583       put_unaligned_be32(instr, buf + i);
    584     }
    585   }
    586 
    587   return i;
    588 }
    589 #endif
    590 
    591 #ifdef XZ_DEC_IA64
    592 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    593 {
    594   static const uint8_t branch_table[32] = {
    595     0, 0, 0, 0, 0, 0, 0, 0,
    596     0, 0, 0, 0, 0, 0, 0, 0,
    597     4, 4, 6, 6, 0, 0, 7, 7,
    598     4, 4, 0, 0, 4, 4, 0, 0
    599   };
    600 
    601   /*
    602    * The local variables take a little bit stack space, but it's less
    603    * than what LZMA2 decoder takes, so it doesn't make sense to reduce
    604    * stack usage here without doing that for the LZMA2 decoder too.
    605    */
    606 
    607   /* Loop counters */
    608   size_t i;
    609   size_t j;
    610 
    611   /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
    612   uint32_t slot;
    613 
    614   /* Bitwise offset of the instruction indicated by slot */
    615   uint32_t bit_pos;
    616 
    617   /* bit_pos split into byte and bit parts */
    618   uint32_t byte_pos;
    619   uint32_t bit_res;
    620 
    621   /* Address part of an instruction */
    622   uint32_t addr;
    623 
    624   /* Mask used to detect which instructions to convert */
    625   uint32_t mask;
    626 
    627   /* 41-bit instruction stored somewhere in the lowest 48 bits */
    628   uint64_t instr;
    629 
    630   /* Instruction normalized with bit_res for easier manipulation */
    631   uint64_t norm;
    632 
    633   for (i = 0; i + 16 <= size; i += 16) {
    634     mask = branch_table[buf[i] & 0x1F];
    635     for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
    636       if (((mask >> slot) & 1) == 0)
    637         continue;
    638 
    639       byte_pos = bit_pos >> 3;
    640       bit_res = bit_pos & 7;
    641       instr = 0;
    642       for (j = 0; j < 6; ++j)
    643         instr |= (uint64_t)(buf[i + j + byte_pos])
    644             << (8 * j);
    645 
    646       norm = instr >> bit_res;
    647 
    648       if (((norm >> 37) & 0x0F) == 0x05
    649           && ((norm >> 9) & 0x07) == 0) {
    650         addr = (norm >> 13) & 0x0FFFFF;
    651         addr |= ((uint32_t)(norm >> 36) & 1) << 20;
    652         addr <<= 4;
    653         addr -= s->pos + (uint32_t)i;
    654         addr >>= 4;
    655 
    656         norm &= ~((uint64_t)0x8FFFFF << 13);
    657         norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
    658         norm |= (uint64_t)(addr & 0x100000)
    659             << (36 - 20);
    660 
    661         instr &= (1 << bit_res) - 1;
    662         instr |= norm << bit_res;
    663 
    664         for (j = 0; j < 6; j++)
    665           buf[i + j + byte_pos]
    666             = (uint8_t)(instr >> (8 * j));
    667       }
    668     }
    669   }
    670 
    671   return i;
    672 }
    673 #endif
    674 
    675 #ifdef XZ_DEC_ARM
    676 static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    677 {
    678   size_t i;
    679   uint32_t addr;
    680 
    681   for (i = 0; i + 4 <= size; i += 4) {
    682     if (buf[i + 3] == 0xEB) {
    683       addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
    684           | ((uint32_t)buf[i + 2] << 16);
    685       addr <<= 2;
    686       addr -= s->pos + (uint32_t)i + 8;
    687       addr >>= 2;
    688       buf[i] = (uint8_t)addr;
    689       buf[i + 1] = (uint8_t)(addr >> 8);
    690       buf[i + 2] = (uint8_t)(addr >> 16);
    691     }
    692   }
    693 
    694   return i;
    695 }
    696 #endif
    697 
    698 #ifdef XZ_DEC_ARMTHUMB
    699 static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    700 {
    701   size_t i;
    702   uint32_t addr;
    703 
    704   for (i = 0; i + 4 <= size; i += 2) {
    705     if ((buf[i + 1] & 0xF8) == 0xF0
    706         && (buf[i + 3] & 0xF8) == 0xF8) {
    707       addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
    708           | ((uint32_t)buf[i] << 11)
    709           | (((uint32_t)buf[i + 3] & 0x07) << 8)
    710           | (uint32_t)buf[i + 2];
    711       addr <<= 1;
    712       addr -= s->pos + (uint32_t)i + 4;
    713       addr >>= 1;
    714       buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
    715       buf[i] = (uint8_t)(addr >> 11);
    716       buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
    717       buf[i + 2] = (uint8_t)addr;
    718       i += 2;
    719     }
    720   }
    721 
    722   return i;
    723 }
    724 #endif
    725 
    726 #ifdef XZ_DEC_SPARC
    727 static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
    728 {
    729   size_t i;
    730   uint32_t instr;
    731 
    732   for (i = 0; i + 4 <= size; i += 4) {
    733     instr = get_unaligned_be32(buf + i);
    734     if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
    735       instr <<= 2;
    736       instr -= s->pos + (uint32_t)i;
    737       instr >>= 2;
    738       instr = ((uint32_t)0x40000000 - (instr & 0x400000))
    739           | 0x40000000 | (instr & 0x3FFFFF);
    740       put_unaligned_be32(instr, buf + i);
    741     }
    742   }
    743 
    744   return i;
    745 }
    746 #endif
    747 
    748 /*
    749  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
    750  * of data that got filtered.
    751  *
    752  * NOTE: This is implemented as a switch statement to avoid using function
    753  * pointers, which could be problematic in the kernel boot code, which must
    754  * avoid pointers to static data (at least on x86).
    755  */
    756 static void bcj_apply(struct xz_dec_bcj *s,
    757           uint8_t *buf, size_t *pos, size_t size)
    758 {
    759   size_t filtered;
    760 
    761   buf += *pos;
    762   size -= *pos;
    763 
    764   switch (s->type) {
    765 #ifdef XZ_DEC_X86
    766   case BCJ_X86:
    767     filtered = bcj_x86(s, buf, size);
    768     break;
    769 #endif
    770 #ifdef XZ_DEC_POWERPC
    771   case BCJ_POWERPC:
    772     filtered = bcj_powerpc(s, buf, size);
    773     break;
    774 #endif
    775 #ifdef XZ_DEC_IA64
    776   case BCJ_IA64:
    777     filtered = bcj_ia64(s, buf, size);
    778     break;
    779 #endif
    780 #ifdef XZ_DEC_ARM
    781   case BCJ_ARM:
    782     filtered = bcj_arm(s, buf, size);
    783     break;
    784 #endif
    785 #ifdef XZ_DEC_ARMTHUMB
    786   case BCJ_ARMTHUMB:
    787     filtered = bcj_armthumb(s, buf, size);
    788     break;
    789 #endif
    790 #ifdef XZ_DEC_SPARC
    791   case BCJ_SPARC:
    792     filtered = bcj_sparc(s, buf, size);
    793     break;
    794 #endif
    795   default:
    796     /* Never reached but silence compiler warnings. */
    797     filtered = 0;
    798     break;
    799   }
    800 
    801   *pos += filtered;
    802   s->pos += filtered;
    803 }
    804 
    805 /*
    806  * Flush pending filtered data from temp to the output buffer.
    807  * Move the remaining mixture of possibly filtered and unfiltered
    808  * data to the beginning of temp.
    809  */
    810 static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
    811 {
    812   size_t copy_size;
    813 
    814   copy_size = minof(s->temp.filtered, b->out_size - b->out_pos);
    815   memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
    816   b->out_pos += copy_size;
    817 
    818   s->temp.filtered -= copy_size;
    819   s->temp.size -= copy_size;
    820   memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
    821 }
    822 
    823 /*
    824  * The BCJ filter functions are primitive in sense that they process the
    825  * data in chunks of 1-16 bytes. To hide this issue, this function does
    826  * some buffering.
    827  */
    828 enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
    829              struct xz_dec_lzma2 *lzma2,
    830              struct xz_buf *b)
    831 {
    832   size_t out_start;
    833 
    834   /*
    835    * Flush pending already filtered data to the output buffer. Return
    836    * immediatelly if we couldn't flush everything, or if the next
    837    * filter in the chain had already returned XZ_STREAM_END.
    838    */
    839   if (s->temp.filtered > 0) {
    840     bcj_flush(s, b);
    841     if (s->temp.filtered > 0)
    842       return XZ_OK;
    843 
    844     if (s->ret == XZ_STREAM_END)
    845       return XZ_STREAM_END;
    846   }
    847 
    848   /*
    849    * If we have more output space than what is currently pending in
    850    * temp, copy the unfiltered data from temp to the output buffer
    851    * and try to fill the output buffer by decoding more data from the
    852    * next filter in the chain. Apply the BCJ filter on the new data
    853    * in the output buffer. If everything cannot be filtered, copy it
    854    * to temp and rewind the output buffer position accordingly.
    855    *
    856    * This needs to be always run when temp.size == 0 to handle a special
    857    * case where the output buffer is full and the next filter has no
    858    * more output coming but hasn't returned XZ_STREAM_END yet.
    859    */
    860   if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
    861     out_start = b->out_pos;
    862     memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
    863     b->out_pos += s->temp.size;
    864 
    865     s->ret = xz_dec_lzma2_run(lzma2, b);
    866     if (s->ret != XZ_STREAM_END
    867         && (s->ret != XZ_OK ))
    868       return s->ret;
    869 
    870     bcj_apply(s, b->out, &out_start, b->out_pos);
    871 
    872     /*
    873      * As an exception, if the next filter returned XZ_STREAM_END,
    874      * we can do that too, since the last few bytes that remain
    875      * unfiltered are meant to remain unfiltered.
    876      */
    877     if (s->ret == XZ_STREAM_END)
    878       return XZ_STREAM_END;
    879 
    880     s->temp.size = b->out_pos - out_start;
    881     b->out_pos -= s->temp.size;
    882     memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
    883 
    884     /*
    885      * If there wasn't enough input to the next filter to fill
    886      * the output buffer with unfiltered data, there's no point
    887      * to try decoding more data to temp.
    888      */
    889     if (b->out_pos + s->temp.size < b->out_size)
    890       return XZ_OK;
    891   }
    892 
    893   /*
    894    * We have unfiltered data in temp. If the output buffer isn't full
    895    * yet, try to fill the temp buffer by decoding more data from the
    896    * next filter. Apply the BCJ filter on temp. Then we hopefully can
    897    * fill the actual output buffer by copying filtered data from temp.
    898    * A mix of filtered and unfiltered data may be left in temp; it will
    899    * be taken care on the next call to this function.
    900    */
    901   if (b->out_pos < b->out_size) {
    902     /* Make b->out{,_pos,_size} temporarily point to s->temp. */
    903     s->out = b->out;
    904     s->out_pos = b->out_pos;
    905     s->out_size = b->out_size;
    906     b->out = s->temp.buf;
    907     b->out_pos = s->temp.size;
    908     b->out_size = sizeof(s->temp.buf);
    909 
    910     s->ret = xz_dec_lzma2_run(lzma2, b);
    911 
    912     s->temp.size = b->out_pos;
    913     b->out = s->out;
    914     b->out_pos = s->out_pos;
    915     b->out_size = s->out_size;
    916 
    917     if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
    918       return s->ret;
    919 
    920     bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
    921 
    922     /*
    923      * If the next filter returned XZ_STREAM_END, we mark that
    924      * everything is filtered, since the last unfiltered bytes
    925      * of the stream are meant to be left as is.
    926      */
    927     if (s->ret == XZ_STREAM_END)
    928       s->temp.filtered = s->temp.size;
    929 
    930     bcj_flush(s, b);
    931     if (s->temp.filtered > 0)
    932       return XZ_OK;
    933   }
    934 
    935   return s->ret;
    936 }
    937 
    938 enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
    939 {
    940   switch (id) {
    941 #ifdef XZ_DEC_X86
    942   case BCJ_X86:
    943 #endif
    944 #ifdef XZ_DEC_POWERPC
    945   case BCJ_POWERPC:
    946 #endif
    947 #ifdef XZ_DEC_IA64
    948   case BCJ_IA64:
    949 #endif
    950 #ifdef XZ_DEC_ARM
    951   case BCJ_ARM:
    952 #endif
    953 #ifdef XZ_DEC_ARMTHUMB
    954   case BCJ_ARMTHUMB:
    955 #endif
    956 #ifdef XZ_DEC_SPARC
    957   case BCJ_SPARC:
    958 #endif
    959     break;
    960 
    961   default:
    962     /* Unsupported Filter ID */
    963     return XZ_OPTIONS_ERROR;
    964   }
    965 
    966   s->type = id;
    967   s->ret = XZ_OK;
    968   s->pos = 0;
    969   s->x86_prev_mask = 0;
    970   s->temp.filtered = 0;
    971   s->temp.size = 0;
    972 
    973   return XZ_OK;
    974 }
    975 
    976 #endif
    977 /*
    978  * LZMA2 decoder
    979  */
    980 
    981 
    982 // BEGIN xz_lzma2.h
    983 /*
    984  * LZMA2 definitions
    985  *
    986  */
    987 
    988 
    989 /* Range coder constants */
    990 #define RC_SHIFT_BITS 8
    991 #define RC_TOP_BITS 24
    992 #define RC_TOP_VALUE (1 << RC_TOP_BITS)
    993 #define RC_BIT_MODEL_TOTAL_BITS 11
    994 #define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
    995 #define RC_MOVE_BITS 5
    996 
    997 /*
    998  * Maximum number of position states. A position state is the lowest pb
    999  * number of bits of the current uncompressed offset. In some places there
   1000  * are different sets of probabilities for different position states.
   1001  */
   1002 #define POS_STATES_MAX (1 << 4)
   1003 
   1004 /*
   1005  * This enum is used to track which LZMA symbols have occurred most recently
   1006  * and in which order. This information is used to predict the next symbol.
   1007  *
   1008  * Symbols:
   1009  *  - Literal: One 8-bit byte
   1010  *  - Match: Repeat a chunk of data at some distance
   1011  *  - Long repeat: Multi-byte match at a recently seen distance
   1012  *  - Short repeat: One-byte repeat at a recently seen distance
   1013  *
   1014  * The symbol names are in from STATE_oldest_older_previous. REP means
   1015  * either short or long repeated match, and NONLIT means any non-literal.
   1016  */
   1017 enum lzma_state {
   1018   STATE_LIT_LIT,
   1019   STATE_MATCH_LIT_LIT,
   1020   STATE_REP_LIT_LIT,
   1021   STATE_SHORTREP_LIT_LIT,
   1022   STATE_MATCH_LIT,
   1023   STATE_REP_LIT,
   1024   STATE_SHORTREP_LIT,
   1025   STATE_LIT_MATCH,
   1026   STATE_LIT_LONGREP,
   1027   STATE_LIT_SHORTREP,
   1028   STATE_NONLIT_MATCH,
   1029   STATE_NONLIT_REP
   1030 };
   1031 
   1032 /* Total number of states */
   1033 #define STATES 12
   1034 
   1035 /* The lowest 7 states indicate that the previous state was a literal. */
   1036 #define LIT_STATES 7
   1037 
   1038 /* Indicate that the latest symbol was a literal. */
   1039 static inline void lzma_state_literal(enum lzma_state *state)
   1040 {
   1041   if (*state <= STATE_SHORTREP_LIT_LIT)
   1042     *state = STATE_LIT_LIT;
   1043   else if (*state <= STATE_LIT_SHORTREP)
   1044     *state -= 3;
   1045   else
   1046     *state -= 6;
   1047 }
   1048 
   1049 /* Indicate that the latest symbol was a match. */
   1050 static inline void lzma_state_match(enum lzma_state *state)
   1051 {
   1052   *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
   1053 }
   1054 
   1055 /* Indicate that the latest state was a long repeated match. */
   1056 static inline void lzma_state_long_rep(enum lzma_state *state)
   1057 {
   1058   *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
   1059 }
   1060 
   1061 /* Indicate that the latest symbol was a short match. */
   1062 static inline void lzma_state_short_rep(enum lzma_state *state)
   1063 {
   1064   *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
   1065 }
   1066 
   1067 /* Test if the previous symbol was a literal. */
   1068 static inline int lzma_state_is_literal(enum lzma_state state)
   1069 {
   1070   return state < LIT_STATES;
   1071 }
   1072 
   1073 /* Each literal coder is divided in three sections:
   1074  *   - 0x001-0x0FF: Without match byte
   1075  *   - 0x101-0x1FF: With match byte; match bit is 0
   1076  *   - 0x201-0x2FF: With match byte; match bit is 1
   1077  *
   1078  * Match byte is used when the previous LZMA symbol was something else than
   1079  * a literal (that is, it was some kind of match).
   1080  */
   1081 #define LITERAL_CODER_SIZE 0x300
   1082 
   1083 /* Maximum number of literal coders */
   1084 #define LITERAL_CODERS_MAX (1 << 4)
   1085 
   1086 /* Minimum length of a match is two bytes. */
   1087 #define MATCH_LEN_MIN 2
   1088 
   1089 /* Match length is encoded with 4, 5, or 10 bits.
   1090  *
   1091  * Length   Bits
   1092  *  2-9      4 = Choice=0 + 3 bits
   1093  * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
   1094  * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
   1095  */
   1096 #define LEN_LOW_BITS 3
   1097 #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
   1098 #define LEN_MID_BITS 3
   1099 #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
   1100 #define LEN_HIGH_BITS 8
   1101 #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
   1102 #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
   1103 
   1104 /*
   1105  * Maximum length of a match is 273 which is a result of the encoding
   1106  * described above.
   1107  */
   1108 #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
   1109 
   1110 /*
   1111  * Different sets of probabilities are used for match distances that have
   1112  * very short match length: Lengths of 2, 3, and 4 bytes have a separate
   1113  * set of probabilities for each length. The matches with longer length
   1114  * use a shared set of probabilities.
   1115  */
   1116 #define DIST_STATES 4
   1117 
   1118 /*
   1119  * Get the index of the appropriate probability array for decoding
   1120  * the distance slot.
   1121  */
   1122 static inline uint32_t lzma_get_dist_state(uint32_t len)
   1123 {
   1124   return len < DIST_STATES + MATCH_LEN_MIN
   1125       ? len - MATCH_LEN_MIN : DIST_STATES - 1;
   1126 }
   1127 
   1128 /*
   1129  * The highest two bits of a 32-bit match distance are encoded using six bits.
   1130  * This six-bit value is called a distance slot. This way encoding a 32-bit
   1131  * value takes 6-36 bits, larger values taking more bits.
   1132  */
   1133 #define DIST_SLOT_BITS 6
   1134 #define DIST_SLOTS (1 << DIST_SLOT_BITS)
   1135 
   1136 /* Match distances up to 127 are fully encoded using probabilities. Since
   1137  * the highest two bits (distance slot) are always encoded using six bits,
   1138  * the distances 0-3 don't need any additional bits to encode, since the
   1139  * distance slot itself is the same as the actual distance. DIST_MODEL_START
   1140  * indicates the first distance slot where at least one additional bit is
   1141  * needed.
   1142  */
   1143 #define DIST_MODEL_START 4
   1144 
   1145 /*
   1146  * Match distances greater than 127 are encoded in three pieces:
   1147  *   - distance slot: the highest two bits
   1148  *   - direct bits: 2-26 bits below the highest two bits
   1149  *   - alignment bits: four lowest bits
   1150  *
   1151  * Direct bits don't use any probabilities.
   1152  *
   1153  * The distance slot value of 14 is for distances 128-191.
   1154  */
   1155 #define DIST_MODEL_END 14
   1156 
   1157 /* Distance slots that indicate a distance <= 127. */
   1158 #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
   1159 #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
   1160 
   1161 /*
   1162  * For match distances greater than 127, only the highest two bits and the
   1163  * lowest four bits (alignment) is encoded using probabilities.
   1164  */
   1165 #define ALIGN_BITS 4
   1166 #define ALIGN_SIZE (1 << ALIGN_BITS)
   1167 #define ALIGN_MASK (ALIGN_SIZE - 1)
   1168 
   1169 /* Total number of all probability variables */
   1170 #define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
   1171 
   1172 /*
   1173  * LZMA remembers the four most recent match distances. Reusing these
   1174  * distances tends to take less space than re-encoding the actual
   1175  * distance value.
   1176  */
   1177 #define REPS 4
   1178 
   1179 
   1180 // END xz_lzma2.h
   1181 
   1182 /*
   1183  * Range decoder initialization eats the first five bytes of each LZMA chunk.
   1184  */
   1185 #define RC_INIT_BYTES 5
   1186 
   1187 /*
   1188  * Minimum number of usable input buffer to safely decode one LZMA symbol.
   1189  * The worst case is that we decode 22 bits using probabilities and 26
   1190  * direct bits. This may decode at maximum of 20 bytes of input. However,
   1191  * lzma_main() does an extra normalization before returning, thus we
   1192  * need to put 21 here.
   1193  */
   1194 #define LZMA_IN_REQUIRED 21
   1195 
   1196 /*
   1197  * Dictionary (history buffer)
   1198  *
   1199  * These are always true:
   1200  *    start <= pos <= full <= end
   1201  *    pos <= limit <= end
   1202  *    end == size
   1203  *    size <= size_max
   1204  *    allocated <= size
   1205  *
   1206  * Most of these variables are size_t as a relic of single-call mode,
   1207  * in which the dictionary variables address the actual output
   1208  * buffer directly.
   1209  */
   1210 struct dictionary {
   1211   /* Beginning of the history buffer */
   1212   uint8_t *buf;
   1213 
   1214   /* Old position in buf (before decoding more data) */
   1215   size_t start;
   1216 
   1217   /* Position in buf */
   1218   size_t pos;
   1219 
   1220   /*
   1221    * How full dictionary is. This is used to detect corrupt input that
   1222    * would read beyond the beginning of the uncompressed stream.
   1223    */
   1224   size_t full;
   1225 
   1226   /* Write limit; we don't write to buf[limit] or later bytes. */
   1227   size_t limit;
   1228 
   1229   /* End of the dictionary buffer. This is the same as the dictionary size. */
   1230   size_t end;
   1231 
   1232   /*
   1233    * Size of the dictionary as specified in Block Header. This is used
   1234    * together with "full" to detect corrupt input that would make us
   1235    * read beyond the beginning of the uncompressed stream.
   1236    */
   1237   uint32_t size;
   1238 
   1239   /*
   1240    * Maximum allowed dictionary size.
   1241    */
   1242   uint32_t size_max;
   1243 
   1244   /*
   1245    * Amount of memory currently allocated for the dictionary.
   1246    */
   1247   uint32_t allocated;
   1248 };
   1249 
   1250 /* Range decoder */
   1251 struct rc_dec {
   1252   uint32_t range;
   1253   uint32_t code;
   1254 
   1255   /*
   1256    * Number of initializing bytes remaining to be read
   1257    * by rc_read_init().
   1258    */
   1259   uint32_t init_bytes_left;
   1260 
   1261   /*
   1262    * Buffer from which we read our input. It can be either
   1263    * temp.buf or the caller-provided input buffer.
   1264    */
   1265   const uint8_t *in;
   1266   size_t in_pos;
   1267   size_t in_limit;
   1268 };
   1269 
   1270 /* Probabilities for a length decoder. */
   1271 struct lzma_len_dec {
   1272   /* Probability of match length being at least 10 */
   1273   uint16_t choice;
   1274 
   1275   /* Probability of match length being at least 18 */
   1276   uint16_t choice2;
   1277 
   1278   /* Probabilities for match lengths 2-9 */
   1279   uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
   1280 
   1281   /* Probabilities for match lengths 10-17 */
   1282   uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
   1283 
   1284   /* Probabilities for match lengths 18-273 */
   1285   uint16_t high[LEN_HIGH_SYMBOLS];
   1286 };
   1287 
   1288 struct lzma_dec {
   1289   /* Distances of latest four matches */
   1290   uint32_t rep0;
   1291   uint32_t rep1;
   1292   uint32_t rep2;
   1293   uint32_t rep3;
   1294 
   1295   /* Types of the most recently seen LZMA symbols */
   1296   enum lzma_state state;
   1297 
   1298   /*
   1299    * Length of a match. This is updated so that dict_repeat can
   1300    * be called again to finish repeating the whole match.
   1301    */
   1302   uint32_t len;
   1303 
   1304   /*
   1305    * LZMA properties or related bit masks (number of literal
   1306    * context bits, a mask dervied from the number of literal
   1307    * position bits, and a mask dervied from the number
   1308    * position bits)
   1309    */
   1310   uint32_t lc;
   1311   uint32_t literal_pos_mask; /* (1 << lp) - 1 */
   1312   uint32_t pos_mask;         /* (1 << pb) - 1 */
   1313 
   1314   /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
   1315   uint16_t is_match[STATES][POS_STATES_MAX];
   1316 
   1317   /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
   1318   uint16_t is_rep[STATES];
   1319 
   1320   /*
   1321    * If 0, distance of a repeated match is rep0.
   1322    * Otherwise check is_rep1.
   1323    */
   1324   uint16_t is_rep0[STATES];
   1325 
   1326   /*
   1327    * If 0, distance of a repeated match is rep1.
   1328    * Otherwise check is_rep2.
   1329    */
   1330   uint16_t is_rep1[STATES];
   1331 
   1332   /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
   1333   uint16_t is_rep2[STATES];
   1334 
   1335   /*
   1336    * If 1, the repeated match has length of one byte. Otherwise
   1337    * the length is decoded from rep_len_decoder.
   1338    */
   1339   uint16_t is_rep0_long[STATES][POS_STATES_MAX];
   1340 
   1341   /*
   1342    * Probability tree for the highest two bits of the match
   1343    * distance. There is a separate probability tree for match
   1344    * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
   1345    */
   1346   uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
   1347 
   1348   /*
   1349    * Probility trees for additional bits for match distance
   1350    * when the distance is in the range [4, 127].
   1351    */
   1352   uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
   1353 
   1354   /*
   1355    * Probability tree for the lowest four bits of a match
   1356    * distance that is equal to or greater than 128.
   1357    */
   1358   uint16_t dist_align[ALIGN_SIZE];
   1359 
   1360   /* Length of a normal match */
   1361   struct lzma_len_dec match_len_dec;
   1362 
   1363   /* Length of a repeated match */
   1364   struct lzma_len_dec rep_len_dec;
   1365 
   1366   /* Probabilities of literals */
   1367   uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
   1368 };
   1369 
   1370 struct lzma2_dec {
   1371   /* Position in xz_dec_lzma2_run(). */
   1372   enum lzma2_seq {
   1373     SEQ_CONTROL,
   1374     SEQ_UNCOMPRESSED_1,
   1375     SEQ_UNCOMPRESSED_2,
   1376     SEQ_COMPRESSED_0,
   1377     SEQ_COMPRESSED_1,
   1378     SEQ_PROPERTIES,
   1379     SEQ_LZMA_PREPARE,
   1380     SEQ_LZMA_RUN,
   1381     SEQ_COPY
   1382   } sequence;
   1383 
   1384   /* Next position after decoding the compressed size of the chunk. */
   1385   enum lzma2_seq next_sequence;
   1386 
   1387   /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
   1388   uint32_t uncompressed;
   1389 
   1390   /*
   1391    * Compressed size of LZMA chunk or compressed/uncompressed
   1392    * size of uncompressed chunk (64 KiB at maximum)
   1393    */
   1394   uint32_t compressed;
   1395 
   1396   /*
   1397    * True if dictionary reset is needed. This is false before
   1398    * the first chunk (LZMA or uncompressed).
   1399    */
   1400   int need_dict_reset;
   1401 
   1402   /*
   1403    * True if new LZMA properties are needed. This is false
   1404    * before the first LZMA chunk.
   1405    */
   1406   int need_props;
   1407 };
   1408 
   1409 struct xz_dec_lzma2 {
   1410   /*
   1411    * The order below is important on x86 to reduce code size and
   1412    * it shouldn't hurt on other platforms. Everything up to and
   1413    * including lzma.pos_mask are in the first 128 bytes on x86-32,
   1414    * which allows using smaller instructions to access those
   1415    * variables. On x86-64, fewer variables fit into the first 128
   1416    * bytes, but this is still the best order without sacrificing
   1417    * the readability by splitting the structures.
   1418    */
   1419   struct rc_dec rc;
   1420   struct dictionary dict;
   1421   struct lzma2_dec lzma2;
   1422   struct lzma_dec lzma;
   1423 
   1424   /*
   1425    * Temporary buffer which holds small number of input bytes between
   1426    * decoder calls. See lzma2_lzma() for details.
   1427    */
   1428   struct {
   1429     uint32_t size;
   1430     uint8_t buf[3 * LZMA_IN_REQUIRED];
   1431   } temp;
   1432 };
   1433 
   1434 /**************
   1435  * Dictionary *
   1436  **************/
   1437 
   1438 /* Reset the dictionary state. */
   1439 static void dict_reset(struct dictionary *dict)
   1440 {
   1441   dict->start = 0;
   1442   dict->pos = 0;
   1443   dict->limit = 0;
   1444   dict->full = 0;
   1445 }
   1446 
   1447 /* Set dictionary write limit */
   1448 static void dict_limit(struct dictionary *dict, size_t out_max)
   1449 {
   1450   if (dict->end - dict->pos <= out_max)
   1451     dict->limit = dict->end;
   1452   else
   1453     dict->limit = dict->pos + out_max;
   1454 }
   1455 
   1456 /* Return true if at least one byte can be written into the dictionary. */
   1457 static inline int dict_has_space(const struct dictionary *dict)
   1458 {
   1459   return dict->pos < dict->limit;
   1460 }
   1461 
   1462 /*
   1463  * Get a byte from the dictionary at the given distance. The distance is
   1464  * assumed to valid, or as a special case, zero when the dictionary is
   1465  * still empty. This special case is needed for single-call decoding to
   1466  * avoid writing a '\0' to the end of the destination buffer.
   1467  */
   1468 static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
   1469 {
   1470   size_t offset = dict->pos - dist - 1;
   1471 
   1472   if (dist >= dict->pos)
   1473     offset += dict->end;
   1474 
   1475   return dict->full > 0 ? dict->buf[offset] : 0;
   1476 }
   1477 
   1478 /*
   1479  * Put one byte into the dictionary. It is assumed that there is space for it.
   1480  */
   1481 static inline void dict_put(struct dictionary *dict, uint8_t byte)
   1482 {
   1483   dict->buf[dict->pos++] = byte;
   1484 
   1485   if (dict->full < dict->pos)
   1486     dict->full = dict->pos;
   1487 }
   1488 
   1489 /*
   1490  * Repeat given number of bytes from the given distance. If the distance is
   1491  * invalid, false is returned. On success, true is returned and *len is
   1492  * updated to indicate how many bytes were left to be repeated.
   1493  */
   1494 static int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
   1495 {
   1496   size_t back;
   1497   uint32_t left;
   1498 
   1499   if (dist >= dict->full || dist >= dict->size) return 0;
   1500 
   1501   left = minof(dict->limit - dict->pos, *len);
   1502   *len -= left;
   1503 
   1504   back = dict->pos - dist - 1;
   1505   if (dist >= dict->pos)
   1506     back += dict->end;
   1507 
   1508   do {
   1509     dict->buf[dict->pos++] = dict->buf[back++];
   1510     if (back == dict->end)
   1511       back = 0;
   1512   } while (--left > 0);
   1513 
   1514   if (dict->full < dict->pos)
   1515     dict->full = dict->pos;
   1516 
   1517   return 1;
   1518 }
   1519 
   1520 /* Copy uncompressed data as is from input to dictionary and output buffers. */
   1521 static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
   1522             uint32_t *left)
   1523 {
   1524   size_t copy_size;
   1525 
   1526   while (*left > 0 && b->in_pos < b->in_size
   1527       && b->out_pos < b->out_size) {
   1528     copy_size = minof(b->in_size - b->in_pos,
   1529         b->out_size - b->out_pos);
   1530     if (copy_size > dict->end - dict->pos)
   1531       copy_size = dict->end - dict->pos;
   1532     if (copy_size > *left)
   1533       copy_size = *left;
   1534 
   1535     *left -= copy_size;
   1536 
   1537     memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
   1538     dict->pos += copy_size;
   1539 
   1540     if (dict->full < dict->pos)
   1541       dict->full = dict->pos;
   1542 
   1543     if (dict->pos == dict->end)
   1544       dict->pos = 0;
   1545 
   1546     memcpy(b->out + b->out_pos, b->in + b->in_pos,
   1547         copy_size);
   1548 
   1549     dict->start = dict->pos;
   1550 
   1551     b->out_pos += copy_size;
   1552     b->in_pos += copy_size;
   1553   }
   1554 }
   1555 
   1556 /*
   1557  * Flush pending data from dictionary to b->out. It is assumed that there is
   1558  * enough space in b->out. This is guaranteed because caller uses dict_limit()
   1559  * before decoding data into the dictionary.
   1560  */
   1561 static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
   1562 {
   1563   size_t copy_size = dict->pos - dict->start;
   1564 
   1565   if (dict->pos == dict->end)
   1566     dict->pos = 0;
   1567 
   1568   memcpy(b->out + b->out_pos, dict->buf + dict->start,
   1569       copy_size);
   1570 
   1571   dict->start = dict->pos;
   1572   b->out_pos += copy_size;
   1573   return copy_size;
   1574 }
   1575 
   1576 /*****************
   1577  * Range decoder *
   1578  *****************/
   1579 
   1580 /* Reset the range decoder. */
   1581 static void rc_reset(struct rc_dec *rc)
   1582 {
   1583   rc->range = (uint32_t)-1;
   1584   rc->code = 0;
   1585   rc->init_bytes_left = RC_INIT_BYTES;
   1586 }
   1587 
   1588 /*
   1589  * Read the first five initial bytes into rc->code if they haven't been
   1590  * read already. (Yes, the first byte gets completely ignored.)
   1591  */
   1592 static int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
   1593 {
   1594   while (rc->init_bytes_left > 0) {
   1595     if (b->in_pos == b->in_size) return 0;
   1596 
   1597     rc->code = (rc->code << 8) + b->in[b->in_pos++];
   1598     --rc->init_bytes_left;
   1599   }
   1600 
   1601   return 1;
   1602 }
   1603 
   1604 /* Return true if there may not be enough input for the next decoding loop. */
   1605 static inline int rc_limit_exceeded(const struct rc_dec *rc)
   1606 {
   1607   return rc->in_pos > rc->in_limit;
   1608 }
   1609 
   1610 /*
   1611  * Return true if it is possible (from point of view of range decoder) that
   1612  * we have reached the end of the LZMA chunk.
   1613  */
   1614 static inline int rc_is_finished(const struct rc_dec *rc)
   1615 {
   1616   return rc->code == 0;
   1617 }
   1618 
   1619 /* Read the next input byte if needed. */
   1620 static inline void rc_normalize(struct rc_dec *rc)
   1621 {
   1622   if (rc->range < RC_TOP_VALUE) {
   1623     rc->range <<= RC_SHIFT_BITS;
   1624     rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
   1625   }
   1626 }
   1627 
   1628 /*
   1629  * Decode one bit. In some versions, this function has been splitted in three
   1630  * functions so that the compiler is supposed to be able to more easily avoid
   1631  * an extra branch. In this particular version of the LZMA decoder, this
   1632  * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
   1633  * on x86). Using a non-splitted version results in nicer looking code too.
   1634  *
   1635  * NOTE: This must return an int. Do not make it return a bool or the speed
   1636  * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
   1637  * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
   1638  */
   1639 static inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
   1640 {
   1641   uint32_t bound;
   1642   int bit;
   1643 
   1644   rc_normalize(rc);
   1645   bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
   1646   if (rc->code < bound) {
   1647     rc->range = bound;
   1648     *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
   1649     bit = 0;
   1650   } else {
   1651     rc->range -= bound;
   1652     rc->code -= bound;
   1653     *prob -= *prob >> RC_MOVE_BITS;
   1654     bit = 1;
   1655   }
   1656 
   1657   return bit;
   1658 }
   1659 
   1660 /* Decode a bittree starting from the most significant bit. */
   1661 static inline uint32_t rc_bittree(struct rc_dec *rc,
   1662              uint16_t *probs, uint32_t limit)
   1663 {
   1664   uint32_t symbol = 1;
   1665 
   1666   do {
   1667     if (rc_bit(rc, &probs[symbol]))
   1668       symbol = (symbol << 1) + 1;
   1669     else
   1670       symbol <<= 1;
   1671   } while (symbol < limit);
   1672 
   1673   return symbol;
   1674 }
   1675 
   1676 /* Decode a bittree starting from the least significant bit. */
   1677 static inline void rc_bittree_reverse(struct rc_dec *rc,
   1678                  uint16_t *probs,
   1679                  uint32_t *dest, uint32_t limit)
   1680 {
   1681   uint32_t symbol = 1;
   1682   uint32_t i = 0;
   1683 
   1684   do {
   1685     if (rc_bit(rc, &probs[symbol])) {
   1686       symbol = (symbol << 1) + 1;
   1687       *dest += 1 << i;
   1688     } else {
   1689       symbol <<= 1;
   1690     }
   1691   } while (++i < limit);
   1692 }
   1693 
   1694 /* Decode direct bits (fixed fifty-fifty probability) */
   1695 static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
   1696 {
   1697   uint32_t mask;
   1698 
   1699   do {
   1700     rc_normalize(rc);
   1701     rc->range >>= 1;
   1702     rc->code -= rc->range;
   1703     mask = (uint32_t)0 - (rc->code >> 31);
   1704     rc->code += rc->range & mask;
   1705     *dest = (*dest << 1) + (mask + 1);
   1706   } while (--limit > 0);
   1707 }
   1708 
   1709 /********
   1710  * LZMA *
   1711  ********/
   1712 
   1713 /* Get pointer to literal coder probability array. */
   1714 static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
   1715 {
   1716   uint32_t prev_byte = dict_get(&s->dict, 0);
   1717   uint32_t low = prev_byte >> (8 - s->lzma.lc);
   1718   uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
   1719   return s->lzma.literal[low + high];
   1720 }
   1721 
   1722 /* Decode a literal (one 8-bit byte) */
   1723 static void lzma_literal(struct xz_dec_lzma2 *s)
   1724 {
   1725   uint16_t *probs;
   1726   uint32_t symbol;
   1727   uint32_t match_byte;
   1728   uint32_t match_bit;
   1729   uint32_t offset;
   1730   uint32_t i;
   1731 
   1732   probs = lzma_literal_probs(s);
   1733 
   1734   if (lzma_state_is_literal(s->lzma.state)) {
   1735     symbol = rc_bittree(&s->rc, probs, 0x100);
   1736   } else {
   1737     symbol = 1;
   1738     match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
   1739     offset = 0x100;
   1740 
   1741     do {
   1742       match_bit = match_byte & offset;
   1743       match_byte <<= 1;
   1744       i = offset + match_bit + symbol;
   1745 
   1746       if (rc_bit(&s->rc, &probs[i])) {
   1747         symbol = (symbol << 1) + 1;
   1748         offset &= match_bit;
   1749       } else {
   1750         symbol <<= 1;
   1751         offset &= ~match_bit;
   1752       }
   1753     } while (symbol < 0x100);
   1754   }
   1755 
   1756   dict_put(&s->dict, (uint8_t)symbol);
   1757   lzma_state_literal(&s->lzma.state);
   1758 }
   1759 
   1760 /* Decode the length of the match into s->lzma.len. */
   1761 static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
   1762          uint32_t pos_state)
   1763 {
   1764   uint16_t *probs;
   1765   uint32_t limit;
   1766 
   1767   if (!rc_bit(&s->rc, &l->choice)) {
   1768     probs = l->low[pos_state];
   1769     limit = LEN_LOW_SYMBOLS;
   1770     s->lzma.len = MATCH_LEN_MIN;
   1771   } else {
   1772     if (!rc_bit(&s->rc, &l->choice2)) {
   1773       probs = l->mid[pos_state];
   1774       limit = LEN_MID_SYMBOLS;
   1775       s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
   1776     } else {
   1777       probs = l->high;
   1778       limit = LEN_HIGH_SYMBOLS;
   1779       s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
   1780           + LEN_MID_SYMBOLS;
   1781     }
   1782   }
   1783 
   1784   s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
   1785 }
   1786 
   1787 /* Decode a match. The distance will be stored in s->lzma.rep0. */
   1788 static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
   1789 {
   1790   uint16_t *probs;
   1791   uint32_t dist_slot;
   1792   uint32_t limit;
   1793 
   1794   lzma_state_match(&s->lzma.state);
   1795 
   1796   s->lzma.rep3 = s->lzma.rep2;
   1797   s->lzma.rep2 = s->lzma.rep1;
   1798   s->lzma.rep1 = s->lzma.rep0;
   1799 
   1800   lzma_len(s, &s->lzma.match_len_dec, pos_state);
   1801 
   1802   probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
   1803   dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
   1804 
   1805   if (dist_slot < DIST_MODEL_START) {
   1806     s->lzma.rep0 = dist_slot;
   1807   } else {
   1808     limit = (dist_slot >> 1) - 1;
   1809     s->lzma.rep0 = 2 + (dist_slot & 1);
   1810 
   1811     if (dist_slot < DIST_MODEL_END) {
   1812       s->lzma.rep0 <<= limit;
   1813       probs = s->lzma.dist_special + s->lzma.rep0
   1814           - dist_slot - 1;
   1815       rc_bittree_reverse(&s->rc, probs,
   1816           &s->lzma.rep0, limit);
   1817     } else {
   1818       rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
   1819       s->lzma.rep0 <<= ALIGN_BITS;
   1820       rc_bittree_reverse(&s->rc, s->lzma.dist_align,
   1821           &s->lzma.rep0, ALIGN_BITS);
   1822     }
   1823   }
   1824 }
   1825 
   1826 /*
   1827  * Decode a repeated match. The distance is one of the four most recently
   1828  * seen matches. The distance will be stored in s->lzma.rep0.
   1829  */
   1830 static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
   1831 {
   1832   uint32_t tmp;
   1833 
   1834   if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
   1835     if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
   1836         s->lzma.state][pos_state])) {
   1837       lzma_state_short_rep(&s->lzma.state);
   1838       s->lzma.len = 1;
   1839       return;
   1840     }
   1841   } else {
   1842     if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
   1843       tmp = s->lzma.rep1;
   1844     } else {
   1845       if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
   1846         tmp = s->lzma.rep2;
   1847       } else {
   1848         tmp = s->lzma.rep3;
   1849         s->lzma.rep3 = s->lzma.rep2;
   1850       }
   1851 
   1852       s->lzma.rep2 = s->lzma.rep1;
   1853     }
   1854 
   1855     s->lzma.rep1 = s->lzma.rep0;
   1856     s->lzma.rep0 = tmp;
   1857   }
   1858 
   1859   lzma_state_long_rep(&s->lzma.state);
   1860   lzma_len(s, &s->lzma.rep_len_dec, pos_state);
   1861 }
   1862 
   1863 /* LZMA decoder core */
   1864 static int lzma_main(struct xz_dec_lzma2 *s)
   1865 {
   1866   uint32_t pos_state;
   1867 
   1868   /*
   1869    * If the dictionary was reached during the previous call, try to
   1870    * finish the possibly pending repeat in the dictionary.
   1871    */
   1872   if (dict_has_space(&s->dict) && s->lzma.len > 0)
   1873     dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
   1874 
   1875   /*
   1876    * Decode more LZMA symbols. One iteration may consume up to
   1877    * LZMA_IN_REQUIRED - 1 bytes.
   1878    */
   1879   while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
   1880     pos_state = s->dict.pos & s->lzma.pos_mask;
   1881 
   1882     if (!rc_bit(&s->rc, &s->lzma.is_match[
   1883         s->lzma.state][pos_state])) {
   1884       lzma_literal(s);
   1885     } else {
   1886       if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
   1887         lzma_rep_match(s, pos_state);
   1888       else
   1889         lzma_match(s, pos_state);
   1890 
   1891       if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
   1892         return 0;
   1893     }
   1894   }
   1895 
   1896   /*
   1897    * Having the range decoder always normalized when we are outside
   1898    * this function makes it easier to correctly handle end of the chunk.
   1899    */
   1900   rc_normalize(&s->rc);
   1901 
   1902   return 1;
   1903 }
   1904 
   1905 /*
   1906  * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
   1907  * here, because LZMA state may be reset without resetting the dictionary.
   1908  */
   1909 static void lzma_reset(struct xz_dec_lzma2 *s)
   1910 {
   1911   uint16_t *probs;
   1912   size_t i;
   1913 
   1914   s->lzma.state = STATE_LIT_LIT;
   1915   s->lzma.rep0 = 0;
   1916   s->lzma.rep1 = 0;
   1917   s->lzma.rep2 = 0;
   1918   s->lzma.rep3 = 0;
   1919 
   1920   /*
   1921    * All probabilities are initialized to the same value. This hack
   1922    * makes the code smaller by avoiding a separate loop for each
   1923    * probability array.
   1924    *
   1925    * This could be optimized so that only that part of literal
   1926    * probabilities that are actually required. In the common case
   1927    * we would write 12 KiB less.
   1928    */
   1929   probs = s->lzma.is_match[0];
   1930   for (i = 0; i < PROBS_TOTAL; ++i)
   1931     probs[i] = RC_BIT_MODEL_TOTAL / 2;
   1932 
   1933   rc_reset(&s->rc);
   1934 }
   1935 
   1936 /*
   1937  * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
   1938  * from the decoded lp and pb values. On success, the LZMA decoder state is
   1939  * reset and true is returned.
   1940  */
   1941 static int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
   1942 {
   1943   if (props > (4 * 5 + 4) * 9 + 8)
   1944     return 0;
   1945 
   1946   s->lzma.pos_mask = 0;
   1947   while (props >= 9 * 5) {
   1948     props -= 9 * 5;
   1949     ++s->lzma.pos_mask;
   1950   }
   1951 
   1952   s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
   1953 
   1954   s->lzma.literal_pos_mask = 0;
   1955   while (props >= 9) {
   1956     props -= 9;
   1957     ++s->lzma.literal_pos_mask;
   1958   }
   1959 
   1960   s->lzma.lc = props;
   1961 
   1962   if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
   1963     return 0;
   1964 
   1965   s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
   1966 
   1967   lzma_reset(s);
   1968 
   1969   return 1;
   1970 }
   1971 
   1972 /*********
   1973  * LZMA2 *
   1974  *********/
   1975 
   1976 /*
   1977  * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
   1978  * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
   1979  * wrapper function takes care of making the LZMA decoder's assumption safe.
   1980  *
   1981  * As long as there is plenty of input left to be decoded in the current LZMA
   1982  * chunk, we decode directly from the caller-supplied input buffer until
   1983  * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
   1984  * s->temp.buf, which (hopefully) gets filled on the next call to this
   1985  * function. We decode a few bytes from the temporary buffer so that we can
   1986  * continue decoding from the caller-supplied input buffer again.
   1987  */
   1988 static int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
   1989 {
   1990   size_t in_avail;
   1991   uint32_t tmp;
   1992 
   1993   in_avail = b->in_size - b->in_pos;
   1994   if (s->temp.size > 0 || s->lzma2.compressed == 0) {
   1995     tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
   1996     if (tmp > s->lzma2.compressed - s->temp.size)
   1997       tmp = s->lzma2.compressed - s->temp.size;
   1998     if (tmp > in_avail)
   1999       tmp = in_avail;
   2000 
   2001     memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
   2002 
   2003     if (s->temp.size + tmp == s->lzma2.compressed) {
   2004       memset(s->temp.buf + s->temp.size + tmp, 0,
   2005           sizeof(s->temp.buf)
   2006             - s->temp.size - tmp);
   2007       s->rc.in_limit = s->temp.size + tmp;
   2008     } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
   2009       s->temp.size += tmp;
   2010       b->in_pos += tmp;
   2011       return 1;
   2012     } else {
   2013       s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
   2014     }
   2015 
   2016     s->rc.in = s->temp.buf;
   2017     s->rc.in_pos = 0;
   2018 
   2019     if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
   2020       return 0;
   2021 
   2022     s->lzma2.compressed -= s->rc.in_pos;
   2023 
   2024     if (s->rc.in_pos < s->temp.size) {
   2025       s->temp.size -= s->rc.in_pos;
   2026       memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
   2027           s->temp.size);
   2028       return 1;
   2029     }
   2030 
   2031     b->in_pos += s->rc.in_pos - s->temp.size;
   2032     s->temp.size = 0;
   2033   }
   2034 
   2035   in_avail = b->in_size - b->in_pos;
   2036   if (in_avail >= LZMA_IN_REQUIRED) {
   2037     s->rc.in = b->in;
   2038     s->rc.in_pos = b->in_pos;
   2039 
   2040     if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
   2041       s->rc.in_limit = b->in_pos + s->lzma2.compressed;
   2042     else
   2043       s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
   2044 
   2045     if (!lzma_main(s))
   2046       return 0;
   2047 
   2048     in_avail = s->rc.in_pos - b->in_pos;
   2049     if (in_avail > s->lzma2.compressed) return 0;
   2050 
   2051     s->lzma2.compressed -= in_avail;
   2052     b->in_pos = s->rc.in_pos;
   2053   }
   2054 
   2055   in_avail = b->in_size - b->in_pos;
   2056   if (in_avail < LZMA_IN_REQUIRED) {
   2057     if (in_avail > s->lzma2.compressed)
   2058       in_avail = s->lzma2.compressed;
   2059 
   2060     memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
   2061     s->temp.size = in_avail;
   2062     b->in_pos += in_avail;
   2063   }
   2064 
   2065   return 1;
   2066 }
   2067 
   2068 /*
   2069  * Take care of the LZMA2 control layer, and forward the job of actual LZMA
   2070  * decoding or copying of uncompressed chunks to other functions.
   2071  */
   2072 enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
   2073                struct xz_buf *b)
   2074 {
   2075   uint32_t tmp;
   2076 
   2077   while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
   2078     switch (s->lzma2.sequence) {
   2079     case SEQ_CONTROL:
   2080       /*
   2081        * LZMA2 control byte
   2082        *
   2083        * Exact values:
   2084        *   0x00   End marker
   2085        *   0x01   Dictionary reset followed by
   2086        *          an uncompressed chunk
   2087        *   0x02   Uncompressed chunk (no dictionary reset)
   2088        *
   2089        * Highest three bits (s->control & 0xE0):
   2090        *   0xE0   Dictionary reset, new properties and state
   2091        *          reset, followed by LZMA compressed chunk
   2092        *   0xC0   New properties and state reset, followed
   2093        *          by LZMA compressed chunk (no dictionary
   2094        *          reset)
   2095        *   0xA0   State reset using old properties,
   2096        *          followed by LZMA compressed chunk (no
   2097        *          dictionary reset)
   2098        *   0x80   LZMA chunk (no dictionary or state reset)
   2099        *
   2100        * For LZMA compressed chunks, the lowest five bits
   2101        * (s->control & 1F) are the highest bits of the
   2102        * uncompressed size (bits 16-20).
   2103        *
   2104        * A new LZMA2 stream must begin with a dictionary
   2105        * reset. The first LZMA chunk must set new
   2106        * properties and reset the LZMA state.
   2107        *
   2108        * Values that don't match anything described above
   2109        * are invalid and we return XZ_DATA_ERROR.
   2110        */
   2111       tmp = b->in[b->in_pos++];
   2112 
   2113       if (tmp == 0x00)
   2114         return XZ_STREAM_END;
   2115 
   2116       if (tmp >= 0xE0 || tmp == 0x01) {
   2117         s->lzma2.need_props = 1;
   2118         s->lzma2.need_dict_reset = 0;
   2119         dict_reset(&s->dict);
   2120       } else if (s->lzma2.need_dict_reset) {
   2121         return XZ_DATA_ERROR;
   2122       }
   2123 
   2124       if (tmp >= 0x80) {
   2125         s->lzma2.uncompressed = (tmp & 0x1F) << 16;
   2126         s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
   2127 
   2128         if (tmp >= 0xC0) {
   2129           /*
   2130            * When there are new properties,
   2131            * state reset is done at
   2132            * SEQ_PROPERTIES.
   2133            */
   2134           s->lzma2.need_props = 0;
   2135           s->lzma2.next_sequence
   2136               = SEQ_PROPERTIES;
   2137 
   2138         } else if (s->lzma2.need_props) {
   2139           return XZ_DATA_ERROR;
   2140 
   2141         } else {
   2142           s->lzma2.next_sequence
   2143               = SEQ_LZMA_PREPARE;
   2144           if (tmp >= 0xA0)
   2145             lzma_reset(s);
   2146         }
   2147       } else {
   2148         if (tmp > 0x02)
   2149           return XZ_DATA_ERROR;
   2150 
   2151         s->lzma2.sequence = SEQ_COMPRESSED_0;
   2152         s->lzma2.next_sequence = SEQ_COPY;
   2153       }
   2154 
   2155       break;
   2156 
   2157     case SEQ_UNCOMPRESSED_1:
   2158       s->lzma2.uncompressed
   2159           += (uint32_t)b->in[b->in_pos++] << 8;
   2160       s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
   2161       break;
   2162 
   2163     case SEQ_UNCOMPRESSED_2:
   2164       s->lzma2.uncompressed
   2165           += (uint32_t)b->in[b->in_pos++] + 1;
   2166       s->lzma2.sequence = SEQ_COMPRESSED_0;
   2167       break;
   2168 
   2169     case SEQ_COMPRESSED_0:
   2170       s->lzma2.compressed
   2171           = (uint32_t)b->in[b->in_pos++] << 8;
   2172       s->lzma2.sequence = SEQ_COMPRESSED_1;
   2173       break;
   2174 
   2175     case SEQ_COMPRESSED_1:
   2176       s->lzma2.compressed
   2177           += (uint32_t)b->in[b->in_pos++] + 1;
   2178       s->lzma2.sequence = s->lzma2.next_sequence;
   2179       break;
   2180 
   2181     case SEQ_PROPERTIES:
   2182       if (!lzma_props(s, b->in[b->in_pos++]))
   2183         return XZ_DATA_ERROR;
   2184 
   2185       s->lzma2.sequence = SEQ_LZMA_PREPARE;
   2186 
   2187     case SEQ_LZMA_PREPARE:
   2188       if (s->lzma2.compressed < RC_INIT_BYTES)
   2189         return XZ_DATA_ERROR;
   2190 
   2191       if (!rc_read_init(&s->rc, b))
   2192         return XZ_OK;
   2193 
   2194       s->lzma2.compressed -= RC_INIT_BYTES;
   2195       s->lzma2.sequence = SEQ_LZMA_RUN;
   2196 
   2197     case SEQ_LZMA_RUN:
   2198       /*
   2199        * Set dictionary limit to indicate how much we want
   2200        * to be encoded at maximum. Decode new data into the
   2201        * dictionary. Flush the new data from dictionary to
   2202        * b->out. Check if we finished decoding this chunk.
   2203        * In case the dictionary got full but we didn't fill
   2204        * the output buffer yet, we may run this loop
   2205        * multiple times without changing s->lzma2.sequence.
   2206        */
   2207       dict_limit(&s->dict, minof(b->out_size - b->out_pos,
   2208           s->lzma2.uncompressed));
   2209       if (!lzma2_lzma(s, b))
   2210         return XZ_DATA_ERROR;
   2211 
   2212       s->lzma2.uncompressed -= dict_flush(&s->dict, b);
   2213 
   2214       if (s->lzma2.uncompressed == 0) {
   2215         if (s->lzma2.compressed > 0 || s->lzma.len > 0
   2216             || !rc_is_finished(&s->rc))
   2217           return XZ_DATA_ERROR;
   2218 
   2219         rc_reset(&s->rc);
   2220         s->lzma2.sequence = SEQ_CONTROL;
   2221 
   2222       } else if (b->out_pos == b->out_size
   2223           || (b->in_pos == b->in_size
   2224             && s->temp.size
   2225             < s->lzma2.compressed)) {
   2226         return XZ_OK;
   2227       }
   2228 
   2229       break;
   2230 
   2231     case SEQ_COPY:
   2232       dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
   2233       if (s->lzma2.compressed > 0)
   2234         return XZ_OK;
   2235 
   2236       s->lzma2.sequence = SEQ_CONTROL;
   2237       break;
   2238     }
   2239   }
   2240 
   2241   return XZ_OK;
   2242 }
   2243 
   2244 struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
   2245 {
   2246   struct xz_dec_lzma2 *s = malloc(sizeof(*s));
   2247   if (s == NULL)
   2248     return NULL;
   2249 
   2250   s->dict.size_max = dict_max;
   2251   s->dict.buf = NULL;
   2252   s->dict.allocated = 0;
   2253 
   2254   return s;
   2255 }
   2256 
   2257 enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
   2258 {
   2259   /* This limits dictionary size to 3 GiB to keep parsing simpler. */
   2260   if (props > 39)
   2261     return XZ_OPTIONS_ERROR;
   2262 
   2263   s->dict.size = 2 + (props & 1);
   2264   s->dict.size <<= (props >> 1) + 11;
   2265 
   2266   if (s->dict.size > s->dict.size_max)
   2267     return XZ_MEMLIMIT_ERROR;
   2268 
   2269   s->dict.end = s->dict.size;
   2270 
   2271   if (s->dict.allocated < s->dict.size) {
   2272     free(s->dict.buf);
   2273     s->dict.buf = malloc(s->dict.size);
   2274     if (s->dict.buf == NULL) {
   2275       s->dict.allocated = 0;
   2276       return XZ_MEM_ERROR;
   2277     }
   2278   }
   2279 
   2280   s->lzma.len = 0;
   2281 
   2282   s->lzma2.sequence = SEQ_CONTROL;
   2283   s->lzma2.need_dict_reset = 1;
   2284 
   2285   s->temp.size = 0;
   2286 
   2287   return XZ_OK;
   2288 }
   2289 
   2290 /*
   2291  * .xz Stream decoder
   2292  */
   2293 
   2294 
   2295 // BEGIN xz_stream.h
   2296 /*
   2297  * Definitions for handling the .xz file format
   2298  */
   2299 
   2300 /*
   2301  * See the .xz file format specification at
   2302  * http://tukaani.org/xz/xz-file-format.txt
   2303  * to understand the container format.
   2304  */
   2305 
   2306 #define STREAM_HEADER_SIZE 12
   2307 
   2308 #define HEADER_MAGIC "\3757zXZ"
   2309 #define HEADER_MAGIC_SIZE 6
   2310 
   2311 #define FOOTER_MAGIC "YZ"
   2312 #define FOOTER_MAGIC_SIZE 2
   2313 
   2314 /*
   2315  * Variable-length integer can hold a 63-bit unsigned integer or a special
   2316  * value indicating that the value is unknown.
   2317  *
   2318  * Experimental: vli_type can be defined to uint32_t to save a few bytes
   2319  * in code size (no effect on speed). Doing so limits the uncompressed and
   2320  * compressed size of the file to less than 256 MiB and may also weaken
   2321  * error detection slightly.
   2322  */
   2323 typedef uint64_t vli_type;
   2324 
   2325 #define VLI_MAX ((vli_type)-1 / 2)
   2326 #define VLI_UNKNOWN ((vli_type)-1)
   2327 
   2328 /* Maximum encoded size of a VLI */
   2329 #define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
   2330 
   2331 /* Integrity Check types */
   2332 enum xz_check {
   2333   XZ_CHECK_NONE = 0,
   2334   XZ_CHECK_CRC32 = 1,
   2335   XZ_CHECK_CRC64 = 4,
   2336   XZ_CHECK_SHA256 = 10
   2337 };
   2338 
   2339 /* Maximum possible Check ID */
   2340 #define XZ_CHECK_MAX 15
   2341 // END xz_stream.h
   2342 
   2343 #define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
   2344 
   2345 /* Hash used to validate the Index field */
   2346 struct xz_dec_hash {
   2347   vli_type unpadded;
   2348   vli_type uncompressed;
   2349   uint32_t crc32;
   2350 };
   2351 
   2352 struct xz_dec {
   2353   /* Position in dec_main() */
   2354   enum {
   2355     SEQ_STREAM_HEADER,
   2356     SEQ_BLOCK_START,
   2357     SEQ_BLOCK_HEADER,
   2358     SEQ_BLOCK_UNCOMPRESS,
   2359     SEQ_BLOCK_PADDING,
   2360     SEQ_BLOCK_CHECK,
   2361     SEQ_INDEX,
   2362     SEQ_INDEX_PADDING,
   2363     SEQ_INDEX_CRC32,
   2364     SEQ_STREAM_FOOTER
   2365   } sequence;
   2366 
   2367   /* Position in variable-length integers and Check fields */
   2368   uint32_t pos;
   2369 
   2370   /* Variable-length integer decoded by dec_vli() */
   2371   vli_type vli;
   2372 
   2373   /* Saved in_pos and out_pos */
   2374   size_t in_start;
   2375   size_t out_start;
   2376 
   2377   /* CRC32 or CRC64 value in Block or CRC32 value in Index */
   2378   uint64_t crc;
   2379 
   2380   /* Type of the integrity check calculated from uncompressed data */
   2381   enum xz_check check_type;
   2382 
   2383   /*
   2384    * True if the next call to xz_dec_run() is allowed to return
   2385    * XZ_BUF_ERROR.
   2386    */
   2387   int allow_buf_error;
   2388 
   2389   /* Information stored in Block Header */
   2390   struct {
   2391     /*
   2392      * Value stored in the Compressed Size field, or
   2393      * VLI_UNKNOWN if Compressed Size is not present.
   2394      */
   2395     vli_type compressed;
   2396 
   2397     /*
   2398      * Value stored in the Uncompressed Size field, or
   2399      * VLI_UNKNOWN if Uncompressed Size is not present.
   2400      */
   2401     vli_type uncompressed;
   2402 
   2403     /* Size of the Block Header field */
   2404     uint32_t size;
   2405   } block_header;
   2406 
   2407   /* Information collected when decoding Blocks */
   2408   struct {
   2409     /* Observed compressed size of the current Block */
   2410     vli_type compressed;
   2411 
   2412     /* Observed uncompressed size of the current Block */
   2413     vli_type uncompressed;
   2414 
   2415     /* Number of Blocks decoded so far */
   2416     vli_type count;
   2417 
   2418     /*
   2419      * Hash calculated from the Block sizes. This is used to
   2420      * validate the Index field.
   2421      */
   2422     struct xz_dec_hash hash;
   2423   } block;
   2424 
   2425   /* Variables needed when verifying the Index field */
   2426   struct {
   2427     /* Position in dec_index() */
   2428     enum {
   2429       SEQ_INDEX_COUNT,
   2430       SEQ_INDEX_UNPADDED,
   2431       SEQ_INDEX_UNCOMPRESSED
   2432     } sequence;
   2433 
   2434     /* Size of the Index in bytes */
   2435     vli_type size;
   2436 
   2437     /* Number of Records (matches block.count in valid files) */
   2438     vli_type count;
   2439 
   2440     /*
   2441      * Hash calculated from the Records (matches block.hash in
   2442      * valid files).
   2443      */
   2444     struct xz_dec_hash hash;
   2445   } index;
   2446 
   2447   /*
   2448    * Temporary buffer needed to hold Stream Header, Block Header,
   2449    * and Stream Footer. The Block Header is the biggest (1 KiB)
   2450    * so we reserve space according to that. buf[] has to be aligned
   2451    * to a multiple of four bytes; the size_t variables before it
   2452    * should guarantee this.
   2453    */
   2454   struct {
   2455     size_t pos;
   2456     size_t size;
   2457     uint8_t buf[1024];
   2458   } temp;
   2459 
   2460   struct xz_dec_lzma2 *lzma2;
   2461 
   2462 #ifdef XZ_DEC_BCJ
   2463   struct xz_dec_bcj *bcj;
   2464   int bcj_active;
   2465 #endif
   2466 };
   2467 
   2468 /* Sizes of the Check field with different Check IDs */
   2469 static const uint8_t check_sizes[16] = {
   2470   0,
   2471   4, 4, 4,
   2472   8, 8, 8,
   2473   16, 16, 16,
   2474   32, 32, 32,
   2475   64, 64, 64
   2476 };
   2477 
   2478 /*
   2479  * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
   2480  * must have set s->temp.pos to indicate how much data we are supposed
   2481  * to copy into s->temp.buf. Return true once s->temp.pos has reached
   2482  * s->temp.size.
   2483  */
   2484 static int fill_temp(struct xz_dec *s, struct xz_buf *b)
   2485 {
   2486   size_t copy_size = minof(b->in_size - b->in_pos, s->temp.size - s->temp.pos);
   2487 
   2488   memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
   2489   b->in_pos += copy_size;
   2490   s->temp.pos += copy_size;
   2491 
   2492   if (s->temp.pos == s->temp.size) {
   2493     s->temp.pos = 0;
   2494     return 1;
   2495   }
   2496 
   2497   return 0;
   2498 }
   2499 
   2500 /* Decode a variable-length integer (little-endian base-128 encoding) */
   2501 static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
   2502          size_t *in_pos, size_t in_size)
   2503 {
   2504   uint8_t byte;
   2505 
   2506   if (s->pos == 0)
   2507     s->vli = 0;
   2508 
   2509   while (*in_pos < in_size) {
   2510     byte = in[*in_pos];
   2511     ++*in_pos;
   2512 
   2513     s->vli |= (vli_type)(byte & 0x7F) << s->pos;
   2514 
   2515     if ((byte & 0x80) == 0) {
   2516       /* Don't allow non-minimal encodings. */
   2517       if (byte == 0 && s->pos != 0)
   2518         return XZ_DATA_ERROR;
   2519 
   2520       s->pos = 0;
   2521       return XZ_STREAM_END;
   2522     }
   2523 
   2524     s->pos += 7;
   2525     if (s->pos == 7 * VLI_BYTES_MAX)
   2526       return XZ_DATA_ERROR;
   2527   }
   2528 
   2529   return XZ_OK;
   2530 }
   2531 
   2532 /*
   2533  * Decode the Compressed Data field from a Block. Update and validate
   2534  * the observed compressed and uncompressed sizes of the Block so that
   2535  * they don't exceed the values possibly stored in the Block Header
   2536  * (validation assumes that no integer overflow occurs, since vli_type
   2537  * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
   2538  * the CRC32 or CRC64 field was indicated in Stream Header.
   2539  *
   2540  * Once the decoding is finished, validate that the observed sizes match
   2541  * the sizes possibly stored in the Block Header. Update the hash and
   2542  * Block count, which are later used to validate the Index field.
   2543  */
   2544 static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
   2545 {
   2546   enum xz_ret ret;
   2547 
   2548   s->in_start = b->in_pos;
   2549   s->out_start = b->out_pos;
   2550 
   2551 #ifdef XZ_DEC_BCJ
   2552   if (s->bcj_active)
   2553     ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
   2554   else
   2555 #endif
   2556     ret = xz_dec_lzma2_run(s->lzma2, b);
   2557 
   2558   s->block.compressed += b->in_pos - s->in_start;
   2559   s->block.uncompressed += b->out_pos - s->out_start;
   2560 
   2561   /*
   2562    * There is no need to separately check for VLI_UNKNOWN, since
   2563    * the observed sizes are always smaller than VLI_UNKNOWN.
   2564    */
   2565   if (s->block.compressed > s->block_header.compressed
   2566       || s->block.uncompressed
   2567         > s->block_header.uncompressed)
   2568     return XZ_DATA_ERROR;
   2569 
   2570   if (s->check_type == XZ_CHECK_CRC32)
   2571     s->crc = xz_crc32(b->out + s->out_start,
   2572         b->out_pos - s->out_start, s->crc);
   2573   else if (s->check_type == XZ_CHECK_CRC64)
   2574     s->crc = ~(s->crc);
   2575     size_t size = b->out_pos - s->out_start;
   2576     uint8_t *buf = b->out + s->out_start;
   2577     while (size) {
   2578       s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
   2579       --size;
   2580     }
   2581     s->crc=~(s->crc);
   2582 
   2583   if (ret == XZ_STREAM_END) {
   2584     if (s->block_header.compressed != VLI_UNKNOWN
   2585         && s->block_header.compressed
   2586           != s->block.compressed)
   2587       return XZ_DATA_ERROR;
   2588 
   2589     if (s->block_header.uncompressed != VLI_UNKNOWN
   2590         && s->block_header.uncompressed
   2591           != s->block.uncompressed)
   2592       return XZ_DATA_ERROR;
   2593 
   2594     s->block.hash.unpadded += s->block_header.size
   2595         + s->block.compressed;
   2596 
   2597     s->block.hash.unpadded += check_sizes[s->check_type];
   2598 
   2599     s->block.hash.uncompressed += s->block.uncompressed;
   2600     s->block.hash.crc32 = xz_crc32(
   2601         (const uint8_t *)&s->block.hash,
   2602         sizeof(s->block.hash), s->block.hash.crc32);
   2603 
   2604     ++s->block.count;
   2605   }
   2606 
   2607   return ret;
   2608 }
   2609 
   2610 /* Update the Index size and the CRC32 value. */
   2611 static void index_update(struct xz_dec *s, const struct xz_buf *b)
   2612 {
   2613   size_t in_used = b->in_pos - s->in_start;
   2614   s->index.size += in_used;
   2615   s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
   2616 }
   2617 
   2618 /*
   2619  * Decode the Number of Records, Unpadded Size, and Uncompressed Size
   2620  * fields from the Index field. That is, Index Padding and CRC32 are not
   2621  * decoded by this function.
   2622  *
   2623  * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
   2624  * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
   2625  */
   2626 static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
   2627 {
   2628   enum xz_ret ret;
   2629 
   2630   do {
   2631     ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
   2632     if (ret != XZ_STREAM_END) {
   2633       index_update(s, b);
   2634       return ret;
   2635     }
   2636 
   2637     switch (s->index.sequence) {
   2638     case SEQ_INDEX_COUNT:
   2639       s->index.count = s->vli;
   2640 
   2641       /*
   2642        * Validate that the Number of Records field
   2643        * indicates the same number of Records as
   2644        * there were Blocks in the Stream.
   2645        */
   2646       if (s->index.count != s->block.count)
   2647         return XZ_DATA_ERROR;
   2648 
   2649       s->index.sequence = SEQ_INDEX_UNPADDED;
   2650       break;
   2651 
   2652     case SEQ_INDEX_UNPADDED:
   2653       s->index.hash.unpadded += s->vli;
   2654       s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
   2655       break;
   2656 
   2657     case SEQ_INDEX_UNCOMPRESSED:
   2658       s->index.hash.uncompressed += s->vli;
   2659       s->index.hash.crc32 = xz_crc32(
   2660           (const uint8_t *)&s->index.hash,
   2661           sizeof(s->index.hash),
   2662           s->index.hash.crc32);
   2663       --s->index.count;
   2664       s->index.sequence = SEQ_INDEX_UNPADDED;
   2665       break;
   2666     }
   2667   } while (s->index.count > 0);
   2668 
   2669   return XZ_STREAM_END;
   2670 }
   2671 
   2672 /*
   2673  * Validate that the next four or eight input bytes match the value
   2674  * of s->crc. s->pos must be zero when starting to validate the first byte.
   2675  * The "bits" argument allows using the same code for both CRC32 and CRC64.
   2676  */
   2677 static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
   2678         uint32_t bits)
   2679 {
   2680   do {
   2681     if (b->in_pos == b->in_size)
   2682       return XZ_OK;
   2683 
   2684     if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
   2685       return XZ_DATA_ERROR;
   2686 
   2687     s->pos += 8;
   2688 
   2689   } while (s->pos < bits);
   2690 
   2691   s->crc = 0;
   2692   s->pos = 0;
   2693 
   2694   return XZ_STREAM_END;
   2695 }
   2696 
   2697 /*
   2698  * Skip over the Check field when the Check ID is not supported.
   2699  * Returns true once the whole Check field has been skipped over.
   2700  */
   2701 static int check_skip(struct xz_dec *s, struct xz_buf *b)
   2702 {
   2703   while (s->pos < check_sizes[s->check_type]) {
   2704     if (b->in_pos == b->in_size) return 0;
   2705 
   2706     ++b->in_pos;
   2707     ++s->pos;
   2708   }
   2709 
   2710   s->pos = 0;
   2711 
   2712   return 1;
   2713 }
   2714 
   2715 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
   2716 static enum xz_ret dec_stream_header(struct xz_dec *s)
   2717 {
   2718   if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
   2719     return XZ_FORMAT_ERROR;
   2720 
   2721   if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
   2722       != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
   2723     return XZ_DATA_ERROR;
   2724 
   2725   if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
   2726     return XZ_OPTIONS_ERROR;
   2727 
   2728   /*
   2729    * Of integrity checks, we support none (Check ID = 0),
   2730    * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
   2731    * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
   2732    * check types too, but then the check won't be verified and
   2733    * a warning (XZ_UNSUPPORTED_CHECK) will be given.
   2734    */
   2735   s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
   2736 
   2737   if (s->check_type > XZ_CHECK_MAX)
   2738     return XZ_OPTIONS_ERROR;
   2739 
   2740   if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
   2741     return XZ_UNSUPPORTED_CHECK;
   2742 
   2743   return XZ_OK;
   2744 }
   2745 
   2746 /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
   2747 static enum xz_ret dec_stream_footer(struct xz_dec *s)
   2748 {
   2749   if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
   2750     return XZ_DATA_ERROR;
   2751 
   2752   if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
   2753     return XZ_DATA_ERROR;
   2754 
   2755   /*
   2756    * Validate Backward Size. Note that we never added the size of the
   2757    * Index CRC32 field to s->index.size, thus we use s->index.size / 4
   2758    * instead of s->index.size / 4 - 1.
   2759    */
   2760   if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
   2761     return XZ_DATA_ERROR;
   2762 
   2763   if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
   2764     return XZ_DATA_ERROR;
   2765 
   2766   /*
   2767    * Use XZ_STREAM_END instead of XZ_OK to be more convenient
   2768    * for the caller.
   2769    */
   2770   return XZ_STREAM_END;
   2771 }
   2772 
   2773 /* Decode the Block Header and initialize the filter chain. */
   2774 static enum xz_ret dec_block_header(struct xz_dec *s)
   2775 {
   2776   enum xz_ret ret;
   2777 
   2778   /*
   2779    * Validate the CRC32. We know that the temp buffer is at least
   2780    * eight bytes so this is safe.
   2781    */
   2782   s->temp.size -= 4;
   2783   if (xz_crc32(s->temp.buf, s->temp.size, 0)
   2784       != get_le32(s->temp.buf + s->temp.size))
   2785     return XZ_DATA_ERROR;
   2786 
   2787   s->temp.pos = 2;
   2788 
   2789   /*
   2790    * Catch unsupported Block Flags. We support only one or two filters
   2791    * in the chain, so we catch that with the same test.
   2792    */
   2793 #ifdef XZ_DEC_BCJ
   2794   if (s->temp.buf[1] & 0x3E)
   2795 #else
   2796   if (s->temp.buf[1] & 0x3F)
   2797 #endif
   2798     return XZ_OPTIONS_ERROR;
   2799 
   2800   /* Compressed Size */
   2801   if (s->temp.buf[1] & 0x40) {
   2802     if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
   2803           != XZ_STREAM_END)
   2804       return XZ_DATA_ERROR;
   2805 
   2806     s->block_header.compressed = s->vli;
   2807   } else {
   2808     s->block_header.compressed = VLI_UNKNOWN;
   2809   }
   2810 
   2811   /* Uncompressed Size */
   2812   if (s->temp.buf[1] & 0x80) {
   2813     if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
   2814         != XZ_STREAM_END)
   2815       return XZ_DATA_ERROR;
   2816 
   2817     s->block_header.uncompressed = s->vli;
   2818   } else {
   2819     s->block_header.uncompressed = VLI_UNKNOWN;
   2820   }
   2821 
   2822 #ifdef XZ_DEC_BCJ
   2823   /* If there are two filters, the first one must be a BCJ filter. */
   2824   s->bcj_active = s->temp.buf[1] & 0x01;
   2825   if (s->bcj_active) {
   2826     if (s->temp.size - s->temp.pos < 2)
   2827       return XZ_OPTIONS_ERROR;
   2828 
   2829     ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
   2830     if (ret != XZ_OK)
   2831       return ret;
   2832 
   2833     /*
   2834      * We don't support custom start offset,
   2835      * so Size of Properties must be zero.
   2836      */
   2837     if (s->temp.buf[s->temp.pos++] != 0x00)
   2838       return XZ_OPTIONS_ERROR;
   2839   }
   2840 #endif
   2841 
   2842   /* Valid Filter Flags always take at least two bytes. */
   2843   if (s->temp.size - s->temp.pos < 2)
   2844     return XZ_DATA_ERROR;
   2845 
   2846   /* Filter ID = LZMA2 */
   2847   if (s->temp.buf[s->temp.pos++] != 0x21)
   2848     return XZ_OPTIONS_ERROR;
   2849 
   2850   /* Size of Properties = 1-byte Filter Properties */
   2851   if (s->temp.buf[s->temp.pos++] != 0x01)
   2852     return XZ_OPTIONS_ERROR;
   2853 
   2854   /* Filter Properties contains LZMA2 dictionary size. */
   2855   if (s->temp.size - s->temp.pos < 1)
   2856     return XZ_DATA_ERROR;
   2857 
   2858   ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
   2859   if (ret != XZ_OK)
   2860     return ret;
   2861 
   2862   /* The rest must be Header Padding. */
   2863   while (s->temp.pos < s->temp.size)
   2864     if (s->temp.buf[s->temp.pos++] != 0x00)
   2865       return XZ_OPTIONS_ERROR;
   2866 
   2867   s->temp.pos = 0;
   2868   s->block.compressed = 0;
   2869   s->block.uncompressed = 0;
   2870 
   2871   return XZ_OK;
   2872 }
   2873 
   2874 static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
   2875 {
   2876   enum xz_ret ret;
   2877 
   2878   /*
   2879    * Store the start position for the case when we are in the middle
   2880    * of the Index field.
   2881    */
   2882   s->in_start = b->in_pos;
   2883 
   2884   for (;;) {
   2885     switch (s->sequence) {
   2886     case SEQ_STREAM_HEADER:
   2887       /*
   2888        * Stream Header is copied to s->temp, and then
   2889        * decoded from there. This way if the caller
   2890        * gives us only little input at a time, we can
   2891        * still keep the Stream Header decoding code
   2892        * simple. Similar approach is used in many places
   2893        * in this file.
   2894        */
   2895       if (!fill_temp(s, b))
   2896         return XZ_OK;
   2897 
   2898       /*
   2899        * If dec_stream_header() returns
   2900        * XZ_UNSUPPORTED_CHECK, it is still possible
   2901        * to continue decoding if working in multi-call
   2902        * mode. Thus, update s->sequence before calling
   2903        * dec_stream_header().
   2904        */
   2905       s->sequence = SEQ_BLOCK_START;
   2906 
   2907       ret = dec_stream_header(s);
   2908       if (ret != XZ_OK)
   2909         return ret;
   2910 
   2911     case SEQ_BLOCK_START:
   2912       /* We need one byte of input to continue. */
   2913       if (b->in_pos == b->in_size)
   2914         return XZ_OK;
   2915 
   2916       /* See if this is the beginning of the Index field. */
   2917       if (b->in[b->in_pos] == 0) {
   2918         s->in_start = b->in_pos++;
   2919         s->sequence = SEQ_INDEX;
   2920         break;
   2921       }
   2922 
   2923       /*
   2924        * Calculate the size of the Block Header and
   2925        * prepare to decode it.
   2926        */
   2927       s->block_header.size
   2928         = ((uint32_t)b->in[b->in_pos] + 1) * 4;
   2929 
   2930       s->temp.size = s->block_header.size;
   2931       s->temp.pos = 0;
   2932       s->sequence = SEQ_BLOCK_HEADER;
   2933 
   2934     case SEQ_BLOCK_HEADER:
   2935       if (!fill_temp(s, b))
   2936         return XZ_OK;
   2937 
   2938       ret = dec_block_header(s);
   2939       if (ret != XZ_OK)
   2940         return ret;
   2941 
   2942       s->sequence = SEQ_BLOCK_UNCOMPRESS;
   2943 
   2944     case SEQ_BLOCK_UNCOMPRESS:
   2945       ret = dec_block(s, b);
   2946       if (ret != XZ_STREAM_END)
   2947         return ret;
   2948 
   2949       s->sequence = SEQ_BLOCK_PADDING;
   2950 
   2951     case SEQ_BLOCK_PADDING:
   2952       /*
   2953        * Size of Compressed Data + Block Padding
   2954        * must be a multiple of four. We don't need
   2955        * s->block.compressed for anything else
   2956        * anymore, so we use it here to test the size
   2957        * of the Block Padding field.
   2958        */
   2959       while (s->block.compressed & 3) {
   2960         if (b->in_pos == b->in_size)
   2961           return XZ_OK;
   2962 
   2963         if (b->in[b->in_pos++] != 0)
   2964           return XZ_DATA_ERROR;
   2965 
   2966         ++s->block.compressed;
   2967       }
   2968 
   2969       s->sequence = SEQ_BLOCK_CHECK;
   2970 
   2971     case SEQ_BLOCK_CHECK:
   2972       if (s->check_type == XZ_CHECK_CRC32) {
   2973         ret = crc_validate(s, b, 32);
   2974         if (ret != XZ_STREAM_END)
   2975           return ret;
   2976       }
   2977       else if (IS_CRC64(s->check_type)) {
   2978         ret = crc_validate(s, b, 64);
   2979         if (ret != XZ_STREAM_END)
   2980           return ret;
   2981       }
   2982       else if (!check_skip(s, b)) {
   2983         return XZ_OK;
   2984       }
   2985 
   2986       s->sequence = SEQ_BLOCK_START;
   2987       break;
   2988 
   2989     case SEQ_INDEX:
   2990       ret = dec_index(s, b);
   2991       if (ret != XZ_STREAM_END)
   2992         return ret;
   2993 
   2994       s->sequence = SEQ_INDEX_PADDING;
   2995 
   2996     case SEQ_INDEX_PADDING:
   2997       while ((s->index.size + (b->in_pos - s->in_start))
   2998           & 3) {
   2999         if (b->in_pos == b->in_size) {
   3000           index_update(s, b);
   3001           return XZ_OK;
   3002         }
   3003 
   3004         if (b->in[b->in_pos++] != 0)
   3005           return XZ_DATA_ERROR;
   3006       }
   3007 
   3008       /* Finish the CRC32 value and Index size. */
   3009       index_update(s, b);
   3010 
   3011       /* Compare the hashes to validate the Index field. */
   3012       if (!memeq(&s->block.hash, &s->index.hash,
   3013           sizeof(s->block.hash)))
   3014         return XZ_DATA_ERROR;
   3015 
   3016       s->sequence = SEQ_INDEX_CRC32;
   3017 
   3018     case SEQ_INDEX_CRC32:
   3019       ret = crc_validate(s, b, 32);
   3020       if (ret != XZ_STREAM_END)
   3021         return ret;
   3022 
   3023       s->temp.size = STREAM_HEADER_SIZE;
   3024       s->sequence = SEQ_STREAM_FOOTER;
   3025 
   3026     case SEQ_STREAM_FOOTER:
   3027       if (!fill_temp(s, b))
   3028         return XZ_OK;
   3029 
   3030       return dec_stream_footer(s);
   3031     }
   3032   }
   3033 
   3034   /* Never reached */
   3035 }
   3036 
   3037 /*
   3038  * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
   3039  * multi-call and single-call decoding.
   3040  *
   3041  * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
   3042  * are not going to make any progress anymore. This is to prevent the caller
   3043  * from calling us infinitely when the input file is truncated or otherwise
   3044  * corrupt. Since zlib-style API allows that the caller fills the input buffer
   3045  * only when the decoder doesn't produce any new output, we have to be careful
   3046  * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
   3047  * after the second consecutive call to xz_dec_run() that makes no progress.
   3048  *
   3049  * In single-call mode, if we couldn't decode everything and no error
   3050  * occurred, either the input is truncated or the output buffer is too small.
   3051  * Since we know that the last input byte never produces any output, we know
   3052  * that if all the input was consumed and decoding wasn't finished, the file
   3053  * must be corrupt. Otherwise the output buffer has to be too small or the
   3054  * file is corrupt in a way that decoding it produces too big output.
   3055  *
   3056  * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
   3057  * their original values. This is because with some filter chains there won't
   3058  * be any valid uncompressed data in the output buffer unless the decoding
   3059  * actually succeeds (that's the price to pay of using the output buffer as
   3060  * the workspace).
   3061  */
   3062 enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
   3063 {
   3064   size_t in_start;
   3065   size_t out_start;
   3066   enum xz_ret ret;
   3067 
   3068   in_start = b->in_pos;
   3069   out_start = b->out_pos;
   3070   ret = dec_main(s, b);
   3071 
   3072   if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
   3073     if (s->allow_buf_error)
   3074       ret = XZ_BUF_ERROR;
   3075 
   3076     s->allow_buf_error = 1;
   3077   } else {
   3078     s->allow_buf_error = 0;
   3079   }
   3080 
   3081   return ret;
   3082 }
   3083 
   3084 struct xz_dec *xz_dec_init(uint32_t dict_max)
   3085 {
   3086   struct xz_dec *s = malloc(sizeof(*s));
   3087   if (!s)
   3088     return NULL;
   3089 
   3090 #ifdef XZ_DEC_BCJ
   3091   s->bcj = malloc(sizeof(*s->bcj));
   3092   if (!s->bcj)
   3093     goto error_bcj;
   3094 #endif
   3095 
   3096   s->lzma2 = xz_dec_lzma2_create(dict_max);
   3097   if (s->lzma2 == NULL)
   3098     goto error_lzma2;
   3099 
   3100   xz_dec_reset(s);
   3101   return s;
   3102 
   3103 error_lzma2:
   3104 #ifdef XZ_DEC_BCJ
   3105   free(s->bcj);
   3106 error_bcj:
   3107 #endif
   3108   free(s);
   3109   return NULL;
   3110 }
   3111 
   3112 void xz_dec_reset(struct xz_dec *s)
   3113 {
   3114   s->sequence = SEQ_STREAM_HEADER;
   3115   s->allow_buf_error = 0;
   3116   s->pos = 0;
   3117   s->crc = 0;
   3118   memset(&s->block, 0, sizeof(s->block));
   3119   memset(&s->index, 0, sizeof(s->index));
   3120   s->temp.pos = 0;
   3121   s->temp.size = STREAM_HEADER_SIZE;
   3122 }
   3123 
   3124 void xz_dec_end(struct xz_dec *s)
   3125 {
   3126   if (s != NULL) {
   3127     free((s->lzma2)->dict.buf);
   3128     free(s->lzma2);
   3129 
   3130 #ifdef XZ_DEC_BCJ
   3131     free(s->bcj);
   3132 #endif
   3133     free(s);
   3134   }
   3135 }
   3136