Home | History | Annotate | Download | only in xz
      1 /*
      2  * LZMAInputStream
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz;
     12 
     13 import java.io.InputStream;
     14 import java.io.DataInputStream;
     15 import java.io.IOException;
     16 import org.tukaani.xz.lz.LZDecoder;
     17 import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
     18 import org.tukaani.xz.lzma.LZMADecoder;
     19 
     20 /**
     21  * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
     22  * <p>
     23  * <b>IMPORTANT:</b> In contrast to other classes in this package, this class
     24  * reads data from its input stream one byte at a time. If the input stream
     25  * is for example {@link java.io.FileInputStream}, wrapping it into
     26  * {@link java.io.BufferedInputStream} tends to improve performance a lot.
     27  * This is not automatically done by this class because there may be use
     28  * cases where it is desired that this class won't read any bytes past
     29  * the end of the LZMA stream.
     30  * <p>
     31  * Even when using <code>BufferedInputStream</code>, the performance tends
     32  * to be worse (maybe 10-20&nbsp;% slower) than with {@link LZMA2InputStream}
     33  * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
     34  *
     35  * @since 1.4
     36  */
     37 public class LZMAInputStream extends InputStream {
     38     /**
     39      * Largest dictionary size supported by this implementation.
     40      * <p>
     41      * LZMA allows dictionaries up to one byte less than 4 GiB. This
     42      * implementation supports only 16 bytes less than 2 GiB. This
     43      * limitation is due to Java using signed 32-bit integers for array
     44      * indexing. The limitation shouldn't matter much in practice since so
     45      * huge dictionaries are not normally used.
     46      */
     47     public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
     48 
     49     private InputStream in;
     50     private ArrayCache arrayCache;
     51     private LZDecoder lz;
     52     private RangeDecoderFromStream rc;
     53     private LZMADecoder lzma;
     54 
     55     private boolean endReached = false;
     56 
     57     private final byte[] tempBuf = new byte[1];
     58 
     59     /**
     60      * Number of uncompressed bytes left to be decompressed, or -1 if
     61      * the end marker is used.
     62      */
     63     private long remainingSize;
     64 
     65     private IOException exception = null;
     66 
     67     /**
     68      * Gets approximate decompressor memory requirements as kibibytes for
     69      * the given dictionary size and LZMA properties byte (lc, lp, and pb).
     70      *
     71      * @param       dictSize    LZMA dictionary size as bytes, should be
     72      *                          in the range [<code>0</code>,
     73      *                          <code>DICT_SIZE_MAX</code>]
     74      *
     75      * @param       propsByte   LZMA properties byte that encodes the values
     76      *                          of lc, lp, and pb
     77      *
     78      * @return      approximate memory requirements as kibibytes (KiB)
     79      *
     80      * @throws      UnsupportedOptionsException
     81      *                          if <code>dictSize</code> is outside
     82      *                          the range [<code>0</code>,
     83      *                          <code>DICT_SIZE_MAX</code>]
     84      *
     85      * @throws      CorruptedInputException
     86      *                          if <code>propsByte</code> is invalid
     87      */
     88     public static int getMemoryUsage(int dictSize, byte propsByte)
     89             throws UnsupportedOptionsException, CorruptedInputException {
     90         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
     91             throw new UnsupportedOptionsException(
     92                     "LZMA dictionary is too big for this implementation");
     93 
     94         int props = propsByte & 0xFF;
     95         if (props > (4 * 5 + 4) * 9 + 8)
     96             throw new CorruptedInputException("Invalid LZMA properties byte");
     97 
     98         props %= 9 * 5;
     99         int lp = props / 9;
    100         int lc = props - lp * 9;
    101 
    102         return getMemoryUsage(dictSize, lc, lp);
    103     }
    104 
    105     /**
    106      * Gets approximate decompressor memory requirements as kibibytes for
    107      * the given dictionary size, lc, and lp. Note that pb isn't needed.
    108      *
    109      * @param       dictSize    LZMA dictionary size as bytes, must be
    110      *                          in the range [<code>0</code>,
    111      *                          <code>DICT_SIZE_MAX</code>]
    112      *
    113      * @param       lc          number of literal context bits, must be
    114      *                          in the range [0, 8]
    115      *
    116      * @param       lp          number of literal position bits, must be
    117      *                          in the range [0, 4]
    118      *
    119      * @return      approximate memory requirements as kibibytes (KiB)
    120      */
    121     public static int getMemoryUsage(int dictSize, int lc, int lp) {
    122         if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
    123             throw new IllegalArgumentException("Invalid lc or lp");
    124 
    125         // Probability variables have the type "short". There are
    126         // 0x300 (768) probability variables in each literal subcoder.
    127         // The number of literal subcoders is 2^(lc + lp).
    128         //
    129         // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer
    130         // + sizeof(short) * number probability variables per literal subcoder
    131         //   * number of literal subcoders
    132         return 10 + getDictSize(dictSize) / 1024
    133                + ((2 * 0x300) << (lc + lp)) / 1024;
    134     }
    135 
    136     private static int getDictSize(int dictSize) {
    137         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
    138             throw new IllegalArgumentException(
    139                     "LZMA dictionary is too big for this implementation");
    140 
    141         // For performance reasons, use a 4 KiB dictionary if something
    142         // smaller was requested. It's a rare situation and the performance
    143         // difference isn't huge, and it starts to matter mostly when the
    144         // dictionary is just a few bytes. But we need to handle the special
    145         // case of dictSize == 0 anyway, which is an allowed value but in
    146         // practice means one-byte dictionary.
    147         //
    148         // Note that using a dictionary bigger than specified in the headers
    149         // can hide errors if there is a reference to data beyond the original
    150         // dictionary size but is still within 4 KiB.
    151         if (dictSize < 4096)
    152             dictSize = 4096;
    153 
    154         // Round dictionary size upward to a multiple of 16. This way LZMA
    155         // can use LZDecoder.getPos() for calculating LZMA's posMask.
    156         return (dictSize + 15) & ~15;
    157     }
    158 
    159     /**
    160      * Creates a new .lzma file format decompressor without
    161      * a memory usage limit.
    162      *
    163      * @param       in          input stream from which .lzma data is read;
    164      *                          it might be a good idea to wrap it in
    165      *                          <code>BufferedInputStream</code>, see the
    166      *                          note at the top of this page
    167      *
    168      * @throws      CorruptedInputException
    169      *                          file is corrupt or perhaps not in
    170      *                          the .lzma format at all
    171      *
    172      * @throws      UnsupportedOptionsException
    173      *                          dictionary size or uncompressed size is too
    174      *                          big for this implementation
    175      *
    176      * @throws      EOFException
    177      *                          file is truncated or perhaps not in
    178      *                          the .lzma format at all
    179      *
    180      * @throws      IOException may be thrown by <code>in</code>
    181      */
    182     public LZMAInputStream(InputStream in) throws IOException {
                // -1 is the "no memory usage limit" value of the
                // (InputStream, int) constructor that this delegates to.
    183         this(in, -1);
    184     }
    185 
    186     /**
    187      * Creates a new .lzma file format decompressor without
    188      * a memory usage limit.
    189      * <p>
    190      * This is identical to <code>LZMAInputStream(InputStream)</code>
    191      * except that this also takes the <code>arrayCache</code> argument.
    192      *
    193      * @param       in          input stream from which .lzma data is read;
    194      *                          it might be a good idea to wrap it in
    195      *                          <code>BufferedInputStream</code>, see the
    196      *                          note at the top of this page
    197      *
    198      *
    199      * @param       arrayCache  cache to be used for allocating large arrays
    200      *
    201      * @throws      CorruptedInputException
    202      *                          file is corrupt or perhaps not in
    203      *                          the .lzma format at all
    204      *
    205      * @throws      UnsupportedOptionsException
    206      *                          dictionary size or uncompressed size is too
    207      *                          big for this implementation
    208      *
    209      * @throws      EOFException
    210      *                          file is truncated or perhaps not in
    211      *                          the .lzma format at all
    212      *
    213      * @throws      IOException may be thrown by <code>in</code>
    214      *
    215      * @since 1.7
    216      */
    217     public LZMAInputStream(InputStream in, ArrayCache arrayCache)
    218             throws IOException {
                // Delegate with memoryLimit == -1, which disables
                // the memory usage check.
    219         this(in, -1, arrayCache);
    220     }
    221 
    222     /**
    223      * Creates a new .lzma file format decompressor with an optional
    224      * memory usage limit.
    225      *
    226      * @param       in          input stream from which .lzma data is read;
    227      *                          it might be a good idea to wrap it in
    228      *                          <code>BufferedInputStream</code>, see the
    229      *                          note at the top of this page
    230      *
    231      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    232      *                          or <code>-1</code> to impose no
    233      *                          memory usage limit
    234      *
    235      * @throws      CorruptedInputException
    236      *                          file is corrupt or perhaps not in
    237      *                          the .lzma format at all
    238      *
    239      * @throws      UnsupportedOptionsException
    240      *                          dictionary size or uncompressed size is too
    241      *                          big for this implementation
    242      *
    243      * @throws      MemoryLimitException
    244      *                          memory usage limit was exceeded
    245      *
    246      * @throws      EOFException
    247      *                          file is truncated or perhaps not in
    248      *                          the .lzma format at all
    249      *
    250      * @throws      IOException may be thrown by <code>in</code>
    251      */
    252     public LZMAInputStream(InputStream in, int memoryLimit)
    253             throws IOException {
                // Delegate to the three-argument constructor and let it
                // allocate large arrays through the default array cache.
    254         this(in, memoryLimit, ArrayCache.getDefaultCache());
    255     }
    256 
    257     /**
    258      * Creates a new .lzma file format decompressor with an optional
    259      * memory usage limit.
    260      * <p>
    261      * This is identical to <code>LZMAInputStream(InputStream, int)</code>
    262      * except that this also takes the <code>arrayCache</code> argument.
    263      *
    264      * @param       in          input stream from which .lzma data is read;
    265      *                          it might be a good idea to wrap it in
    266      *                          <code>BufferedInputStream</code>, see the
    267      *                          note at the top of this page
    268      *
    269      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    270      *                          or <code>-1</code> to impose no
    271      *                          memory usage limit
    272      *
    273      * @param       arrayCache  cache to be used for allocating large arrays
    274      *
    275      * @throws      CorruptedInputException
    276      *                          file is corrupt or perhaps not in
    277      *                          the .lzma format at all
    278      *
    279      * @throws      UnsupportedOptionsException
    280      *                          dictionary size or uncompressed size is too
    281      *                          big for this implementation
    282      *
    283      * @throws      MemoryLimitException
    284      *                          memory usage limit was exceeded
    285      *
    286      * @throws      EOFException
    287      *                          file is truncated or perhaps not in
    288      *                          the .lzma format at all
    289      *
    290      * @throws      IOException may be thrown by <code>in</code>
    291      *
    292      * @since 1.7
    293      */
    294     public LZMAInputStream(InputStream in, int memoryLimit,
    295                            ArrayCache arrayCache) throws IOException {
    296         DataInputStream inData = new DataInputStream(in);
    297 
    298         // Properties byte (lc, lp, and pb)
    299         byte propsByte = inData.readByte();
    300 
    301         // Dictionary size is an unsigned 32-bit little endian integer.
    302         int dictSize = 0;
    303         for (int i = 0; i < 4; ++i)
    304             dictSize |= inData.readUnsignedByte() << (8 * i);
    305 
    306         // Uncompressed size is an unsigned 64-bit little endian integer.
    307         // The maximum 64-bit value is a special case (becomes -1 here)
    308         // which indicates that the end marker is used instead of knowing
    309         // the uncompressed size beforehand.
    310         long uncompSize = 0;
    311         for (int i = 0; i < 8; ++i)
    312             uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
    313 
    314         // Check the memory usage limit.
    315         int memoryNeeded = getMemoryUsage(dictSize, propsByte);
    316         if (memoryLimit != -1 && memoryNeeded > memoryLimit)
    317             throw new MemoryLimitException(memoryNeeded, memoryLimit);
    318 
    319         initialize(in, uncompSize, propsByte, dictSize, null, arrayCache);
    320     }
    321 
    322     /**
    323      * Creates a new input stream that decompresses raw LZMA data (no .lzma
    324      * header) from <code>in</code>.
    325      * <p>
    326      * The caller needs to know if the "end of payload marker (EOPM)" alias
    327      * "end of stream marker (EOS marker)" alias "end marker" is present.
    328      * If the end marker isn't used, the caller must know the exact
    329      * uncompressed size of the stream.
    330      * <p>
    331      * The caller also needs to provide the LZMA properties byte that encodes
    332      * the number of literal context bits (lc), literal position bits (lp),
    333      * and position bits (pb).
    334      * <p>
    335      * The dictionary size used when compressing is also needed. Specifying
    336      * a too small dictionary size will prevent decompressing the stream.
    337      * Specifying a too big dictionary is waste of memory but decompression
    338      * will work.
    339      * <p>
    340      * There is no need to specify a dictionary bigger than
    341      * the uncompressed size of the data even if a bigger dictionary
    342      * was used when compressing. If you know the uncompressed size
    343      * of the data, this might allow saving some memory.
    344      *
    345      * @param       in          input stream from which compressed
    346      *                          data is read
    347      *
    348      * @param       uncompSize  uncompressed size of the LZMA stream or -1
    349      *                          if the end marker is used in the LZMA stream
    350      *
    351      * @param       propsByte   LZMA properties byte that has the encoded
    352      *                          values for literal context bits (lc), literal
    353      *                          position bits (lp), and position bits (pb)
    354      *
    355      * @param       dictSize    dictionary size as bytes, must be in the range
    356      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
    357      *
    358      * @throws      CorruptedInputException
    359      *                          if <code>propsByte</code> is invalid or
    360      *                          the first input byte is not 0x00
    361      *
    362      * @throws      UnsupportedOptionsException
    363      *                          dictionary size or uncompressed size is too
    364      *                          big for this implementation
    365      *
    366      *
    367      */
    368     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
    369                            int dictSize) throws IOException {
    370         initialize(in, uncompSize, propsByte, dictSize, null,
    371                    ArrayCache.getDefaultCache());
    372     }
    373 
    374     /**
    375      * Creates a new input stream that decompresses raw LZMA data (no .lzma
    376      * header) from <code>in</code> optionally with a preset dictionary.
    377      *
    378      * @param       in          input stream from which LZMA-compressed
    379      *                          data is read
    380      *
    381      * @param       uncompSize  uncompressed size of the LZMA stream or -1
    382      *                          if the end marker is used in the LZMA stream
    383      *
    384      * @param       propsByte   LZMA properties byte that has the encoded
    385      *                          values for literal context bits (lc), literal
    386      *                          position bits (lp), and position bits (pb)
    387      *
    388      * @param       dictSize    dictionary size as bytes, must be in the range
    389      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
    390      *
    391      * @param       presetDict  preset dictionary or <code>null</code>
    392      *                          to use no preset dictionary
    393      *
    394      * @throws      CorruptedInputException
    395      *                          if <code>propsByte</code> is invalid or
    396      *                          the first input byte is not 0x00
    397      *
    398      * @throws      UnsupportedOptionsException
    399      *                          dictionary size or uncompressed size is too
    400      *                          big for this implementation
    401      *
    402      * @throws      EOFException file is truncated or corrupt
    403      *
    404      * @throws      IOException may be thrown by <code>in</code>
    405      */
    406     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
    407                            int dictSize, byte[] presetDict)
    408             throws IOException {
    409         initialize(in, uncompSize, propsByte, dictSize, presetDict,
    410                    ArrayCache.getDefaultCache());
    411     }
    412 
    413     /**
    414      * Creates a new input stream that decompresses raw LZMA data (no .lzma
    415      * header) from <code>in</code> optionally with a preset dictionary.
    416      * <p>
    417      * This is identical to <code>LZMAInputStream(InputStream, long, byte, int,
    418      * byte[])</code> except that this also takes the <code>arrayCache</code>
    419      * argument.
    420      *
    421      * @param       in          input stream from which LZMA-compressed
    422      *                          data is read
    423      *
    424      * @param       uncompSize  uncompressed size of the LZMA stream or -1
    425      *                          if the end marker is used in the LZMA stream
    426      *
    427      * @param       propsByte   LZMA properties byte that has the encoded
    428      *                          values for literal context bits (lc), literal
    429      *                          position bits (lp), and position bits (pb)
    430      *
    431      * @param       dictSize    dictionary size as bytes, must be in the range
    432      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
    433      *
    434      * @param       presetDict  preset dictionary or <code>null</code>
    435      *                          to use no preset dictionary
    436      *
    437      * @param       arrayCache  cache to be used for allocating large arrays
    438      *
    439      * @throws      CorruptedInputException
    440      *                          if <code>propsByte</code> is invalid or
    441      *                          the first input byte is not 0x00
    442      *
    443      * @throws      UnsupportedOptionsException
    444      *                          dictionary size or uncompressed size is too
    445      *                          big for this implementation
    446      *
    447      * @throws      EOFException file is truncated or corrupt
    448      *
    449      * @throws      IOException may be thrown by <code>in</code>
    450      *
    451      * @since 1.7
    452      */
    453     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
    454                            int dictSize, byte[] presetDict,
    455                            ArrayCache arrayCache)
    456             throws IOException {
                // All arguments are passed on unchanged. The propsByte variant
                // of initialize() validates them and decodes lc, lp, and pb.
    457         initialize(in, uncompSize, propsByte, dictSize, presetDict,
    458                    arrayCache);
    459     }
    460 
    461     /**
    462      * Creates a new input stream that decompresses raw LZMA data (no .lzma
    463      * header) from <code>in</code> optionally with a preset dictionary.
    464      *
    465      * @param       in          input stream from which LZMA-compressed
    466      *                          data is read
    467      *
    468      * @param       uncompSize  uncompressed size of the LZMA stream or -1
    469      *                          if the end marker is used in the LZMA stream
    470      *
    471      * @param       lc          number of literal context bits, must be
    472      *                          in the range [0, 8]
    473      *
    474      * @param       lp          number of literal position bits, must be
    475      *                          in the range [0, 4]
    476      *
    477      * @param       pb          number of position bits, must be
    478      *                          in the range [0, 4]
    479      *
    480      * @param       dictSize    dictionary size as bytes, must be in the range
    481      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
    482      *
    483      * @param       presetDict  preset dictionary or <code>null</code>
    484      *                          to use no preset dictionary
    485      *
    486      * @throws      CorruptedInputException
    487      *                          if the first input byte is not 0x00
    488      *
    489      * @throws      EOFException file is truncated or corrupt
    490      *
    491      * @throws      IOException may be thrown by <code>in</code>
    492      */
    493     public LZMAInputStream(InputStream in, long uncompSize,
    494                            int lc, int lp, int pb,
    495                            int dictSize, byte[] presetDict)
    496             throws IOException {
    497         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
    498                    ArrayCache.getDefaultCache());
    499     }
    500 
    501     /**
    502      * Creates a new input stream that decompresses raw LZMA data (no .lzma
    503      * header) from <code>in</code> optionally with a preset dictionary.
    504      * <p>
    505      * This is identical to <code>LZMAInputStream(InputStream, long, int, int,
    506      * int, int, byte[])</code> except that this also takes the
    507      * <code>arrayCache</code> argument.
    508      *
    509      * @param       in          input stream from which LZMA-compressed
    510      *                          data is read
    511      *
    512      * @param       uncompSize  uncompressed size of the LZMA stream or -1
    513      *                          if the end marker is used in the LZMA stream
    514      *
    515      * @param       lc          number of literal context bits, must be
    516      *                          in the range [0, 8]
    517      *
    518      * @param       lp          number of literal position bits, must be
    519      *                          in the range [0, 4]
    520      *
    521      * @param       pb          number of position bits, must be
    522      *                          in the range [0, 4]
    523      *
    524      * @param       dictSize    dictionary size as bytes, must be in the range
    525      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
    526      *
    527      * @param       presetDict  preset dictionary or <code>null</code>
    528      *                          to use no preset dictionary
    529      *
    530      * @param       arrayCache  cache to be used for allocating large arrays
    531      *
    532      * @throws      CorruptedInputException
    533      *                          if the first input byte is not 0x00
    534      *
    535      * @throws      EOFException file is truncated or corrupt
    536      *
    537      * @throws      IOException may be thrown by <code>in</code>
    538      *
    539      * @since 1.7
    540      */
    541     public LZMAInputStream(InputStream in, long uncompSize,
    542                            int lc, int lp, int pb,
    543                            int dictSize, byte[] presetDict,
    544                            ArrayCache arrayCache)
    545             throws IOException {
                // Pure pass-through: the lc/lp/pb variant of initialize()
                // checks the arguments and sets up the decoder objects.
    546         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
    547                    arrayCache);
    548     }
    549 
    550     private void initialize(InputStream in, long uncompSize, byte propsByte,
    551                             int dictSize, byte[] presetDict,
    552                             ArrayCache arrayCache)
    553             throws IOException {
    554         // Validate the uncompressed size since the other "initialize" throws
    555         // IllegalArgumentException if uncompSize < -1.
    556         if (uncompSize < -1)
    557             throw new UnsupportedOptionsException(
    558                     "Uncompressed size is too big");
    559 
    560         // Decode the properties byte. In contrast to LZMA2, there is no
    561         // limit of lc + lp <= 4.
    562         int props = propsByte & 0xFF;
    563         if (props > (4 * 5 + 4) * 9 + 8)
    564             throw new CorruptedInputException("Invalid LZMA properties byte");
    565 
    566         int pb = props / (9 * 5);
    567         props -= pb * 9 * 5;
    568         int lp = props / 9;
    569         int lc = props - lp * 9;
    570 
    571         // Validate the dictionary size since the other "initialize" throws
    572         // IllegalArgumentException if dictSize is not supported.
    573         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
    574             throw new UnsupportedOptionsException(
    575                     "LZMA dictionary is too big for this implementation");
    576 
    577         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
    578                    arrayCache);
    579     }
    580 
    581     private void initialize(InputStream in, long uncompSize,
    582                             int lc, int lp, int pb,
    583                             int dictSize, byte[] presetDict,
    584                             ArrayCache arrayCache)
    585             throws IOException {
    586         // getDictSize validates dictSize and gives a message in
    587         // the exception too, so skip validating dictSize here.
    588         if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
    589                 || pb < 0 || pb > 4)
    590             throw new IllegalArgumentException();
    591 
    592         this.in = in;
    593         this.arrayCache = arrayCache;
    594 
    595         // If uncompressed size is known, use it to avoid wasting memory for
    596         // a uselessly large dictionary buffer.
    597         dictSize = getDictSize(dictSize);
    598         if (uncompSize >= 0 && dictSize > uncompSize)
    599             dictSize = getDictSize((int)uncompSize);
    600 
    601         lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
    602         rc = new RangeDecoderFromStream(in);
    603         lzma = new LZMADecoder(lz, rc, lc, lp, pb);
    604 
    605         remainingSize = uncompSize;
    606     }
    607 
    608     /**
    609      * Decompresses the next byte from this input stream.
    610      * <p>
    611      * Reading lots of data with <code>read()</code> from this input stream
    612      * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
    613      * if you need to read lots of data one byte at a time.
    614      *
    615      * @return      the next decompressed byte, or <code>-1</code>
    616      *              to indicate the end of the compressed stream
    617      *
    618      * @throws      CorruptedInputException
    619      *
    620      * @throws      XZIOException if the stream has been closed
    621      *
    622      * @throws      EOFException
    623      *                          compressed input is truncated or corrupt
    624      *
    625      * @throws      IOException may be thrown by <code>in</code>
    626      */
    627     public int read() throws IOException {
    628         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    629     }
    630 
    631     /**
    632      * Decompresses into an array of bytes.
    633      * <p>
    634      * If <code>len</code> is zero, no bytes are read and <code>0</code>
    635      * is returned. Otherwise this will block until <code>len</code>
    636      * bytes have been decompressed, the end of the LZMA stream is reached,
    637      * or an exception is thrown.
    638      *
    639      * @param       buf         target buffer for uncompressed data
    640      * @param       off         start offset in <code>buf</code>
    641      * @param       len         maximum number of uncompressed bytes to read
    642      *
    643      * @return      number of bytes read, or <code>-1</code> to indicate
    644      *              the end of the compressed stream
    645      *
    646      * @throws      CorruptedInputException
    647      *
    648      * @throws      XZIOException if the stream has been closed
    649      *
    650      * @throws      EOFException compressed input is truncated or corrupt
    651      *
    652      * @throws      IOException may be thrown by <code>in</code>
    653      */
    654     public int read(byte[] buf, int off, int len) throws IOException {
    655         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
    656             throw new IndexOutOfBoundsException();
    657 
    658         if (len == 0)
    659             return 0;
    660 
    661         if (in == null)
    662             throw new XZIOException("Stream closed");
    663 
    664         if (exception != null)
    665             throw exception;
    666 
    667         if (endReached)
    668             return -1;
    669 
    670         try {
    671             int size = 0;
    672 
    673             while (len > 0) {
    674                 // If uncompressed size is known and thus no end marker will
    675                 // be present, set the limit so that the uncompressed size
    676                 // won't be exceeded.
    677                 int copySizeMax = len;
    678                 if (remainingSize >= 0 && remainingSize < len)
    679                     copySizeMax = (int)remainingSize;
    680 
    681                 lz.setLimit(copySizeMax);
    682 
    683                 // Decode into the dictionary buffer.
    684                 try {
    685                     lzma.decode();
    686                 } catch (CorruptedInputException e) {
    687                     // The end marker is encoded with a LZMA symbol that
    688                     // indicates maximum match distance. This is larger
    689                     // than any supported dictionary and thus causes
    690                     // CorruptedInputException from LZDecoder.repeat.
    691                     if (remainingSize != -1 || !lzma.endMarkerDetected())
    692                         throw e;
    693 
    694                     endReached = true;
    695 
    696                     // The exception makes lzma.decode() miss the last range
    697                     // decoder normalization, so do it here. This might
    698                     // cause an IOException if it needs to read a byte
    699                     // from the input stream.
    700                     rc.normalize();
    701                 }
    702 
    703                 // Copy from the dictionary to buf.
    704                 int copiedSize = lz.flush(buf, off);
    705                 off += copiedSize;
    706                 len -= copiedSize;
    707                 size += copiedSize;
    708 
    709                 if (remainingSize >= 0) {
    710                     // Update the number of bytes left to be decompressed.
    711                     remainingSize -= copiedSize;
    712                     assert remainingSize >= 0;
    713 
    714                     if (remainingSize == 0)
    715                         endReached = true;
    716                 }
    717 
    718                 if (endReached) {
    719                     // Checking these helps a lot when catching corrupt
    720                     // or truncated .lzma files. LZMA Utils doesn't do
    721                     // the first check and thus it accepts many invalid
    722                     // files that this implementation and XZ Utils don't.
    723                     if (!rc.isFinished() || lz.hasPending())
    724                         throw new CorruptedInputException();
    725 
    726                     putArraysToCache();
    727                     return size == 0 ? -1 : size;
    728                 }
    729             }
    730 
    731             return size;
    732 
    733         } catch (IOException e) {
    734             exception = e;
    735             throw e;
    736         }
    737     }
    738 
    739     private void putArraysToCache() {
    740         if (lz != null) {
    741             lz.putArraysToCache(arrayCache);
    742             lz = null;
    743         }
    744     }
    745 
    746     /**
    747      * Closes the stream and calls <code>in.close()</code>.
    748      * If the stream was already closed, this does nothing.
    749      *
    750      * @throws  IOException if thrown by <code>in.close()</code>
    751      */
    752     public void close() throws IOException {
    753         if (in != null) {
    754             putArraysToCache();
    755 
    756             try {
    757                 in.close();
    758             } finally {
    759                 in = null;
    760             }
    761         }
    762     }
    763 }
    764