Home | History | Annotate | Download | only in xz
      1 /*
      2  * XZInputStream
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 package org.tukaani.xz;
     11 
     12 import java.io.InputStream;
     13 import java.io.DataInputStream;
     14 import java.io.IOException;
     15 import java.io.EOFException;
     16 import org.tukaani.xz.common.DecoderUtil;
     17 
     18 /**
     19  * Decompresses a .xz file in streamed mode (no seeking).
     20  * <p>
     21  * Use this to decompress regular standalone .xz files. This reads from
     22  * its input stream until the end of the input or until an error occurs.
     23  * This supports decompressing concatenated .xz files.
     24  *
     25  * <h4>Typical use cases</h4>
     26  * <p>
     27  * Getting an input stream to decompress a .xz file:
     28  * <p><blockquote><pre>
     29  * InputStream infile = new FileInputStream("foo.xz");
     30  * XZInputStream inxz = new XZInputStream(infile);
     31  * </pre></blockquote>
     32  * <p>
     33  * It's important to keep in mind that decompressor memory usage depends
     34  * on the settings used to compress the file. The worst-case memory usage
     35  * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
     36  * require more than about 65&nbsp;MiB because that's how much decompressing
     37  * a file created with the highest preset level will need, and only a few
     38  * people use settings other than the predefined presets.
     39  * <p>
     40  * It is possible to specify a memory usage limit for
     41  * <code>XZInputStream</code>. If decompression requires more memory than
     42  * the specified limit, MemoryLimitException will be thrown when reading
     43  * from the stream. For example, the following sets the memory usage limit
     44  * to 100&nbsp;MiB:
     45  * <p><blockquote><pre>
     46  * InputStream infile = new FileInputStream("foo.xz");
     47  * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
     48  * </pre></blockquote>
     49  *
     50  * <h4>When uncompressed size is known beforehand</h4>
     51  * <p>
     52  * If you are decompressing complete files and your application knows
     53  * exactly how much uncompressed data there should be, it is good to try
     54  * reading one more byte by calling <code>read()</code> and checking
     55  * that it returns <code>-1</code>. This way the decompressor will parse the
     56  * file footers and verify the integrity checks, giving the caller more
     57  * confidence that the uncompressed data is valid. (This advice seems to
     58  * apply to
     59  * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
     60  *
     61  * @see SingleXZInputStream
     62  */
     63 public class XZInputStream extends InputStream {
     64     private final int memoryLimit;
     65     private InputStream in;
     66     private SingleXZInputStream xzIn;
     67     private final boolean verifyCheck;
     68     private boolean endReached = false;
     69     private IOException exception = null;
     70 
     71     private final byte[] tempBuf = new byte[1];
     72 
     73     /**
     74      * Creates a new XZ decompressor without a memory usage limit.
     75      * <p>
     76      * This constructor reads and parses the XZ Stream Header (12 bytes)
     77      * from <code>in</code>. The header of the first Block is not read
     78      * until <code>read</code> is called.
     79      *
     80      * @param       in          input stream from which XZ-compressed
     81      *                          data is read
     82      *
     83      * @throws      XZFormatException
     84      *                          input is not in the XZ format
     85      *
     86      * @throws      CorruptedInputException
     87      *                          XZ header CRC32 doesn't match
     88      *
     89      * @throws      UnsupportedOptionsException
     90      *                          XZ header is valid but specifies options
     91      *                          not supported by this implementation
     92      *
     93      * @throws      EOFException
     94      *                          less than 12 bytes of input was available
     95      *                          from <code>in</code>
     96      *
     97      * @throws      IOException may be thrown by <code>in</code>
     98      */
     99     public XZInputStream(InputStream in) throws IOException {
    100         this(in, -1);
    101     }
    102 
    103     /**
    104      * Creates a new XZ decompressor with an optional memory usage limit.
    105      * <p>
    106      * This is identical to <code>XZInputStream(InputStream)</code> except
    107      * that this takes also the <code>memoryLimit</code> argument.
    108      *
    109      * @param       in          input stream from which XZ-compressed
    110      *                          data is read
    111      *
    112      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    113      *                          or <code>-1</code> to impose no
    114      *                          memory usage limit
    115      *
    116      * @throws      XZFormatException
    117      *                          input is not in the XZ format
    118      *
    119      * @throws      CorruptedInputException
    120      *                          XZ header CRC32 doesn't match
    121      *
    122      * @throws      UnsupportedOptionsException
    123      *                          XZ header is valid but specifies options
    124      *                          not supported by this implementation
    125      *
    126      * @throws      EOFException
    127      *                          less than 12 bytes of input was available
    128      *                          from <code>in</code>
    129      *
    130      * @throws      IOException may be thrown by <code>in</code>
    131      */
    132     public XZInputStream(InputStream in, int memoryLimit) throws IOException {
    133         this(in, memoryLimit, true);
    134     }
    135 
    136     /**
    137      * Creates a new XZ decompressor with an optional memory usage limit
    138      * and ability to disable verification of integrity checks.
    139      * <p>
    140      * This is identical to <code>XZInputStream(InputStream,int)</code> except
    141      * that this takes also the <code>verifyCheck</code> argument.
    142      * <p>
    143      * Note that integrity check verification should almost never be disabled.
    144      * Possible reasons to disable integrity check verification:
    145      * <ul>
    146      *   <li>Trying to recover data from a corrupt .xz file.</li>
    147      *   <li>Speeding up decompression. This matters mostly with SHA-256
    148      *   or with files that have compressed extremely well. It's recommended
    149      *   that integrity checking isn't disabled for performance reasons
    150      *   unless the file integrity is verified externally in some other
    151      *   way.</li>
    152      * </ul>
    153      * <p>
    154      * <code>verifyCheck</code> only affects the integrity check of
    155      * the actual compressed data. The CRC32 fields in the headers
    156      * are always verified.
    157      *
    158      * @param       in          input stream from which XZ-compressed
    159      *                          data is read
    160      *
    161      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    162      *                          or <code>-1</code> to impose no
    163      *                          memory usage limit
    164      *
    165      * @param       verifyCheck if <code>true</code>, the integrity checks
    166      *                          will be verified; this should almost never
    167      *                          be set to <code>false</code>
    168      *
    169      * @throws      XZFormatException
    170      *                          input is not in the XZ format
    171      *
    172      * @throws      CorruptedInputException
    173      *                          XZ header CRC32 doesn't match
    174      *
    175      * @throws      UnsupportedOptionsException
    176      *                          XZ header is valid but specifies options
    177      *                          not supported by this implementation
    178      *
    179      * @throws      EOFException
    180      *                          less than 12 bytes of input was available
    181      *                          from <code>in</code>
    182      *
    183      * @throws      IOException may be thrown by <code>in</code>
    184      *
    185      * @since 1.6
    186      */
    187     public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
    188             throws IOException {
    189         this.in = in;
    190         this.memoryLimit = memoryLimit;
    191         this.verifyCheck = verifyCheck;
    192         this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck);
    193     }
    194 
    195     /**
    196      * Decompresses the next byte from this input stream.
    197      * <p>
    198      * Reading lots of data with <code>read()</code> from this input stream
    199      * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
    200      * if you need to read lots of data one byte at a time.
    201      *
    202      * @return      the next decompressed byte, or <code>-1</code>
    203      *              to indicate the end of the compressed stream
    204      *
    205      * @throws      CorruptedInputException
    206      * @throws      UnsupportedOptionsException
    207      * @throws      MemoryLimitException
    208      *
    209      * @throws      XZIOException if the stream has been closed
    210      *
    211      * @throws      EOFException
    212      *                          compressed input is truncated or corrupt
    213      *
    214      * @throws      IOException may be thrown by <code>in</code>
    215      */
    216     public int read() throws IOException {
    217         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    218     }
    219 
    220     /**
    221      * Decompresses into an array of bytes.
    222      * <p>
    223      * If <code>len</code> is zero, no bytes are read and <code>0</code>
    224      * is returned. Otherwise this will try to decompress <code>len</code>
    225      * bytes of uncompressed data. Less than <code>len</code> bytes may
    226      * be read only in the following situations:
    227      * <ul>
    228      *   <li>The end of the compressed data was reached successfully.</li>
    229      *   <li>An error is detected after at least one but less <code>len</code>
    230      *       bytes have already been successfully decompressed.
    231      *       The next call with non-zero <code>len</code> will immediately
    232      *       throw the pending exception.</li>
    233      *   <li>An exception is thrown.</li>
    234      * </ul>
    235      *
    236      * @param       buf         target buffer for uncompressed data
    237      * @param       off         start offset in <code>buf</code>
    238      * @param       len         maximum number of uncompressed bytes to read
    239      *
    240      * @return      number of bytes read, or <code>-1</code> to indicate
    241      *              the end of the compressed stream
    242      *
    243      * @throws      CorruptedInputException
    244      * @throws      UnsupportedOptionsException
    245      * @throws      MemoryLimitException
    246      *
    247      * @throws      XZIOException if the stream has been closed
    248      *
    249      * @throws      EOFException
    250      *                          compressed input is truncated or corrupt
    251      *
    252      * @throws      IOException may be thrown by <code>in</code>
    253      */
    254     public int read(byte[] buf, int off, int len) throws IOException {
    255         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
    256             throw new IndexOutOfBoundsException();
    257 
    258         if (len == 0)
    259             return 0;
    260 
    261         if (in == null)
    262             throw new XZIOException("Stream closed");
    263 
    264         if (exception != null)
    265             throw exception;
    266 
    267         if (endReached)
    268             return -1;
    269 
    270         int size = 0;
    271 
    272         try {
    273             while (len > 0) {
    274                 if (xzIn == null) {
    275                     prepareNextStream();
    276                     if (endReached)
    277                         return size == 0 ? -1 : size;
    278                 }
    279 
    280                 int ret = xzIn.read(buf, off, len);
    281 
    282                 if (ret > 0) {
    283                     size += ret;
    284                     off += ret;
    285                     len -= ret;
    286                 } else if (ret == -1) {
    287                     xzIn = null;
    288                 }
    289             }
    290         } catch (IOException e) {
    291             exception = e;
    292             if (size == 0)
    293                 throw e;
    294         }
    295 
    296         return size;
    297     }
    298 
    299     private void prepareNextStream() throws IOException {
    300         DataInputStream inData = new DataInputStream(in);
    301         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
    302 
    303         // The size of Stream Padding must be a multiple of four bytes,
    304         // all bytes zero.
    305         do {
    306             // First try to read one byte to see if we have reached the end
    307             // of the file.
    308             int ret = inData.read(buf, 0, 1);
    309             if (ret == -1) {
    310                 endReached = true;
    311                 return;
    312             }
    313 
    314             // Since we got one byte of input, there must be at least
    315             // three more available in a valid file.
    316             inData.readFully(buf, 1, 3);
    317 
    318         } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
    319 
    320         // Not all bytes are zero. In a valid Stream it indicates the
    321         // beginning of the next Stream. Read the rest of the Stream Header
    322         // and initialize the XZ decoder.
    323         inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
    324 
    325         try {
    326             xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf);
    327         } catch (XZFormatException e) {
    328             // Since this isn't the first .xz Stream, it is more
    329             // logical to tell that the data is corrupt.
    330             throw new CorruptedInputException(
    331                     "Garbage after a valid XZ Stream");
    332         }
    333     }
    334 
    335     /**
    336      * Returns the number of uncompressed bytes that can be read
    337      * without blocking. The value is returned with an assumption
    338      * that the compressed input data will be valid. If the compressed
    339      * data is corrupt, <code>CorruptedInputException</code> may get
    340      * thrown before the number of bytes claimed to be available have
    341      * been read from this input stream.
    342      *
    343      * @return      the number of uncompressed bytes that can be read
    344      *              without blocking
    345      */
    346     public int available() throws IOException {
    347         if (in == null)
    348             throw new XZIOException("Stream closed");
    349 
    350         if (exception != null)
    351             throw exception;
    352 
    353         return xzIn == null ? 0 : xzIn.available();
    354     }
    355 
    356     /**
    357      * Closes the stream and calls <code>in.close()</code>.
    358      * If the stream was already closed, this does nothing.
    359      *
    360      * @throws  IOException if thrown by <code>in.close()</code>
    361      */
    362     public void close() throws IOException {
    363         if (in != null) {
    364             try {
    365                 in.close();
    366             } finally {
    367                 in = null;
    368             }
    369         }
    370     }
    371 }
    372