Home | History | Annotate | Download | only in xz
      1 /*
      2  * SingleXZInputStream
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 package org.tukaani.xz;
     11 
     12 import java.io.InputStream;
     13 import java.io.DataInputStream;
     14 import java.io.IOException;
     15 import java.io.EOFException;
     16 import org.tukaani.xz.common.DecoderUtil;
     17 import org.tukaani.xz.common.StreamFlags;
     18 import org.tukaani.xz.index.IndexHash;
     19 import org.tukaani.xz.check.Check;
     20 
     21 /**
     22  * Decompresses exactly one XZ Stream in streamed mode (no seeking).
     23  * The decompression stops after the first XZ Stream has been decompressed,
     24  * and the read position in the input stream is left at the first byte
     25  * after the end of the XZ Stream. This can be useful when XZ data has
     26  * been stored inside some other file format or protocol.
     27  * <p>
     28  * Unless you know what you are doing, don't use this class to decompress
     29  * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
     30  *
     31  * <h4>When uncompressed size is known beforehand</h4>
     32  * <p>
     33  * If you are decompressing complete XZ streams and your application knows
     34  * exactly how much uncompressed data there should be, it is good to try
     35  * reading one more byte by calling <code>read()</code> and checking
     36  * that it returns <code>-1</code>. This way the decompressor will parse the
     37  * file footers and verify the integrity checks, giving the caller more
     38  * confidence that the uncompressed data is valid.
     39  *
     40  * @see XZInputStream
     41  */
     42 public class SingleXZInputStream extends InputStream {
     43     private InputStream in;
     44     private final int memoryLimit;
     45     private final StreamFlags streamHeaderFlags;
     46     private final Check check;
     47     private final boolean verifyCheck;
     48     private BlockInputStream blockDecoder = null;
     49     private final IndexHash indexHash = new IndexHash();
     50     private boolean endReached = false;
     51     private IOException exception = null;
     52 
     53     private final byte[] tempBuf = new byte[1];
     54 
     55     /**
     56      * Reads the Stream Header into a buffer.
     57      * This is a helper function for the constructors.
     58      */
     59     private static byte[] readStreamHeader(InputStream in) throws IOException {
     60         byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
     61         new DataInputStream(in).readFully(streamHeader);
     62         return streamHeader;
     63     }
     64 
     65     /**
     66      * Creates a new XZ decompressor that decompresses exactly one
     67      * XZ Stream from <code>in</code> without a memory usage limit.
     68      * <p>
     69      * This constructor reads and parses the XZ Stream Header (12 bytes)
     70      * from <code>in</code>. The header of the first Block is not read
     71      * until <code>read</code> is called.
     72      *
     73      * @param       in          input stream from which XZ-compressed
     74      *                          data is read
     75      *
     76      * @throws      XZFormatException
     77      *                          input is not in the XZ format
     78      *
     79      * @throws      CorruptedInputException
     80      *                          XZ header CRC32 doesn't match
     81      *
     82      * @throws      UnsupportedOptionsException
     83      *                          XZ header is valid but specifies options
     84      *                          not supported by this implementation
     85      *
     86      * @throws      EOFException
     87      *                          less than 12 bytes of input was available
     88      *                          from <code>in</code>
     89      *
     90      * @throws      IOException may be thrown by <code>in</code>
     91      */
     92     public SingleXZInputStream(InputStream in) throws IOException {
     93         this(in, -1);
     94     }
     95 
     96     /**
     97      * Creates a new XZ decompressor that decompresses exactly one
     98      * XZ Stream from <code>in</code> with an optional memory usage limit.
     99      * <p>
    100      * This is identical to <code>SingleXZInputStream(InputStream)</code>
    101      * except that this takes also the <code>memoryLimit</code> argument.
    102      *
    103      * @param       in          input stream from which XZ-compressed
    104      *                          data is read
    105      *
    106      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    107      *                          or <code>-1</code> to impose no
    108      *                          memory usage limit
    109      *
    110      * @throws      XZFormatException
    111      *                          input is not in the XZ format
    112      *
    113      * @throws      CorruptedInputException
    114      *                          XZ header CRC32 doesn't match
    115      *
    116      * @throws      UnsupportedOptionsException
    117      *                          XZ header is valid but specifies options
    118      *                          not supported by this implementation
    119      *
    120      * @throws      EOFException
    121      *                          less than 12 bytes of input was available
    122      *                          from <code>in</code>
    123      *
    124      * @throws      IOException may be thrown by <code>in</code>
    125      */
    126     public SingleXZInputStream(InputStream in, int memoryLimit)
    127             throws IOException {
    128         this(in, memoryLimit, true, readStreamHeader(in));
    129     }
    130 
    131     /**
    132      * Creates a new XZ decompressor that decompresses exactly one
    133      * XZ Stream from <code>in</code> with an optional memory usage limit
    134      * and ability to disable verification of integrity checks.
    135      * <p>
    136      * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
    137      * except that this takes also the <code>verifyCheck</code> argument.
    138      * <p>
    139      * Note that integrity check verification should almost never be disabled.
    140      * Possible reasons to disable integrity check verification:
    141      * <ul>
    142      *   <li>Trying to recover data from a corrupt .xz file.</li>
    143      *   <li>Speeding up decompression. This matters mostly with SHA-256
    144      *   or with files that have compressed extremely well. It's recommended
    145      *   that integrity checking isn't disabled for performance reasons
    146      *   unless the file integrity is verified externally in some other
    147      *   way.</li>
    148      * </ul>
    149      * <p>
    150      * <code>verifyCheck</code> only affects the integrity check of
    151      * the actual compressed data. The CRC32 fields in the headers
    152      * are always verified.
    153      *
    154      * @param       in          input stream from which XZ-compressed
    155      *                          data is read
    156      *
    157      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    158      *                          or <code>-1</code> to impose no
    159      *                          memory usage limit
    160      *
    161      * @param       verifyCheck if <code>true</code>, the integrity checks
    162      *                          will be verified; this should almost never
    163      *                          be set to <code>false</code>
    164      *
    165      * @throws      XZFormatException
    166      *                          input is not in the XZ format
    167      *
    168      * @throws      CorruptedInputException
    169      *                          XZ header CRC32 doesn't match
    170      *
    171      * @throws      UnsupportedOptionsException
    172      *                          XZ header is valid but specifies options
    173      *                          not supported by this implementation
    174      *
    175      * @throws      EOFException
    176      *                          less than 12 bytes of input was available
    177      *                          from <code>in</code>
    178      *
    179      * @throws      IOException may be thrown by <code>in</code>
    180      *
    181      * @since 1.6
    182      */
    183     public SingleXZInputStream(InputStream in, int memoryLimit,
    184                                boolean verifyCheck) throws IOException {
    185         this(in, memoryLimit, verifyCheck, readStreamHeader(in));
    186     }
    187 
    188     SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
    189                         byte[] streamHeader) throws IOException {
    190         this.in = in;
    191         this.memoryLimit = memoryLimit;
    192         this.verifyCheck = verifyCheck;
    193         streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
    194         check = Check.getInstance(streamHeaderFlags.checkType);
    195     }
    196 
    197     /**
    198      * Gets the ID of the integrity check used in this XZ Stream.
    199      *
    200      * @return      the Check ID specified in the XZ Stream Header
    201      */
    202     public int getCheckType() {
    203         return streamHeaderFlags.checkType;
    204     }
    205 
    206     /**
    207      * Gets the name of the integrity check used in this XZ Stream.
    208      *
    209      * @return      the name of the check specified in the XZ Stream Header
    210      */
    211     public String getCheckName() {
    212         return check.getName();
    213     }
    214 
    215     /**
    216      * Decompresses the next byte from this input stream.
    217      * <p>
    218      * Reading lots of data with <code>read()</code> from this input stream
    219      * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
    220      * if you need to read lots of data one byte at a time.
    221      *
    222      * @return      the next decompressed byte, or <code>-1</code>
    223      *              to indicate the end of the compressed stream
    224      *
    225      * @throws      CorruptedInputException
    226      * @throws      UnsupportedOptionsException
    227      * @throws      MemoryLimitException
    228      *
    229      * @throws      XZIOException if the stream has been closed
    230      *
    231      * @throws      EOFException
    232      *                          compressed input is truncated or corrupt
    233      *
    234      * @throws      IOException may be thrown by <code>in</code>
    235      */
    236     public int read() throws IOException {
    237         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    238     }
    239 
    240     /**
    241      * Decompresses into an array of bytes.
    242      * <p>
    243      * If <code>len</code> is zero, no bytes are read and <code>0</code>
    244      * is returned. Otherwise this will try to decompress <code>len</code>
    245      * bytes of uncompressed data. Less than <code>len</code> bytes may
    246      * be read only in the following situations:
    247      * <ul>
    248      *   <li>The end of the compressed data was reached successfully.</li>
    249      *   <li>An error is detected after at least one but less <code>len</code>
    250      *       bytes have already been successfully decompressed.
    251      *       The next call with non-zero <code>len</code> will immediately
    252      *       throw the pending exception.</li>
    253      *   <li>An exception is thrown.</li>
    254      * </ul>
    255      *
    256      * @param       buf         target buffer for uncompressed data
    257      * @param       off         start offset in <code>buf</code>
    258      * @param       len         maximum number of uncompressed bytes to read
    259      *
    260      * @return      number of bytes read, or <code>-1</code> to indicate
    261      *              the end of the compressed stream
    262      *
    263      * @throws      CorruptedInputException
    264      * @throws      UnsupportedOptionsException
    265      * @throws      MemoryLimitException
    266      *
    267      * @throws      XZIOException if the stream has been closed
    268      *
    269      * @throws      EOFException
    270      *                          compressed input is truncated or corrupt
    271      *
    272      * @throws      IOException may be thrown by <code>in</code>
    273      */
    274     public int read(byte[] buf, int off, int len) throws IOException {
    275         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
    276             throw new IndexOutOfBoundsException();
    277 
    278         if (len == 0)
    279             return 0;
    280 
    281         if (in == null)
    282             throw new XZIOException("Stream closed");
    283 
    284         if (exception != null)
    285             throw exception;
    286 
    287         if (endReached)
    288             return -1;
    289 
    290         int size = 0;
    291 
    292         try {
    293             while (len > 0) {
    294                 if (blockDecoder == null) {
    295                     try {
    296                         blockDecoder = new BlockInputStream(
    297                                 in, check, verifyCheck, memoryLimit, -1, -1);
    298                     } catch (IndexIndicatorException e) {
    299                         indexHash.validate(in);
    300                         validateStreamFooter();
    301                         endReached = true;
    302                         return size > 0 ? size : -1;
    303                     }
    304                 }
    305 
    306                 int ret = blockDecoder.read(buf, off, len);
    307 
    308                 if (ret > 0) {
    309                     size += ret;
    310                     off += ret;
    311                     len -= ret;
    312                 } else if (ret == -1) {
    313                     indexHash.add(blockDecoder.getUnpaddedSize(),
    314                                   blockDecoder.getUncompressedSize());
    315                     blockDecoder = null;
    316                 }
    317             }
    318         } catch (IOException e) {
    319             exception = e;
    320             if (size == 0)
    321                 throw e;
    322         }
    323 
    324         return size;
    325     }
    326 
    327     private void validateStreamFooter() throws IOException {
    328         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
    329         new DataInputStream(in).readFully(buf);
    330         StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
    331 
    332         if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
    333                                              streamFooterFlags)
    334                 || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
    335             throw new CorruptedInputException(
    336                     "XZ Stream Footer does not match Stream Header");
    337     }
    338 
    339     /**
    340      * Returns the number of uncompressed bytes that can be read
    341      * without blocking. The value is returned with an assumption
    342      * that the compressed input data will be valid. If the compressed
    343      * data is corrupt, <code>CorruptedInputException</code> may get
    344      * thrown before the number of bytes claimed to be available have
    345      * been read from this input stream.
    346      *
    347      * @return      the number of uncompressed bytes that can be read
    348      *              without blocking
    349      */
    350     public int available() throws IOException {
    351         if (in == null)
    352             throw new XZIOException("Stream closed");
    353 
    354         if (exception != null)
    355             throw exception;
    356 
    357         return blockDecoder == null ? 0 : blockDecoder.available();
    358     }
    359 
    360     /**
    361      * Closes the stream and calls <code>in.close()</code>.
    362      * If the stream was already closed, this does nothing.
    363      *
    364      * @throws  IOException if thrown by <code>in.close()</code>
    365      */
    366     public void close() throws IOException {
    367         if (in != null) {
    368             try {
    369                 in.close();
    370             } finally {
    371                 in = null;
    372             }
    373         }
    374     }
    375 }
    376