Home | History | Annotate | Download | only in xz
      1 /*
      2  * SeekableXZInputStream
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 package org.tukaani.xz;
     11 
     12 import java.util.Arrays;
     13 import java.util.ArrayList;
     14 import java.io.DataInputStream;
     15 import java.io.IOException;
     16 import java.io.EOFException;
     17 import org.tukaani.xz.common.DecoderUtil;
     18 import org.tukaani.xz.common.StreamFlags;
     19 import org.tukaani.xz.check.Check;
     20 import org.tukaani.xz.index.IndexDecoder;
     21 import org.tukaani.xz.index.BlockInfo;
     22 
     23 /**
     24  * Decompresses a .xz file in random access mode.
     25  * This supports decompressing concatenated .xz files.
     26  * <p>
      27  * Each .xz file consists of one or more Streams. Each Stream consists of
      28  * zero or more Blocks. Each Stream contains an Index of its Blocks.
     29  * The Indexes from all Streams are loaded in RAM by a constructor of this
     30  * class. A typical .xz file has only one Stream, and parsing its Index will
     31  * need only three or four seeks.
     32  * <p>
      33  * To make random access possible, the data in a .xz file must be split
     34  * into multiple Blocks of reasonable size. Decompression can only start at
     35  * a Block boundary. When seeking to an uncompressed position that is not at
     36  * a Block boundary, decompression starts at the beginning of the Block and
     37  * throws away data until the target position is reached. Thus, smaller Blocks
     38  * mean faster seeks to arbitrary uncompressed positions. On the other hand,
     39  * smaller Blocks mean worse compression. So one has to make a compromise
     40  * between random access speed and compression ratio.
     41  * <p>
     42  * Implementation note: This class uses linear search to locate the correct
     43  * Stream from the data structures in RAM. It was the simplest to implement
     44  * and should be fine as long as there aren't too many Streams. The correct
     45  * Block inside a Stream is located using binary search and thus is fast
     46  * even with a huge number of Blocks.
     47  *
     48  * <h4>Memory usage</h4>
     49  * <p>
     50  * The amount of memory needed for the Indexes is taken into account when
     51  * checking the memory usage limit. Each Stream is calculated to need at
     52  * least 1&nbsp;KiB of memory and each Block 16 bytes of memory, rounded up
     53  * to the next kibibyte. So unless the file has a huge number of Streams or
     54  * Blocks, these don't take significant amount of memory.
     55  *
     56  * <h4>Creating random-accessible .xz files</h4>
     57  * <p>
     58  * When using {@link XZOutputStream}, a new Block can be started by calling
     59  * its {@link XZOutputStream#endBlock() endBlock} method. If you know
     60  * that the decompressor will only need to seek to certain uncompressed
     61  * positions, it can be a good idea to start a new Block at (some of) these
     62  * positions (and only at these positions to get better compression ratio).
     63  * <p>
     64  * liblzma in XZ Utils supports starting a new Block with
     65  * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
     66  * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
     67  * also added the option <code>--block-size=SIZE</code> to the xz command
     68  * line tool. XZ Utils 5.1.2alpha added a partial implementation of
     69  * <code>--block-list=SIZES</code> which allows specifying sizes of
     70  * individual Blocks.
     71  *
     72  * @see SeekableFileInputStream
     73  * @see XZInputStream
     74  * @see XZOutputStream
     75  */
     76 public class SeekableXZInputStream extends SeekableInputStream {
     77     /**
     78      * The input stream containing XZ compressed data.
     79      */
    private SeekableInputStream in;

    /**
     * Memory usage limit after the memory usage of the IndexDecoders has
     * been subtracted. The constructor takes the limit in kibibytes (KiB)
     * and only subtracts from it when it is non-negative, so a negative
     * value passed to the constructor is stored here unchanged.
     */
    private final int memoryLimit;

    /**
     * Memory usage of the IndexDecoders.
     * <code>memoryLimit + indexMemoryUsage</code> equals the original
     * memory usage limit that was passed to the constructor.
     */
    private int indexMemoryUsage = 0;

    /**
     * List of IndexDecoders, one for each Stream in the file.
     * The list is in reverse order: The first element is
     * the last Stream in the file.
     * <p>
     * This is a raw list; the elements are cast back to
     * <code>IndexDecoder</code> where they are read.
     */
    private final ArrayList streams = new ArrayList();

    /**
     * Bitmask of all Check IDs seen. The constructor sets bit
     * <code>1 &lt;&lt; checkType</code> for every Stream Footer it decodes.
     */
    private int checkTypes = 0;

    /**
     * Uncompressed size of the file (all Streams).
     */
    private long uncompressedSize = 0;

    /**
     * Uncompressed size of the largest XZ Block in the file.
     */
    private long largestBlockSize = 0;

    /**
     * Number of XZ Blocks in the file (sum of the record counts of
     * all decoded Indexes).
     */
    private int blockCount = 0;

    /**
     * Size and position information about the current Block.
     * If there are no Blocks, all values will be <code>-1</code>.
     */
    private final BlockInfo curBlockInfo;

    /**
     * Temporary (and cached) information about the Block whose information
     * is queried via <code>getBlockPos</code> and related functions.
     */
    private final BlockInfo queriedBlockInfo;

    /**
     * Integrity Check in the current XZ Stream. The constructor leaves
     * this to point to the Check of the first Stream.
     */
    private Check check;

    /**
     * Flag indicating if the integrity checks will be verified.
     */
    private final boolean verifyCheck;

    /**
     * Decoder of the current XZ Block, if any.
     */
    private BlockInputStream blockDecoder = null;

    /**
     * Current uncompressed position.
     */
    private long curPos = 0;

    /**
     * Target position for seeking.
     */
    private long seekPos;

    /**
     * True when <code>seek(long)</code> has been called but the actual
     * seeking hasn't been done yet.
     */
    private boolean seekNeeded = false;

    /**
     * True when end of the file was reached. This can be cleared by
     * calling <code>seek(long)</code>.
     */
    private boolean endReached = false;

    /**
     * Pending exception from an earlier error. It is stored by
     * <code>read(byte[], int, int)</code> when an IOException occurs and
     * rethrown by later <code>read</code> and <code>available</code> calls.
     */
    private IOException exception = null;

    /**
     * Temporary buffer for read(). This avoids reallocating memory
     * on every read() call.
     */
    private final byte[] tempBuf = new byte[1];
    182 
    183     /**
    184      * Creates a new seekable XZ decompressor without a memory usage limit.
    185      *
    186      * @param       in          seekable input stream containing one or more
    187      *                          XZ Streams; the whole input stream is used
    188      *
    189      * @throws      XZFormatException
    190      *                          input is not in the XZ format
    191      *
    192      * @throws      CorruptedInputException
    193      *                          XZ data is corrupt or truncated
    194      *
    195      * @throws      UnsupportedOptionsException
    196      *                          XZ headers seem valid but they specify
    197      *                          options not supported by this implementation
    198      *
    199      * @throws      EOFException
    200      *                          less than 6 bytes of input was available
    201      *                          from <code>in</code>, or (unlikely) the size
    202      *                          of the underlying stream got smaller while
    203      *                          this was reading from it
    204      *
    205      * @throws      IOException may be thrown by <code>in</code>
    206      */
    public SeekableXZInputStream(SeekableInputStream in) throws IOException {
        // -1 means "no memory usage limit"; see the two-argument constructor.
        this(in, -1);
    }
    211 
    212     /**
     213      * Creates a new seekable XZ decompressor with an optional
    214      * memory usage limit.
    215      *
    216      * @param       in          seekable input stream containing one or more
    217      *                          XZ Streams; the whole input stream is used
    218      *
    219      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    220      *                          or <code>-1</code> to impose no
    221      *                          memory usage limit
    222      *
    223      * @throws      XZFormatException
    224      *                          input is not in the XZ format
    225      *
    226      * @throws      CorruptedInputException
    227      *                          XZ data is corrupt or truncated
    228      *
    229      * @throws      UnsupportedOptionsException
    230      *                          XZ headers seem valid but they specify
    231      *                          options not supported by this implementation
    232      *
    233      * @throws      MemoryLimitException
    234      *                          decoded XZ Indexes would need more memory
    235      *                          than allowed by the memory usage limit
    236      *
    237      * @throws      EOFException
    238      *                          less than 6 bytes of input was available
    239      *                          from <code>in</code>, or (unlikely) the size
    240      *                          of the underlying stream got smaller while
    241      *                          this was reading from it
    242      *
    243      * @throws      IOException may be thrown by <code>in</code>
    244      */
    public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
            throws IOException {
        // Integrity check verification is enabled by default.
        this(in, memoryLimit, true);
    }
    249 
    250     /**
     251      * Creates a new seekable XZ decompressor with an optional
    252      * memory usage limit and ability to disable verification
    253      * of integrity checks.
    254      * <p>
    255      * Note that integrity check verification should almost never be disabled.
    256      * Possible reasons to disable integrity check verification:
    257      * <ul>
    258      *   <li>Trying to recover data from a corrupt .xz file.</li>
    259      *   <li>Speeding up decompression. This matters mostly with SHA-256
    260      *   or with files that have compressed extremely well. It's recommended
    261      *   that integrity checking isn't disabled for performance reasons
    262      *   unless the file integrity is verified externally in some other
    263      *   way.</li>
    264      * </ul>
    265      * <p>
    266      * <code>verifyCheck</code> only affects the integrity check of
    267      * the actual compressed data. The CRC32 fields in the headers
    268      * are always verified.
    269      *
    270      * @param       in          seekable input stream containing one or more
    271      *                          XZ Streams; the whole input stream is used
    272      *
    273      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    274      *                          or <code>-1</code> to impose no
    275      *                          memory usage limit
    276      *
    277      * @param       verifyCheck if <code>true</code>, the integrity checks
    278      *                          will be verified; this should almost never
    279      *                          be set to <code>false</code>
    280      *
    281      * @throws      XZFormatException
    282      *                          input is not in the XZ format
    283      *
    284      * @throws      CorruptedInputException
    285      *                          XZ data is corrupt or truncated
    286      *
    287      * @throws      UnsupportedOptionsException
    288      *                          XZ headers seem valid but they specify
    289      *                          options not supported by this implementation
    290      *
    291      * @throws      MemoryLimitException
    292      *                          decoded XZ Indexes would need more memory
    293      *                          than allowed by the memory usage limit
    294      *
    295      * @throws      EOFException
    296      *                          less than 6 bytes of input was available
    297      *                          from <code>in</code>, or (unlikely) the size
    298      *                          of the underlying stream got smaller while
    299      *                          this was reading from it
    300      *
    301      * @throws      IOException may be thrown by <code>in</code>
    302      *
    303      * @since 1.6
    304      */
    305     public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
    306                                  boolean verifyCheck)
    307             throws IOException {
    308         this.verifyCheck = verifyCheck;
    309         this.in = in;
    310         DataInputStream inData = new DataInputStream(in);
    311 
    312         // Check the magic bytes in the beginning of the file.
    313         {
    314             in.seek(0);
    315             byte[] buf = new byte[XZ.HEADER_MAGIC.length];
    316             inData.readFully(buf);
    317             if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
    318                 throw new XZFormatException();
    319         }
    320 
    321         // Get the file size and verify that it is a multiple of 4 bytes.
    322         long pos = in.length();
    323         if ((pos & 3) != 0)
    324             throw new CorruptedInputException(
    325                     "XZ file size is not a multiple of 4 bytes");
    326 
    327         // Parse the headers starting from the end of the file.
    328         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
    329         long streamPadding = 0;
    330 
    331         while (pos > 0) {
    332             if (pos < DecoderUtil.STREAM_HEADER_SIZE)
    333                 throw new CorruptedInputException();
    334 
    335             // Read the potential Stream Footer.
    336             in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
    337             inData.readFully(buf);
    338 
    339             // Skip Stream Padding four bytes at a time.
    340             // Skipping more at once would be faster,
    341             // but usually there isn't much Stream Padding.
    342             if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
    343                     && buf[11] == 0x00) {
    344                 streamPadding += 4;
    345                 pos -= 4;
    346                 continue;
    347             }
    348 
    349             // It's not Stream Padding. Update pos.
    350             pos -= DecoderUtil.STREAM_HEADER_SIZE;
    351 
    352             // Decode the Stream Footer and check if Backward Size
    353             // looks reasonable.
    354             StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
    355             if (streamFooter.backwardSize >= pos)
    356                 throw new CorruptedInputException(
    357                         "Backward Size in XZ Stream Footer is too big");
    358 
    359             // Check that the Check ID is supported. Store it in case this
    360             // is the first Stream in the file.
    361             check = Check.getInstance(streamFooter.checkType);
    362 
    363             // Remember which Check IDs have been seen.
    364             checkTypes |= 1 << streamFooter.checkType;
    365 
    366             // Seek to the beginning of the Index.
    367             in.seek(pos - streamFooter.backwardSize);
    368 
    369             // Decode the Index field.
    370             IndexDecoder index;
    371             try {
    372                 index = new IndexDecoder(in, streamFooter, streamPadding,
    373                                          memoryLimit);
    374             } catch (MemoryLimitException e) {
    375                 // IndexDecoder doesn't know how much memory we had
    376                 // already needed so we need to recreate the exception.
    377                 assert memoryLimit >= 0;
    378                 throw new MemoryLimitException(
    379                         e.getMemoryNeeded() + indexMemoryUsage,
    380                         memoryLimit + indexMemoryUsage);
    381             }
    382 
    383             // Update the memory usage and limit counters.
    384             indexMemoryUsage += index.getMemoryUsage();
    385             if (memoryLimit >= 0) {
    386                 memoryLimit -= index.getMemoryUsage();
    387                 assert memoryLimit >= 0;
    388             }
    389 
    390             // Remember the uncompressed size of the largest Block.
    391             if (largestBlockSize < index.getLargestBlockSize())
    392                 largestBlockSize = index.getLargestBlockSize();
    393 
    394             // Calculate the offset to the beginning of this XZ Stream and
    395             // check that it looks sane.
    396             long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
    397             if (pos < off)
    398                 throw new CorruptedInputException("XZ Index indicates "
    399                         + "too big compressed size for the XZ Stream");
    400 
    401             // Seek to the beginning of this Stream.
    402             pos -= off;
    403             in.seek(pos);
    404 
    405             // Decode the Stream Header.
    406             inData.readFully(buf);
    407             StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
    408 
    409             // Verify that the Stream Header matches the Stream Footer.
    410             if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
    411                 throw new CorruptedInputException(
    412                         "XZ Stream Footer does not match Stream Header");
    413 
    414             // Update the total uncompressed size of the file and check that
    415             // it doesn't overflow.
    416             uncompressedSize += index.getUncompressedSize();
    417             if (uncompressedSize < 0)
    418                 throw new UnsupportedOptionsException("XZ file is too big");
    419 
    420             // Update the Block count and check that it fits into an int.
    421             blockCount += index.getRecordCount();
    422             if (blockCount < 0)
    423                 throw new UnsupportedOptionsException(
    424                         "XZ file has over " + Integer.MAX_VALUE + " Blocks");
    425 
    426             // Add this Stream to the list of Streams.
    427             streams.add(index);
    428 
    429             // Reset to be ready to parse the next Stream.
    430             streamPadding = 0;
    431         }
    432 
    433         assert pos == 0;
    434 
    435         // Save it now that indexMemoryUsage has been substracted from it.
    436         this.memoryLimit = memoryLimit;
    437 
    438         // Store the relative offsets of the Streams. This way we don't
    439         // need to recalculate them in this class when seeking; the
    440         // IndexDecoder instances will handle them.
    441         IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1);
    442         for (int i = streams.size() - 2; i >= 0; --i) {
    443             IndexDecoder cur = (IndexDecoder)streams.get(i);
    444             cur.setOffsets(prev);
    445             prev = cur;
    446         }
    447 
    448         // Initialize curBlockInfo to point to the first Stream.
    449         // The blockNumber will be left to -1 so that .hasNext()
    450         // and .setNext() work to get the first Block when starting
    451         // to decompress from the beginning of the file.
    452         IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1);
    453         curBlockInfo = new BlockInfo(first);
    454 
    455         // queriedBlockInfo needs to be allocated too. The Stream used for
    456         // initialization doesn't matter though.
    457         queriedBlockInfo = new BlockInfo(first);
    458     }
    459 
    460     /**
    461      * Gets the types of integrity checks used in the .xz file.
    462      * Multiple checks are possible only if there are multiple
    463      * concatenated XZ Streams.
    464      * <p>
    465      * The returned value has a bit set for every check type that is present.
    466      * For example, if CRC64 and SHA-256 were used, the return value is
    467      * <code>(1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_CRC64)
    468      * | (1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_SHA256)</code>.
    469      */
    470     public int getCheckTypes() {
    471         return checkTypes;
    472     }
    473 
    474     /**
    475      * Gets the amount of memory in kibibytes (KiB) used by
    476      * the data structures needed to locate the XZ Blocks.
    477      * This is usually useless information but since it is calculated
     478      * for memory usage limit anyway, it is nice to make it available too.
    479      */
    480     public int getIndexMemoryUsage() {
    481         return indexMemoryUsage;
    482     }
    483 
    484     /**
    485      * Gets the uncompressed size of the largest XZ Block in bytes.
    486      * This can be useful if you want to check that the file doesn't
    487      * have huge XZ Blocks which could make seeking to arbitrary offsets
    488      * very slow. Note that huge Blocks don't automatically mean that
    489      * seeking would be slow, for example, seeking to the beginning of
    490      * any Block is always fast.
    491      */
    492     public long getLargestBlockSize() {
    493         return largestBlockSize;
    494     }
    495 
    496     /**
    497      * Gets the number of Streams in the .xz file.
    498      *
    499      * @since 1.3
    500      */
    501     public int getStreamCount() {
    502         return streams.size();
    503     }
    504 
    505     /**
    506      * Gets the number of Blocks in the .xz file.
    507      *
    508      * @since 1.3
    509      */
    510     public int getBlockCount() {
    511         return blockCount;
    512     }
    513 
    514     /**
    515      * Gets the uncompressed start position of the given Block.
    516      *
    517      * @throws  IndexOutOfBoundsException if
    518      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    519      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    520      *
    521      * @since 1.3
    522      */
    523     public long getBlockPos(int blockNumber) {
    524         locateBlockByNumber(queriedBlockInfo, blockNumber);
    525         return queriedBlockInfo.uncompressedOffset;
    526     }
    527 
    528     /**
    529      * Gets the uncompressed size of the given Block.
    530      *
    531      * @throws  IndexOutOfBoundsException if
    532      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    533      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    534      *
    535      * @since 1.3
    536      */
    537     public long getBlockSize(int blockNumber) {
    538         locateBlockByNumber(queriedBlockInfo, blockNumber);
    539         return queriedBlockInfo.uncompressedSize;
    540     }
    541 
    542     /**
    543      * Gets the position where the given compressed Block starts in
    544      * the underlying .xz file.
    545      * This information is rarely useful to the users of this class.
    546      *
    547      * @throws  IndexOutOfBoundsException if
    548      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    549      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    550      *
    551      * @since 1.3
    552      */
    553     public long getBlockCompPos(int blockNumber) {
    554         locateBlockByNumber(queriedBlockInfo, blockNumber);
    555         return queriedBlockInfo.compressedOffset;
    556     }
    557 
    558     /**
    559      * Gets the compressed size of the given Block.
    560      * This together with the uncompressed size can be used to calculate
    561      * the compression ratio of the specific Block.
    562      *
    563      * @throws  IndexOutOfBoundsException if
    564      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    565      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    566      *
    567      * @since 1.3
    568      */
    569     public long getBlockCompSize(int blockNumber) {
    570         locateBlockByNumber(queriedBlockInfo, blockNumber);
    571         return (queriedBlockInfo.unpaddedSize + 3) & ~3;
    572     }
    573 
    574     /**
    575      * Gets integrity check type (Check ID) of the given Block.
    576      *
    577      * @throws  IndexOutOfBoundsException if
    578      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    579      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    580      *
    581      * @see #getCheckTypes()
    582      *
    583      * @since 1.3
    584      */
    585     public int getBlockCheckType(int blockNumber) {
    586         locateBlockByNumber(queriedBlockInfo, blockNumber);
    587         return queriedBlockInfo.getCheckType();
    588     }
    589 
    590     /**
    591      * Gets the number of the Block that contains the byte at the given
    592      * uncompressed position.
    593      *
    594      * @throws  IndexOutOfBoundsException if
    595      *          <code>pos&nbsp;&lt;&nbsp;0</code> or
    596      *          <code>pos&nbsp;&gt;=&nbsp;length()</code>.
    597      *
    598      * @since 1.3
    599      */
    600     public int getBlockNumber(long pos) {
    601         locateBlockByPos(queriedBlockInfo, pos);
    602         return queriedBlockInfo.blockNumber;
    603     }
    604 
    605     /**
    606      * Decompresses the next byte from this input stream.
    607      *
    608      * @return      the next decompressed byte, or <code>-1</code>
    609      *              to indicate the end of the compressed stream
    610      *
    611      * @throws      CorruptedInputException
    612      * @throws      UnsupportedOptionsException
    613      * @throws      MemoryLimitException
    614      *
    615      * @throws      XZIOException if the stream has been closed
    616      *
    617      * @throws      IOException may be thrown by <code>in</code>
    618      */
    619     public int read() throws IOException {
    620         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    621     }
    622 
    623     /**
    624      * Decompresses into an array of bytes.
    625      * <p>
    626      * If <code>len</code> is zero, no bytes are read and <code>0</code>
    627      * is returned. Otherwise this will try to decompress <code>len</code>
    628      * bytes of uncompressed data. Less than <code>len</code> bytes may
    629      * be read only in the following situations:
    630      * <ul>
    631      *   <li>The end of the compressed data was reached successfully.</li>
    632      *   <li>An error is detected after at least one but less than
    633      *       <code>len</code> bytes have already been successfully
    634      *       decompressed. The next call with non-zero <code>len</code>
    635      *       will immediately throw the pending exception.</li>
    636      *   <li>An exception is thrown.</li>
    637      * </ul>
    638      *
    639      * @param       buf         target buffer for uncompressed data
    640      * @param       off         start offset in <code>buf</code>
    641      * @param       len         maximum number of uncompressed bytes to read
    642      *
    643      * @return      number of bytes read, or <code>-1</code> to indicate
    644      *              the end of the compressed stream
    645      *
    646      * @throws      CorruptedInputException
    647      * @throws      UnsupportedOptionsException
    648      * @throws      MemoryLimitException
    649      *
    650      * @throws      XZIOException if the stream has been closed
    651      *
    652      * @throws      IOException may be thrown by <code>in</code>
    653      */
    654     public int read(byte[] buf, int off, int len) throws IOException {
    655         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
    656             throw new IndexOutOfBoundsException();
    657 
    658         if (len == 0)
    659             return 0;
    660 
    661         if (in == null)
    662             throw new XZIOException("Stream closed");
    663 
    664         if (exception != null)
    665             throw exception;
    666 
    667         int size = 0;
    668 
    669         try {
    670             if (seekNeeded)
    671                 seek();
    672 
    673             if (endReached)
    674                 return -1;
    675 
    676             while (len > 0) {
    677                 if (blockDecoder == null) {
    678                     seek();
    679                     if (endReached)
    680                         break;
    681                 }
    682 
    683                 int ret = blockDecoder.read(buf, off, len);
    684 
    685                 if (ret > 0) {
    686                     curPos += ret;
    687                     size += ret;
    688                     off += ret;
    689                     len -= ret;
    690                 } else if (ret == -1) {
    691                     blockDecoder = null;
    692                 }
    693             }
    694         } catch (IOException e) {
    695             // We know that the file isn't simply truncated because we could
    696             // parse the Indexes in the constructor. So convert EOFException
    697             // to CorruptedInputException.
    698             if (e instanceof EOFException)
    699                 e = new CorruptedInputException();
    700 
    701             exception = e;
    702             if (size == 0)
    703                 throw e;
    704         }
    705 
    706         return size;
    707     }
    708 
    709     /**
    710      * Returns the number of uncompressed bytes that can be read
    711      * without blocking. The value is returned with an assumption
    712      * that the compressed input data will be valid. If the compressed
    713      * data is corrupt, <code>CorruptedInputException</code> may get
    714      * thrown before the number of bytes claimed to be available have
    715      * been read from this input stream.
    716      *
    717      * @return      the number of uncompressed bytes that can be read
    718      *              without blocking
    719      */
    720     public int available() throws IOException {
    721         if (in == null)
    722             throw new XZIOException("Stream closed");
    723 
    724         if (exception != null)
    725             throw exception;
    726 
    727         if (endReached || seekNeeded || blockDecoder == null)
    728             return 0;
    729 
    730         return blockDecoder.available();
    731     }
    732 
    733     /**
    734      * Closes the stream and calls <code>in.close()</code>.
    735      * If the stream was already closed, this does nothing.
    736      *
    737      * @throws  IOException if thrown by <code>in.close()</code>
    738      */
    739     public void close() throws IOException {
    740         if (in != null) {
    741             try {
    742                 in.close();
    743             } finally {
    744                 in = null;
    745             }
    746         }
    747     }
    748 
    749     /**
    750      * Gets the uncompressed size of this input stream. If there are multiple
    751      * XZ Streams, the total uncompressed size of all XZ Streams is returned.
    752      */
    753     public long length() {
    754         return uncompressedSize;
    755     }
    756 
    757     /**
    758      * Gets the current uncompressed position in this input stream.
    759      *
    760      * @throws      XZIOException if the stream has been closed
    761      */
    762     public long position() throws IOException {
    763         if (in == null)
    764             throw new XZIOException("Stream closed");
    765 
    766         return seekNeeded ? seekPos : curPos;
    767     }
    768 
    769     /**
    770      * Seeks to the specified absolute uncompressed position in the stream.
    771      * This only stores the new position, so this function itself is always
    772      * very fast. The actual seek is done when <code>read</code> is called
    773      * to read at least one byte.
    774      * <p>
    775      * Seeking past the end of the stream is possible. In that case
    776      * <code>read</code> will return <code>-1</code> to indicate
    777      * the end of the stream.
    778      *
    779      * @param       pos         new uncompressed read position
    780      *
    781      * @throws      XZIOException
    782      *                          if <code>pos</code> is negative, or
    783      *                          if stream has been closed
    784      */
    785     public void seek(long pos) throws IOException {
    786         if (in == null)
    787             throw new XZIOException("Stream closed");
    788 
    789         if (pos < 0)
    790             throw new XZIOException("Negative seek position: " + pos);
    791 
    792         seekPos = pos;
    793         seekNeeded = true;
    794     }
    795 
    796     /**
    797      * Seeks to the beginning of the given XZ Block.
    798      *
    799      * @throws      XZIOException
    800      *              if <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    801      *              <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>,
    802      *              or if stream has been closed
    803      *
    804      * @since 1.3
    805      */
    806     public void seekToBlock(int blockNumber) throws IOException {
    807         if (in == null)
    808             throw new XZIOException("Stream closed");
    809 
    810         if (blockNumber < 0 || blockNumber >= blockCount)
    811             throw new XZIOException("Invalid XZ Block number: " + blockNumber);
    812 
    813         // This is a bit silly implementation. Here we locate the uncompressed
    814         // offset of the specified Block, then when doing the actual seek in
    815         // seek(), we need to find the Block number based on seekPos.
    816         seekPos = getBlockPos(blockNumber);
    817         seekNeeded = true;
    818     }
    819 
    820     /**
    821      * Does the actual seeking. This is also called when <code>read</code>
    822      * needs a new Block to decode.
    823      */
    824     private void seek() throws IOException {
    825         // If seek(long) wasn't called, we simply need to get the next Block
    826         // from the same Stream. If there are no more Blocks in this Stream,
    827         // then we behave as if seek(long) had been called.
    828         if (!seekNeeded) {
    829             if (curBlockInfo.hasNext()) {
    830                 curBlockInfo.setNext();
    831                 initBlockDecoder();
    832                 return;
    833             }
    834 
    835             seekPos = curPos;
    836         }
    837 
    838         seekNeeded = false;
    839 
    840         // Check if we are seeking to or past the end of the file.
    841         if (seekPos >= uncompressedSize) {
    842             curPos = seekPos;
    843             blockDecoder = null;
    844             endReached = true;
    845             return;
    846         }
    847 
    848         endReached = false;
    849 
    850         // Locate the Block that contains the uncompressed target position.
    851         locateBlockByPos(curBlockInfo, seekPos);
    852 
    853         // Seek in the underlying stream and create a new Block decoder
    854         // only if really needed. We can skip it if the current position
    855         // is already in the correct Block and the target position hasn't
    856         // been decompressed yet.
    857         //
    858         // NOTE: If curPos points to the beginning of this Block, it's
    859         // because it was left there after decompressing an earlier Block.
    860         // In that case, decoding of the current Block hasn't been started
    861         // yet. (Decoding of a Block won't be started until at least one
    862         // byte will also be read from it.)
    863         if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
    864             // Seek to the beginning of the Block.
    865             in.seek(curBlockInfo.compressedOffset);
    866 
    867             // Since it is possible that this Block is from a different
    868             // Stream than the previous Block, initialize a new Check.
    869             check = Check.getInstance(curBlockInfo.getCheckType());
    870 
    871             // Create a new Block decoder.
    872             initBlockDecoder();
    873             curPos = curBlockInfo.uncompressedOffset;
    874         }
    875 
    876         // If the target wasn't at a Block boundary, decompress and throw
    877         // away data to reach the target position.
    878         if (seekPos > curPos) {
    879             // NOTE: The "if" below is there just in case. In this situation,
    880             // blockDecoder.skip will always skip the requested amount
    881             // or throw an exception.
    882             long skipAmount = seekPos - curPos;
    883             if (blockDecoder.skip(skipAmount) != skipAmount)
    884                 throw new CorruptedInputException();
    885 
    886             curPos = seekPos;
    887         }
    888     }
    889 
    890     /**
    891      * Locates the Block that contains the given uncompressed position.
    892      */
    893     private void locateBlockByPos(BlockInfo info, long pos) {
    894         if (pos < 0 || pos >= uncompressedSize)
    895             throw new IndexOutOfBoundsException(
    896                     "Invalid uncompressed position: " + pos);
    897 
    898         // Locate the Stream that contains the target position.
    899         IndexDecoder index;
    900         for (int i = 0; ; ++i) {
    901             index = (IndexDecoder)streams.get(i);
    902             if (index.hasUncompressedOffset(pos))
    903                 break;
    904         }
    905 
    906         // Locate the Block from the Stream that contains the target position.
    907         index.locateBlock(info, pos);
    908 
    909         assert (info.compressedOffset & 3) == 0;
    910         assert info.uncompressedSize > 0;
    911         assert pos >= info.uncompressedOffset;
    912         assert pos < info.uncompressedOffset + info.uncompressedSize;
    913     }
    914 
    915     /**
    916      * Locates the given Block and stores information about it
    917      * to <code>info</code>.
    918      */
    919     private void locateBlockByNumber(BlockInfo info, int blockNumber) {
    920         // Validate.
    921         if (blockNumber < 0 || blockNumber >= blockCount)
    922             throw new IndexOutOfBoundsException(
    923                     "Invalid XZ Block number: " + blockNumber);
    924 
    925         // Skip the search if info already points to the correct Block.
    926         if (info.blockNumber == blockNumber)
    927             return;
    928 
    929         // Search the Stream that contains the given Block and then
    930         // search the Block from that Stream.
    931         for (int i = 0; ; ++i) {
    932             IndexDecoder index = (IndexDecoder)streams.get(i);
    933             if (index.hasRecord(blockNumber)) {
    934                 index.setBlockInfo(info, blockNumber);
    935                 return;
    936             }
    937         }
    938     }
    939 
    940     /**
    941      * Initializes a new BlockInputStream. This is a helper function for
    942      * <code>seek()</code>.
    943      */
    944     private void initBlockDecoder() throws IOException {
    945         try {
    946             // Set it to null first so that GC can collect it if memory
    947             // runs tight when initializing a new BlockInputStream.
    948             blockDecoder = null;
    949             blockDecoder = new BlockInputStream(
    950                     in, check, verifyCheck, memoryLimit,
    951                     curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize);
    952         } catch (MemoryLimitException e) {
    953             // BlockInputStream doesn't know how much memory we had
    954             // already needed so we need to recreate the exception.
    955             assert memoryLimit >= 0;
    956             throw new MemoryLimitException(
    957                     e.getMemoryNeeded() + indexMemoryUsage,
    958                     memoryLimit + indexMemoryUsage);
    959         } catch (IndexIndicatorException e) {
    960             // It cannot be Index so the file must be corrupt.
    961             throw new CorruptedInputException();
    962         }
    963     }
    964 }
    965