Home | History | Annotate | Download | only in xz
      1 /*
      2  * SeekableXZInputStream
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 package org.tukaani.xz;
     11 
     12 import java.util.Arrays;
     13 import java.util.ArrayList;
     14 import java.io.DataInputStream;
     15 import java.io.IOException;
     16 import java.io.EOFException;
     17 import org.tukaani.xz.common.DecoderUtil;
     18 import org.tukaani.xz.common.StreamFlags;
     19 import org.tukaani.xz.check.Check;
     20 import org.tukaani.xz.index.IndexDecoder;
     21 import org.tukaani.xz.index.BlockInfo;
     22 
     23 /**
     24  * Decompresses a .xz file in random access mode.
     25  * This supports decompressing concatenated .xz files.
     26  * <p>
     27  * Each .xz file consist of one or more Streams. Each Stream consist of zero
     28  * or more Blocks. Each Stream contains an Index of Streams' Blocks.
     29  * The Indexes from all Streams are loaded in RAM by a constructor of this
     30  * class. A typical .xz file has only one Stream, and parsing its Index will
     31  * need only three or four seeks.
     32  * <p>
     33  * To make random access possible, the data in a .xz file must be splitted
     34  * into multiple Blocks of reasonable size. Decompression can only start at
     35  * a Block boundary. When seeking to an uncompressed position that is not at
     36  * a Block boundary, decompression starts at the beginning of the Block and
     37  * throws away data until the target position is reached. Thus, smaller Blocks
     38  * mean faster seeks to arbitrary uncompressed positions. On the other hand,
     39  * smaller Blocks mean worse compression. So one has to make a compromise
     40  * between random access speed and compression ratio.
     41  * <p>
     42  * Implementation note: This class uses linear search to locate the correct
     43  * Stream from the data structures in RAM. It was the simplest to implement
     44  * and should be fine as long as there aren't too many Streams. The correct
     45  * Block inside a Stream is located using binary search and thus is fast
     46  * even with a huge number of Blocks.
     47  *
     48  * <h4>Memory usage</h4>
     49  * <p>
     50  * The amount of memory needed for the Indexes is taken into account when
     51  * checking the memory usage limit. Each Stream is calculated to need at
     52  * least 1&nbsp;KiB of memory and each Block 16 bytes of memory, rounded up
     53  * to the next kibibyte. So unless the file has a huge number of Streams or
     54  * Blocks, these don't take significant amount of memory.
     55  *
     56  * <h4>Creating random-accessible .xz files</h4>
     57  * <p>
     58  * When using {@link XZOutputStream}, a new Block can be started by calling
     59  * its {@link XZOutputStream#endBlock() endBlock} method. If you know
     60  * that the decompressor will only need to seek to certain uncompressed
     61  * positions, it can be a good idea to start a new Block at (some of) these
     62  * positions (and only at these positions to get better compression ratio).
     63  * <p>
     64  * liblzma in XZ Utils supports starting a new Block with
     65  * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
     66  * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
     67  * also added the option <code>--block-size=SIZE</code> to the xz command
     68  * line tool. XZ Utils 5.1.2alpha added a partial implementation of
     69  * <code>--block-list=SIZES</code> which allows specifying sizes of
     70  * individual Blocks.
     71  *
     72  * @see SeekableFileInputStream
     73  * @see XZInputStream
     74  * @see XZOutputStream
     75  */
     76 public class SeekableXZInputStream extends SeekableInputStream {
     77     /**
     78      * Cache for big arrays.
     79      */
     80     private final ArrayCache arrayCache;
     81 
     82     /**
     83      * The input stream containing XZ compressed data.
     84      */
     85     private SeekableInputStream in;
     86 
     87     /**
     88      * Memory usage limit after the memory usage of the IndexDecoders have
     89      * been substracted.
     90      */
     91     private final int memoryLimit;
     92 
     93     /**
     94      * Memory usage of the IndexDecoders.
     95      * <code>memoryLimit + indexMemoryUsage</code> equals the original
     96      * memory usage limit that was passed to the constructor.
     97      */
     98     private int indexMemoryUsage = 0;
     99 
    100     /**
    101      * List of IndexDecoders, one for each Stream in the file.
    102      * The list is in reverse order: The first element is
    103      * the last Stream in the file.
    104      */
    105     private final ArrayList<IndexDecoder> streams
    106             = new ArrayList<IndexDecoder>();
    107 
    108     /**
    109      * Bitmask of all Check IDs seen.
    110      */
    111     private int checkTypes = 0;
    112 
    113     /**
    114      * Uncompressed size of the file (all Streams).
    115      */
    116     private long uncompressedSize = 0;
    117 
    118     /**
    119      * Uncompressed size of the largest XZ Block in the file.
    120      */
    121     private long largestBlockSize = 0;
    122 
    123     /**
    124      * Number of XZ Blocks in the file.
    125      */
    126     private int blockCount = 0;
    127 
    128     /**
    129      * Size and position information about the current Block.
    130      * If there are no Blocks, all values will be <code>-1</code>.
    131      */
    132     private final BlockInfo curBlockInfo;
    133 
    134     /**
    135      * Temporary (and cached) information about the Block whose information
    136      * is queried via <code>getBlockPos</code> and related functions.
    137      */
    138     private final BlockInfo queriedBlockInfo;
    139 
    140     /**
    141      * Integrity Check in the current XZ Stream. The constructor leaves
    142      * this to point to the Check of the first Stream.
    143      */
    144     private Check check;
    145 
    146     /**
    147      * Flag indicating if the integrity checks will be verified.
    148      */
    149     private final boolean verifyCheck;
    150 
    151     /**
    152      * Decoder of the current XZ Block, if any.
    153      */
    154     private BlockInputStream blockDecoder = null;
    155 
    156     /**
    157      * Current uncompressed position.
    158      */
    159     private long curPos = 0;
    160 
    161     /**
    162      * Target position for seeking.
    163      */
    164     private long seekPos;
    165 
    166     /**
    167      * True when <code>seek(long)</code> has been called but the actual
    168      * seeking hasn't been done yet.
    169      */
    170     private boolean seekNeeded = false;
    171 
    172     /**
    173      * True when end of the file was reached. This can be cleared by
    174      * calling <code>seek(long)</code>.
    175      */
    176     private boolean endReached = false;
    177 
    178     /**
    179      * Pending exception from an earlier error.
    180      */
    181     private IOException exception = null;
    182 
    183     /**
    184      * Temporary buffer for read(). This avoids reallocating memory
    185      * on every read() call.
    186      */
    187     private final byte[] tempBuf = new byte[1];
    188 
    189     /**
    190      * Creates a new seekable XZ decompressor without a memory usage limit.
    191      *
    192      * @param       in          seekable input stream containing one or more
    193      *                          XZ Streams; the whole input stream is used
    194      *
    195      * @throws      XZFormatException
    196      *                          input is not in the XZ format
    197      *
    198      * @throws      CorruptedInputException
    199      *                          XZ data is corrupt or truncated
    200      *
    201      * @throws      UnsupportedOptionsException
    202      *                          XZ headers seem valid but they specify
    203      *                          options not supported by this implementation
    204      *
    205      * @throws      EOFException
    206      *                          less than 6 bytes of input was available
    207      *                          from <code>in</code>, or (unlikely) the size
    208      *                          of the underlying stream got smaller while
    209      *                          this was reading from it
    210      *
    211      * @throws      IOException may be thrown by <code>in</code>
    212      */
    213     public SeekableXZInputStream(SeekableInputStream in)
    214             throws IOException {
    215         this(in, -1);
    216     }
    217 
    218     /**
    219      * Creates a new seekable XZ decompressor without a memory usage limit.
    220      * <p>
    221      * This is identical to
    222      * <code>SeekableXZInputStream(SeekableInputStream)</code> except that
    223      * this also takes the <code>arrayCache</code> argument.
    224      *
    225      * @param       in          seekable input stream containing one or more
    226      *                          XZ Streams; the whole input stream is used
    227      *
    228      * @param       arrayCache  cache to be used for allocating large arrays
    229      *
    230      * @throws      XZFormatException
    231      *                          input is not in the XZ format
    232      *
    233      * @throws      CorruptedInputException
    234      *                          XZ data is corrupt or truncated
    235      *
    236      * @throws      UnsupportedOptionsException
    237      *                          XZ headers seem valid but they specify
    238      *                          options not supported by this implementation
    239      *
    240      * @throws      EOFException
    241      *                          less than 6 bytes of input was available
    242      *                          from <code>in</code>, or (unlikely) the size
    243      *                          of the underlying stream got smaller while
    244      *                          this was reading from it
    245      *
    246      * @throws      IOException may be thrown by <code>in</code>
    247      *
    248      * @since 1.7
    249      */
    250     public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache)
    251             throws IOException {
    252         this(in, -1, arrayCache);
    253     }
    254 
    255     /**
    256      * Creates a new seekable XZ decomporessor with an optional
    257      * memory usage limit.
    258      *
    259      * @param       in          seekable input stream containing one or more
    260      *                          XZ Streams; the whole input stream is used
    261      *
    262      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    263      *                          or <code>-1</code> to impose no
    264      *                          memory usage limit
    265      *
    266      * @throws      XZFormatException
    267      *                          input is not in the XZ format
    268      *
    269      * @throws      CorruptedInputException
    270      *                          XZ data is corrupt or truncated
    271      *
    272      * @throws      UnsupportedOptionsException
    273      *                          XZ headers seem valid but they specify
    274      *                          options not supported by this implementation
    275      *
    276      * @throws      MemoryLimitException
    277      *                          decoded XZ Indexes would need more memory
    278      *                          than allowed by the memory usage limit
    279      *
    280      * @throws      EOFException
    281      *                          less than 6 bytes of input was available
    282      *                          from <code>in</code>, or (unlikely) the size
    283      *                          of the underlying stream got smaller while
    284      *                          this was reading from it
    285      *
    286      * @throws      IOException may be thrown by <code>in</code>
    287      */
    288     public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
    289             throws IOException {
    290         this(in, memoryLimit, true);
    291     }
    292 
    293     /**
    294      * Creates a new seekable XZ decomporessor with an optional
    295      * memory usage limit.
    296      * <p>
    297      * This is identical to
    298      * <code>SeekableXZInputStream(SeekableInputStream,int)</code>
    299      * except that this also takes the <code>arrayCache</code> argument.
    300      *
    301      * @param       in          seekable input stream containing one or more
    302      *                          XZ Streams; the whole input stream is used
    303      *
    304      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    305      *                          or <code>-1</code> to impose no
    306      *                          memory usage limit
    307      *
    308      * @param       arrayCache  cache to be used for allocating large arrays
    309      *
    310      * @throws      XZFormatException
    311      *                          input is not in the XZ format
    312      *
    313      * @throws      CorruptedInputException
    314      *                          XZ data is corrupt or truncated
    315      *
    316      * @throws      UnsupportedOptionsException
    317      *                          XZ headers seem valid but they specify
    318      *                          options not supported by this implementation
    319      *
    320      * @throws      MemoryLimitException
    321      *                          decoded XZ Indexes would need more memory
    322      *                          than allowed by the memory usage limit
    323      *
    324      * @throws      EOFException
    325      *                          less than 6 bytes of input was available
    326      *                          from <code>in</code>, or (unlikely) the size
    327      *                          of the underlying stream got smaller while
    328      *                          this was reading from it
    329      *
    330      * @throws      IOException may be thrown by <code>in</code>
    331      *
    332      * @since 1.7
    333      */
    334     public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
    335                                  ArrayCache arrayCache)
    336             throws IOException {
    337         this(in, memoryLimit, true, arrayCache);
    338     }
    339 
    340     /**
    341      * Creates a new seekable XZ decomporessor with an optional
    342      * memory usage limit and ability to disable verification
    343      * of integrity checks.
    344      * <p>
    345      * Note that integrity check verification should almost never be disabled.
    346      * Possible reasons to disable integrity check verification:
    347      * <ul>
    348      *   <li>Trying to recover data from a corrupt .xz file.</li>
    349      *   <li>Speeding up decompression. This matters mostly with SHA-256
    350      *   or with files that have compressed extremely well. It's recommended
    351      *   that integrity checking isn't disabled for performance reasons
    352      *   unless the file integrity is verified externally in some other
    353      *   way.</li>
    354      * </ul>
    355      * <p>
    356      * <code>verifyCheck</code> only affects the integrity check of
    357      * the actual compressed data. The CRC32 fields in the headers
    358      * are always verified.
    359      *
    360      * @param       in          seekable input stream containing one or more
    361      *                          XZ Streams; the whole input stream is used
    362      *
    363      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    364      *                          or <code>-1</code> to impose no
    365      *                          memory usage limit
    366      *
    367      * @param       verifyCheck if <code>true</code>, the integrity checks
    368      *                          will be verified; this should almost never
    369      *                          be set to <code>false</code>
    370      *
    371      * @throws      XZFormatException
    372      *                          input is not in the XZ format
    373      *
    374      * @throws      CorruptedInputException
    375      *                          XZ data is corrupt or truncated
    376      *
    377      * @throws      UnsupportedOptionsException
    378      *                          XZ headers seem valid but they specify
    379      *                          options not supported by this implementation
    380      *
    381      * @throws      MemoryLimitException
    382      *                          decoded XZ Indexes would need more memory
    383      *                          than allowed by the memory usage limit
    384      *
    385      * @throws      EOFException
    386      *                          less than 6 bytes of input was available
    387      *                          from <code>in</code>, or (unlikely) the size
    388      *                          of the underlying stream got smaller while
    389      *                          this was reading from it
    390      *
    391      * @throws      IOException may be thrown by <code>in</code>
    392      *
    393      * @since 1.6
    394      */
    395     public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
    396                                  boolean verifyCheck)
    397             throws IOException {
    398         this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
    399     }
    400 
    401     /**
    402      * Creates a new seekable XZ decomporessor with an optional
    403      * memory usage limit and ability to disable verification
    404      * of integrity checks.
    405      * <p>
    406      * This is identical to
    407      * <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code>
    408      * except that this also takes the <code>arrayCache</code> argument.
    409      *
    410      * @param       in          seekable input stream containing one or more
    411      *                          XZ Streams; the whole input stream is used
    412      *
    413      * @param       memoryLimit memory usage limit in kibibytes (KiB)
    414      *                          or <code>-1</code> to impose no
    415      *                          memory usage limit
    416      *
    417      * @param       verifyCheck if <code>true</code>, the integrity checks
    418      *                          will be verified; this should almost never
    419      *                          be set to <code>false</code>
    420      *
    421      * @param       arrayCache  cache to be used for allocating large arrays
    422      *
    423      * @throws      XZFormatException
    424      *                          input is not in the XZ format
    425      *
    426      * @throws      CorruptedInputException
    427      *                          XZ data is corrupt or truncated
    428      *
    429      * @throws      UnsupportedOptionsException
    430      *                          XZ headers seem valid but they specify
    431      *                          options not supported by this implementation
    432      *
    433      * @throws      MemoryLimitException
    434      *                          decoded XZ Indexes would need more memory
    435      *                          than allowed by the memory usage limit
    436      *
    437      * @throws      EOFException
    438      *                          less than 6 bytes of input was available
    439      *                          from <code>in</code>, or (unlikely) the size
    440      *                          of the underlying stream got smaller while
    441      *                          this was reading from it
    442      *
    443      * @throws      IOException may be thrown by <code>in</code>
    444      *
    445      * @since 1.7
    446      */
    447     public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
    448                                  boolean verifyCheck, ArrayCache arrayCache)
    449             throws IOException {
    450         this.arrayCache = arrayCache;
    451         this.verifyCheck = verifyCheck;
    452         this.in = in;
    453         DataInputStream inData = new DataInputStream(in);
    454 
    455         // Check the magic bytes in the beginning of the file.
    456         {
    457             in.seek(0);
    458             byte[] buf = new byte[XZ.HEADER_MAGIC.length];
    459             inData.readFully(buf);
    460             if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
    461                 throw new XZFormatException();
    462         }
    463 
    464         // Get the file size and verify that it is a multiple of 4 bytes.
    465         long pos = in.length();
    466         if ((pos & 3) != 0)
    467             throw new CorruptedInputException(
    468                     "XZ file size is not a multiple of 4 bytes");
    469 
    470         // Parse the headers starting from the end of the file.
    471         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
    472         long streamPadding = 0;
    473 
    474         while (pos > 0) {
    475             if (pos < DecoderUtil.STREAM_HEADER_SIZE)
    476                 throw new CorruptedInputException();
    477 
    478             // Read the potential Stream Footer.
    479             in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
    480             inData.readFully(buf);
    481 
    482             // Skip Stream Padding four bytes at a time.
    483             // Skipping more at once would be faster,
    484             // but usually there isn't much Stream Padding.
    485             if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
    486                     && buf[11] == 0x00) {
    487                 streamPadding += 4;
    488                 pos -= 4;
    489                 continue;
    490             }
    491 
    492             // It's not Stream Padding. Update pos.
    493             pos -= DecoderUtil.STREAM_HEADER_SIZE;
    494 
    495             // Decode the Stream Footer and check if Backward Size
    496             // looks reasonable.
    497             StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
    498             if (streamFooter.backwardSize >= pos)
    499                 throw new CorruptedInputException(
    500                         "Backward Size in XZ Stream Footer is too big");
    501 
    502             // Check that the Check ID is supported. Store it in case this
    503             // is the first Stream in the file.
    504             check = Check.getInstance(streamFooter.checkType);
    505 
    506             // Remember which Check IDs have been seen.
    507             checkTypes |= 1 << streamFooter.checkType;
    508 
    509             // Seek to the beginning of the Index.
    510             in.seek(pos - streamFooter.backwardSize);
    511 
    512             // Decode the Index field.
    513             IndexDecoder index;
    514             try {
    515                 index = new IndexDecoder(in, streamFooter, streamPadding,
    516                                          memoryLimit);
    517             } catch (MemoryLimitException e) {
    518                 // IndexDecoder doesn't know how much memory we had
    519                 // already needed so we need to recreate the exception.
    520                 assert memoryLimit >= 0;
    521                 throw new MemoryLimitException(
    522                         e.getMemoryNeeded() + indexMemoryUsage,
    523                         memoryLimit + indexMemoryUsage);
    524             }
    525 
    526             // Update the memory usage and limit counters.
    527             indexMemoryUsage += index.getMemoryUsage();
    528             if (memoryLimit >= 0) {
    529                 memoryLimit -= index.getMemoryUsage();
    530                 assert memoryLimit >= 0;
    531             }
    532 
    533             // Remember the uncompressed size of the largest Block.
    534             if (largestBlockSize < index.getLargestBlockSize())
    535                 largestBlockSize = index.getLargestBlockSize();
    536 
    537             // Calculate the offset to the beginning of this XZ Stream and
    538             // check that it looks sane.
    539             long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
    540             if (pos < off)
    541                 throw new CorruptedInputException("XZ Index indicates "
    542                         + "too big compressed size for the XZ Stream");
    543 
    544             // Seek to the beginning of this Stream.
    545             pos -= off;
    546             in.seek(pos);
    547 
    548             // Decode the Stream Header.
    549             inData.readFully(buf);
    550             StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
    551 
    552             // Verify that the Stream Header matches the Stream Footer.
    553             if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
    554                 throw new CorruptedInputException(
    555                         "XZ Stream Footer does not match Stream Header");
    556 
    557             // Update the total uncompressed size of the file and check that
    558             // it doesn't overflow.
    559             uncompressedSize += index.getUncompressedSize();
    560             if (uncompressedSize < 0)
    561                 throw new UnsupportedOptionsException("XZ file is too big");
    562 
    563             // Update the Block count and check that it fits into an int.
    564             blockCount += index.getRecordCount();
    565             if (blockCount < 0)
    566                 throw new UnsupportedOptionsException(
    567                         "XZ file has over " + Integer.MAX_VALUE + " Blocks");
    568 
    569             // Add this Stream to the list of Streams.
    570             streams.add(index);
    571 
    572             // Reset to be ready to parse the next Stream.
    573             streamPadding = 0;
    574         }
    575 
    576         assert pos == 0;
    577 
    578         // Save it now that indexMemoryUsage has been substracted from it.
    579         this.memoryLimit = memoryLimit;
    580 
    581         // Store the relative offsets of the Streams. This way we don't
    582         // need to recalculate them in this class when seeking; the
    583         // IndexDecoder instances will handle them.
    584         IndexDecoder prev = streams.get(streams.size() - 1);
    585         for (int i = streams.size() - 2; i >= 0; --i) {
    586             IndexDecoder cur = streams.get(i);
    587             cur.setOffsets(prev);
    588             prev = cur;
    589         }
    590 
    591         // Initialize curBlockInfo to point to the first Stream.
    592         // The blockNumber will be left to -1 so that .hasNext()
    593         // and .setNext() work to get the first Block when starting
    594         // to decompress from the beginning of the file.
    595         IndexDecoder first = streams.get(streams.size() - 1);
    596         curBlockInfo = new BlockInfo(first);
    597 
    598         // queriedBlockInfo needs to be allocated too. The Stream used for
    599         // initialization doesn't matter though.
    600         queriedBlockInfo = new BlockInfo(first);
    601     }
    602 
    603     /**
    604      * Gets the types of integrity checks used in the .xz file.
    605      * Multiple checks are possible only if there are multiple
    606      * concatenated XZ Streams.
    607      * <p>
    608      * The returned value has a bit set for every check type that is present.
    609      * For example, if CRC64 and SHA-256 were used, the return value is
    610      * <code>(1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_CRC64)
    611      * | (1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_SHA256)</code>.
    612      */
    613     public int getCheckTypes() {
    614         return checkTypes;
    615     }
    616 
    617     /**
    618      * Gets the amount of memory in kibibytes (KiB) used by
    619      * the data structures needed to locate the XZ Blocks.
    620      * This is usually useless information but since it is calculated
    621      * for memory usage limit anyway, it is nice to make it available to too.
    622      */
    623     public int getIndexMemoryUsage() {
    624         return indexMemoryUsage;
    625     }
    626 
    627     /**
    628      * Gets the uncompressed size of the largest XZ Block in bytes.
    629      * This can be useful if you want to check that the file doesn't
    630      * have huge XZ Blocks which could make seeking to arbitrary offsets
    631      * very slow. Note that huge Blocks don't automatically mean that
    632      * seeking would be slow, for example, seeking to the beginning of
    633      * any Block is always fast.
    634      */
    635     public long getLargestBlockSize() {
    636         return largestBlockSize;
    637     }
    638 
    639     /**
    640      * Gets the number of Streams in the .xz file.
    641      *
    642      * @since 1.3
    643      */
    644     public int getStreamCount() {
    645         return streams.size();
    646     }
    647 
    648     /**
    649      * Gets the number of Blocks in the .xz file.
    650      *
    651      * @since 1.3
    652      */
    653     public int getBlockCount() {
    654         return blockCount;
    655     }
    656 
    657     /**
    658      * Gets the uncompressed start position of the given Block.
    659      *
    660      * @throws  IndexOutOfBoundsException if
    661      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    662      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    663      *
    664      * @since 1.3
    665      */
    666     public long getBlockPos(int blockNumber) {
    667         locateBlockByNumber(queriedBlockInfo, blockNumber);
    668         return queriedBlockInfo.uncompressedOffset;
    669     }
    670 
    671     /**
    672      * Gets the uncompressed size of the given Block.
    673      *
    674      * @throws  IndexOutOfBoundsException if
    675      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    676      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    677      *
    678      * @since 1.3
    679      */
    680     public long getBlockSize(int blockNumber) {
    681         locateBlockByNumber(queriedBlockInfo, blockNumber);
    682         return queriedBlockInfo.uncompressedSize;
    683     }
    684 
    685     /**
    686      * Gets the position where the given compressed Block starts in
    687      * the underlying .xz file.
    688      * This information is rarely useful to the users of this class.
    689      *
    690      * @throws  IndexOutOfBoundsException if
    691      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    692      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    693      *
    694      * @since 1.3
    695      */
    696     public long getBlockCompPos(int blockNumber) {
    697         locateBlockByNumber(queriedBlockInfo, blockNumber);
    698         return queriedBlockInfo.compressedOffset;
    699     }
    700 
    701     /**
    702      * Gets the compressed size of the given Block.
    703      * This together with the uncompressed size can be used to calculate
    704      * the compression ratio of the specific Block.
    705      *
    706      * @throws  IndexOutOfBoundsException if
    707      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    708      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    709      *
    710      * @since 1.3
    711      */
    712     public long getBlockCompSize(int blockNumber) {
    713         locateBlockByNumber(queriedBlockInfo, blockNumber);
    714         return (queriedBlockInfo.unpaddedSize + 3) & ~3;
    715     }
    716 
    717     /**
    718      * Gets integrity check type (Check ID) of the given Block.
    719      *
    720      * @throws  IndexOutOfBoundsException if
    721      *          <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    722      *          <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
    723      *
    724      * @see #getCheckTypes()
    725      *
    726      * @since 1.3
    727      */
    728     public int getBlockCheckType(int blockNumber) {
    729         locateBlockByNumber(queriedBlockInfo, blockNumber);
    730         return queriedBlockInfo.getCheckType();
    731     }
    732 
    733     /**
    734      * Gets the number of the Block that contains the byte at the given
    735      * uncompressed position.
    736      *
    737      * @throws  IndexOutOfBoundsException if
    738      *          <code>pos&nbsp;&lt;&nbsp;0</code> or
    739      *          <code>pos&nbsp;&gt;=&nbsp;length()</code>.
    740      *
    741      * @since 1.3
    742      */
    743     public int getBlockNumber(long pos) {
    744         locateBlockByPos(queriedBlockInfo, pos);
    745         return queriedBlockInfo.blockNumber;
    746     }
    747 
    748     /**
    749      * Decompresses the next byte from this input stream.
    750      *
    751      * @return      the next decompressed byte, or <code>-1</code>
    752      *              to indicate the end of the compressed stream
    753      *
    754      * @throws      CorruptedInputException
    755      * @throws      UnsupportedOptionsException
    756      * @throws      MemoryLimitException
    757      *
    758      * @throws      XZIOException if the stream has been closed
    759      *
    760      * @throws      IOException may be thrown by <code>in</code>
    761      */
    762     public int read() throws IOException {
    763         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    764     }
    765 
    766     /**
    767      * Decompresses into an array of bytes.
    768      * <p>
    769      * If <code>len</code> is zero, no bytes are read and <code>0</code>
    770      * is returned. Otherwise this will try to decompress <code>len</code>
    771      * bytes of uncompressed data. Less than <code>len</code> bytes may
    772      * be read only in the following situations:
    773      * <ul>
    774      *   <li>The end of the compressed data was reached successfully.</li>
    775      *   <li>An error is detected after at least one but less than
    776      *       <code>len</code> bytes have already been successfully
    777      *       decompressed. The next call with non-zero <code>len</code>
    778      *       will immediately throw the pending exception.</li>
    779      *   <li>An exception is thrown.</li>
    780      * </ul>
    781      *
    782      * @param       buf         target buffer for uncompressed data
    783      * @param       off         start offset in <code>buf</code>
    784      * @param       len         maximum number of uncompressed bytes to read
    785      *
    786      * @return      number of bytes read, or <code>-1</code> to indicate
    787      *              the end of the compressed stream
    788      *
    789      * @throws      CorruptedInputException
    790      * @throws      UnsupportedOptionsException
    791      * @throws      MemoryLimitException
    792      *
    793      * @throws      XZIOException if the stream has been closed
    794      *
    795      * @throws      IOException may be thrown by <code>in</code>
    796      */
    797     public int read(byte[] buf, int off, int len) throws IOException {
    798         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
    799             throw new IndexOutOfBoundsException();
    800 
    801         if (len == 0)
    802             return 0;
    803 
    804         if (in == null)
    805             throw new XZIOException("Stream closed");
    806 
    807         if (exception != null)
    808             throw exception;
    809 
    810         int size = 0;
    811 
    812         try {
    813             if (seekNeeded)
    814                 seek();
    815 
    816             if (endReached)
    817                 return -1;
    818 
    819             while (len > 0) {
    820                 if (blockDecoder == null) {
    821                     seek();
    822                     if (endReached)
    823                         break;
    824                 }
    825 
    826                 int ret = blockDecoder.read(buf, off, len);
    827 
    828                 if (ret > 0) {
    829                     curPos += ret;
    830                     size += ret;
    831                     off += ret;
    832                     len -= ret;
    833                 } else if (ret == -1) {
    834                     blockDecoder = null;
    835                 }
    836             }
    837         } catch (IOException e) {
    838             // We know that the file isn't simply truncated because we could
    839             // parse the Indexes in the constructor. So convert EOFException
    840             // to CorruptedInputException.
    841             if (e instanceof EOFException)
    842                 e = new CorruptedInputException();
    843 
    844             exception = e;
    845             if (size == 0)
    846                 throw e;
    847         }
    848 
    849         return size;
    850     }
    851 
    852     /**
    853      * Returns the number of uncompressed bytes that can be read
    854      * without blocking. The value is returned with an assumption
    855      * that the compressed input data will be valid. If the compressed
    856      * data is corrupt, <code>CorruptedInputException</code> may get
    857      * thrown before the number of bytes claimed to be available have
    858      * been read from this input stream.
    859      *
    860      * @return      the number of uncompressed bytes that can be read
    861      *              without blocking
    862      */
    863     public int available() throws IOException {
    864         if (in == null)
    865             throw new XZIOException("Stream closed");
    866 
    867         if (exception != null)
    868             throw exception;
    869 
    870         if (endReached || seekNeeded || blockDecoder == null)
    871             return 0;
    872 
    873         return blockDecoder.available();
    874     }
    875 
    876     /**
    877      * Closes the stream and calls <code>in.close()</code>.
    878      * If the stream was already closed, this does nothing.
    879      * <p>
    880      * This is equivalent to <code>close(true)</code>.
    881      *
    882      * @throws  IOException if thrown by <code>in.close()</code>
    883      */
    884     public void close() throws IOException {
    885         close(true);
    886     }
    887 
    888     /**
    889      * Closes the stream and optionally calls <code>in.close()</code>.
    890      * If the stream was already closed, this does nothing.
    891      * If <code>close(false)</code> has been called, a further
    892      * call of <code>close(true)</code> does nothing (it doesn't call
    893      * <code>in.close()</code>).
    894      * <p>
    895      * If you don't want to close the underlying <code>InputStream</code>,
    896      * there is usually no need to worry about closing this stream either;
    897      * it's fine to do nothing and let the garbage collector handle it.
    898      * However, if you are using {@link ArrayCache}, <code>close(false)</code>
    899      * can be useful to put the allocated arrays back to the cache without
    900      * closing the underlying <code>InputStream</code>.
    901      * <p>
    902      * Note that if you successfully reach the end of the stream
    903      * (<code>read</code> returns <code>-1</code>), the arrays are
    904      * automatically put back to the cache by that <code>read</code> call. In
    905      * this situation <code>close(false)</code> is redundant (but harmless).
    906      *
    907      * @throws  IOException if thrown by <code>in.close()</code>
    908      *
    909      * @since 1.7
    910      */
    911     public void close(boolean closeInput) throws IOException {
    912         if (in != null) {
    913             if (blockDecoder != null) {
    914                 blockDecoder.close();
    915                 blockDecoder = null;
    916             }
    917 
    918             try {
    919                 if (closeInput)
    920                     in.close();
    921             } finally {
    922                 in = null;
    923             }
    924         }
    925     }
    926 
    927     /**
    928      * Gets the uncompressed size of this input stream. If there are multiple
    929      * XZ Streams, the total uncompressed size of all XZ Streams is returned.
    930      */
    931     public long length() {
    932         return uncompressedSize;
    933     }
    934 
    935     /**
    936      * Gets the current uncompressed position in this input stream.
    937      *
    938      * @throws      XZIOException if the stream has been closed
    939      */
    940     public long position() throws IOException {
    941         if (in == null)
    942             throw new XZIOException("Stream closed");
    943 
    944         return seekNeeded ? seekPos : curPos;
    945     }
    946 
    947     /**
    948      * Seeks to the specified absolute uncompressed position in the stream.
    949      * This only stores the new position, so this function itself is always
    950      * very fast. The actual seek is done when <code>read</code> is called
    951      * to read at least one byte.
    952      * <p>
    953      * Seeking past the end of the stream is possible. In that case
    954      * <code>read</code> will return <code>-1</code> to indicate
    955      * the end of the stream.
    956      *
    957      * @param       pos         new uncompressed read position
    958      *
    959      * @throws      XZIOException
    960      *                          if <code>pos</code> is negative, or
    961      *                          if stream has been closed
    962      */
    963     public void seek(long pos) throws IOException {
    964         if (in == null)
    965             throw new XZIOException("Stream closed");
    966 
    967         if (pos < 0)
    968             throw new XZIOException("Negative seek position: " + pos);
    969 
    970         seekPos = pos;
    971         seekNeeded = true;
    972     }
    973 
    974     /**
    975      * Seeks to the beginning of the given XZ Block.
    976      *
    977      * @throws      XZIOException
    978      *              if <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
    979      *              <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>,
    980      *              or if stream has been closed
    981      *
    982      * @since 1.3
    983      */
    984     public void seekToBlock(int blockNumber) throws IOException {
    985         if (in == null)
    986             throw new XZIOException("Stream closed");
    987 
    988         if (blockNumber < 0 || blockNumber >= blockCount)
    989             throw new XZIOException("Invalid XZ Block number: " + blockNumber);
    990 
    991         // This is a bit silly implementation. Here we locate the uncompressed
    992         // offset of the specified Block, then when doing the actual seek in
    993         // seek(), we need to find the Block number based on seekPos.
    994         seekPos = getBlockPos(blockNumber);
    995         seekNeeded = true;
    996     }
    997 
    998     /**
    999      * Does the actual seeking. This is also called when <code>read</code>
   1000      * needs a new Block to decode.
   1001      */
   1002     private void seek() throws IOException {
   1003         // If seek(long) wasn't called, we simply need to get the next Block
   1004         // from the same Stream. If there are no more Blocks in this Stream,
   1005         // then we behave as if seek(long) had been called.
   1006         if (!seekNeeded) {
   1007             if (curBlockInfo.hasNext()) {
   1008                 curBlockInfo.setNext();
   1009                 initBlockDecoder();
   1010                 return;
   1011             }
   1012 
   1013             seekPos = curPos;
   1014         }
   1015 
   1016         seekNeeded = false;
   1017 
   1018         // Check if we are seeking to or past the end of the file.
   1019         if (seekPos >= uncompressedSize) {
   1020             curPos = seekPos;
   1021 
   1022             if (blockDecoder != null) {
   1023                 blockDecoder.close();
   1024                 blockDecoder = null;
   1025             }
   1026 
   1027             endReached = true;
   1028             return;
   1029         }
   1030 
   1031         endReached = false;
   1032 
   1033         // Locate the Block that contains the uncompressed target position.
   1034         locateBlockByPos(curBlockInfo, seekPos);
   1035 
   1036         // Seek in the underlying stream and create a new Block decoder
   1037         // only if really needed. We can skip it if the current position
   1038         // is already in the correct Block and the target position hasn't
   1039         // been decompressed yet.
   1040         //
   1041         // NOTE: If curPos points to the beginning of this Block, it's
   1042         // because it was left there after decompressing an earlier Block.
   1043         // In that case, decoding of the current Block hasn't been started
   1044         // yet. (Decoding of a Block won't be started until at least one
   1045         // byte will also be read from it.)
   1046         if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
   1047             // Seek to the beginning of the Block.
   1048             in.seek(curBlockInfo.compressedOffset);
   1049 
   1050             // Since it is possible that this Block is from a different
   1051             // Stream than the previous Block, initialize a new Check.
   1052             check = Check.getInstance(curBlockInfo.getCheckType());
   1053 
   1054             // Create a new Block decoder.
   1055             initBlockDecoder();
   1056             curPos = curBlockInfo.uncompressedOffset;
   1057         }
   1058 
   1059         // If the target wasn't at a Block boundary, decompress and throw
   1060         // away data to reach the target position.
   1061         if (seekPos > curPos) {
   1062             // NOTE: The "if" below is there just in case. In this situation,
   1063             // blockDecoder.skip will always skip the requested amount
   1064             // or throw an exception.
   1065             long skipAmount = seekPos - curPos;
   1066             if (blockDecoder.skip(skipAmount) != skipAmount)
   1067                 throw new CorruptedInputException();
   1068 
   1069             curPos = seekPos;
   1070         }
   1071     }
   1072 
   1073     /**
   1074      * Locates the Block that contains the given uncompressed position.
   1075      */
   1076     private void locateBlockByPos(BlockInfo info, long pos) {
   1077         if (pos < 0 || pos >= uncompressedSize)
   1078             throw new IndexOutOfBoundsException(
   1079                     "Invalid uncompressed position: " + pos);
   1080 
   1081         // Locate the Stream that contains the target position.
   1082         IndexDecoder index;
   1083         for (int i = 0; ; ++i) {
   1084             index = streams.get(i);
   1085             if (index.hasUncompressedOffset(pos))
   1086                 break;
   1087         }
   1088 
   1089         // Locate the Block from the Stream that contains the target position.
   1090         index.locateBlock(info, pos);
   1091 
   1092         assert (info.compressedOffset & 3) == 0;
   1093         assert info.uncompressedSize > 0;
   1094         assert pos >= info.uncompressedOffset;
   1095         assert pos < info.uncompressedOffset + info.uncompressedSize;
   1096     }
   1097 
   1098     /**
   1099      * Locates the given Block and stores information about it
   1100      * to <code>info</code>.
   1101      */
   1102     private void locateBlockByNumber(BlockInfo info, int blockNumber) {
   1103         // Validate.
   1104         if (blockNumber < 0 || blockNumber >= blockCount)
   1105             throw new IndexOutOfBoundsException(
   1106                     "Invalid XZ Block number: " + blockNumber);
   1107 
   1108         // Skip the search if info already points to the correct Block.
   1109         if (info.blockNumber == blockNumber)
   1110             return;
   1111 
   1112         // Search the Stream that contains the given Block and then
   1113         // search the Block from that Stream.
   1114         for (int i = 0; ; ++i) {
   1115             IndexDecoder index = streams.get(i);
   1116             if (index.hasRecord(blockNumber)) {
   1117                 index.setBlockInfo(info, blockNumber);
   1118                 return;
   1119             }
   1120         }
   1121     }
   1122 
   1123     /**
   1124      * Initializes a new BlockInputStream. This is a helper function for
   1125      * <code>seek()</code>.
   1126      */
   1127     private void initBlockDecoder() throws IOException {
   1128         try {
   1129             // Set it to null first so that GC can collect it if memory
   1130             // runs tight when initializing a new BlockInputStream.
   1131             if (blockDecoder != null) {
   1132                 blockDecoder.close();
   1133                 blockDecoder = null;
   1134             }
   1135 
   1136             blockDecoder = new BlockInputStream(
   1137                     in, check, verifyCheck, memoryLimit,
   1138                     curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize,
   1139                     arrayCache);
   1140         } catch (MemoryLimitException e) {
   1141             // BlockInputStream doesn't know how much memory we had
   1142             // already needed so we need to recreate the exception.
   1143             assert memoryLimit >= 0;
   1144             throw new MemoryLimitException(
   1145                     e.getMemoryNeeded() + indexMemoryUsage,
   1146                     memoryLimit + indexMemoryUsage);
   1147         } catch (IndexIndicatorException e) {
   1148             // It cannot be Index so the file must be corrupt.
   1149             throw new CorruptedInputException();
   1150         }
   1151     }
   1152 }
   1153