Home | History | Annotate | Download | only in index
      1 /*
      2  * IndexDecoder
      3  *
      4  * Author: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *
      6  * This file has been put into the public domain.
      7  * You can do whatever you want with this file.
      8  */
      9 
     10 package org.tukaani.xz.index;
     11 
     12 import java.io.IOException;
     13 import java.io.EOFException;
     14 import java.util.zip.CheckedInputStream;
     15 import org.tukaani.xz.common.DecoderUtil;
     16 import org.tukaani.xz.common.StreamFlags;
     17 import org.tukaani.xz.SeekableInputStream;
     18 import org.tukaani.xz.CorruptedInputException;
     19 import org.tukaani.xz.MemoryLimitException;
     20 import org.tukaani.xz.UnsupportedOptionsException;
     21 
     22 public class IndexDecoder extends IndexBase {
     23     private final StreamFlags streamFlags;
     24     private final long streamPadding;
     25     private final int memoryUsage;
     26 
     27     // Unpadded Size and Uncompressed Size fields
     28     private final long[] unpadded;
     29     private final long[] uncompressed;
     30 
     31     // Uncompressed size of the largest Block. It is used by
     32     // SeekableXZInputStream to find out the largest Block of the .xz file.
     33     private long largestBlockSize = 0;
     34 
     35     // Offsets relative to the beginning of the .xz file. These are all zero
     36     // for the first Stream in the file.
     37     private int recordOffset = 0;
     38     private long compressedOffset = 0;
     39     private long uncompressedOffset = 0;
     40 
     41     public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
     42                         long streamPadding, int memoryLimit)
     43             throws IOException {
     44         super(new CorruptedInputException("XZ Index is corrupt"));
     45         this.streamFlags = streamFooterFlags;
     46         this.streamPadding = streamPadding;
     47 
     48         // If endPos is exceeded before the CRC32 field has been decoded,
     49         // the Index is corrupt.
     50         long endPos = in.position() + streamFooterFlags.backwardSize - 4;
     51 
     52         java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
     53         CheckedInputStream inChecked = new CheckedInputStream(in, crc32);
     54 
     55         // Index Indicator
     56         if (inChecked.read() != 0x00)
     57             throw new CorruptedInputException("XZ Index is corrupt");
     58 
     59         try {
     60             // Number of Records
     61             long count = DecoderUtil.decodeVLI(inChecked);
     62 
     63             // Catch Record counts that obviously too high to be valid.
     64             // This test isn't exact because it ignores Index Indicator,
     65             // Number of Records, and CRC32 fields, but this is good enough
     66             // to catch the most obvious problems.
     67             if (count >= streamFooterFlags.backwardSize / 2)
     68                 throw new CorruptedInputException("XZ Index is corrupt");
     69 
     70             // If the Record count doesn't fit into an int, we cannot
     71             // allocate the arrays to hold the Records.
     72             if (count > Integer.MAX_VALUE)
     73                 throw new UnsupportedOptionsException("XZ Index has over "
     74                         + Integer.MAX_VALUE + " Records");
     75 
     76             // Calculate approximate memory requirements and check the
     77             // memory usage limit.
     78             memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
     79             if (memoryLimit >= 0 && memoryUsage > memoryLimit)
     80                 throw new MemoryLimitException(memoryUsage, memoryLimit);
     81 
     82             // Allocate the arrays for the Records.
     83             unpadded = new long[(int)count];
     84             uncompressed = new long[(int)count];
     85             int record = 0;
     86 
     87             // Decode the Records.
     88             for (int i = (int)count; i > 0; --i) {
     89                 // Get the next Record.
     90                 long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
     91                 long uncompressedSize = DecoderUtil.decodeVLI(inChecked);
     92 
     93                 // Check that the input position stays sane. Since this is
     94                 // checked only once per loop iteration instead of for
     95                 // every input byte read, it's still possible that
     96                 // EOFException gets thrown with corrupt input.
     97                 if (in.position() > endPos)
     98                     throw new CorruptedInputException("XZ Index is corrupt");
     99 
    100                 // Add the new Record.
    101                 unpadded[record] = blocksSum + unpaddedSize;
    102                 uncompressed[record] = uncompressedSum + uncompressedSize;
    103                 ++record;
    104                 super.add(unpaddedSize, uncompressedSize);
    105                 assert record == recordCount;
    106 
    107                 // Remember the uncompressed size of the largest Block.
    108                 if (largestBlockSize < uncompressedSize)
    109                     largestBlockSize = uncompressedSize;
    110             }
    111         } catch (EOFException e) {
    112             // EOFException is caught just in case a corrupt input causes
    113             // DecoderUtil.decodeVLI to read too much at once.
    114             throw new CorruptedInputException("XZ Index is corrupt");
    115         }
    116 
    117         // Validate that the size of the Index field matches
    118         // Backward Size.
    119         int indexPaddingSize = getIndexPaddingSize();
    120         if (in.position() + indexPaddingSize != endPos)
    121             throw new CorruptedInputException("XZ Index is corrupt");
    122 
    123         // Index Padding
    124         while (indexPaddingSize-- > 0)
    125             if (inChecked.read() != 0x00)
    126                 throw new CorruptedInputException("XZ Index is corrupt");
    127 
    128         // CRC32
    129         long value = crc32.getValue();
    130         for (int i = 0; i < 4; ++i)
    131             if (((value >>> (i * 8)) & 0xFF) != in.read())
    132                 throw new CorruptedInputException("XZ Index is corrupt");
    133     }
    134 
    135     public void setOffsets(IndexDecoder prev) {
    136         // NOTE: SeekableXZInputStream checks that the total number of Blocks
    137         // in concatenated Streams fits into an int.
    138         recordOffset = prev.recordOffset + (int)prev.recordCount;
    139         compressedOffset = prev.compressedOffset
    140                            + prev.getStreamSize() + prev.streamPadding;
    141         assert (compressedOffset & 3) == 0;
    142         uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
    143     }
    144 
    145     public int getMemoryUsage() {
    146         return memoryUsage;
    147     }
    148 
    149     public StreamFlags getStreamFlags() {
    150         return streamFlags;
    151     }
    152 
    153     public int getRecordCount() {
    154         // It was already checked in the constructor that it fits into an int.
    155         // Otherwise we couldn't have allocated the arrays.
    156         return (int)recordCount;
    157     }
    158 
    159     public long getUncompressedSize() {
    160         return uncompressedSum;
    161     }
    162 
    163     public long getLargestBlockSize() {
    164         return largestBlockSize;
    165     }
    166 
    167     public boolean hasUncompressedOffset(long pos) {
    168         return pos >= uncompressedOffset
    169                && pos < uncompressedOffset + uncompressedSum;
    170     }
    171 
    172     public boolean hasRecord(int blockNumber) {
    173         return blockNumber >= recordOffset
    174                && blockNumber < recordOffset + recordCount;
    175     }
    176 
    177     public void locateBlock(BlockInfo info, long target) {
    178         assert target >= uncompressedOffset;
    179         target -= uncompressedOffset;
    180         assert target < uncompressedSum;
    181 
    182         int left = 0;
    183         int right = unpadded.length - 1;
    184 
    185         while (left < right) {
    186             int i = left + (right - left) / 2;
    187 
    188             if (uncompressed[i] <= target)
    189                 left = i + 1;
    190             else
    191                 right = i;
    192         }
    193 
    194         setBlockInfo(info, recordOffset + left);
    195     }
    196 
    197     public void setBlockInfo(BlockInfo info, int blockNumber) {
    198         // The caller has checked that the given Block number is inside
    199         // this Index.
    200         assert blockNumber >= recordOffset;
    201         assert blockNumber - recordOffset < recordCount;
    202 
    203         info.index = this;
    204         info.blockNumber = blockNumber;
    205 
    206         int pos = blockNumber - recordOffset;
    207 
    208         if (pos == 0) {
    209             info.compressedOffset = 0;
    210             info.uncompressedOffset = 0;
    211         } else {
    212             info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
    213             info.uncompressedOffset = uncompressed[pos - 1];
    214         }
    215 
    216         info.unpaddedSize = unpadded[pos] - info.compressedOffset;
    217         info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;
    218 
    219         info.compressedOffset += compressedOffset
    220                                  + DecoderUtil.STREAM_HEADER_SIZE;
    221         info.uncompressedOffset += uncompressedOffset;
    222     }
    223 }
    224