1 /* 2 * SingleXZInputStream 3 * 4 * Author: Lasse Collin <lasse.collin (at) tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package org.tukaani.xz; 11 12 import java.io.InputStream; 13 import java.io.DataInputStream; 14 import java.io.IOException; 15 import java.io.EOFException; 16 import org.tukaani.xz.common.DecoderUtil; 17 import org.tukaani.xz.common.StreamFlags; 18 import org.tukaani.xz.index.IndexHash; 19 import org.tukaani.xz.check.Check; 20 21 /** 22 * Decompresses exactly one XZ Stream in streamed mode (no seeking). 23 * The decompression stops after the first XZ Stream has been decompressed, 24 * and the read position in the input stream is left at the first byte 25 * after the end of the XZ Stream. This can be useful when XZ data has 26 * been stored inside some other file format or protocol. 27 * <p> 28 * Unless you know what you are doing, don't use this class to decompress 29 * standalone .xz files. For that purpose, use <code>XZInputStream</code>. 30 * 31 * <h4>When uncompressed size is known beforehand</h4> 32 * <p> 33 * If you are decompressing complete XZ streams and your application knows 34 * exactly how much uncompressed data there should be, it is good to try 35 * reading one more byte by calling <code>read()</code> and checking 36 * that it returns <code>-1</code>. This way the decompressor will parse the 37 * file footers and verify the integrity checks, giving the caller more 38 * confidence that the uncompressed data is valid. 39 * 40 * @see XZInputStream 41 */ 42 public class SingleXZInputStream extends InputStream { 43 private InputStream in; 44 private final int memoryLimit; 45 private final StreamFlags streamHeaderFlags; 46 private final Check check; 47 private final boolean verifyCheck; 48 private BlockInputStream blockDecoder = null; 49 private final IndexHash indexHash = new IndexHash(); 50 private boolean endReached = false; 51 private IOException exception = null; 52 53 private final byte[] tempBuf = new byte[1]; 54 55 /** 56 * Reads the Stream Header into a buffer. 57 * This is a helper function for the constructors. 58 */ 59 private static byte[] readStreamHeader(InputStream in) throws IOException { 60 byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 61 new DataInputStream(in).readFully(streamHeader); 62 return streamHeader; 63 } 64 65 /** 66 * Creates a new XZ decompressor that decompresses exactly one 67 * XZ Stream from <code>in</code> without a memory usage limit. 68 * <p> 69 * This constructor reads and parses the XZ Stream Header (12 bytes) 70 * from <code>in</code>. The header of the first Block is not read 71 * until <code>read</code> is called. 72 * 73 * @param in input stream from which XZ-compressed 74 * data is read 75 * 76 * @throws XZFormatException 77 * input is not in the XZ format 78 * 79 * @throws CorruptedInputException 80 * XZ header CRC32 doesn't match 81 * 82 * @throws UnsupportedOptionsException 83 * XZ header is valid but specifies options 84 * not supported by this implementation 85 * 86 * @throws EOFException 87 * less than 12 bytes of input was available 88 * from <code>in</code> 89 * 90 * @throws IOException may be thrown by <code>in</code> 91 */ 92 public SingleXZInputStream(InputStream in) throws IOException { 93 this(in, -1); 94 } 95 96 /** 97 * Creates a new XZ decompressor that decompresses exactly one 98 * XZ Stream from <code>in</code> with an optional memory usage limit. 99 * <p> 100 * This is identical to <code>SingleXZInputStream(InputStream)</code> 101 * except that this takes also the <code>memoryLimit</code> argument. 102 * 103 * @param in input stream from which XZ-compressed 104 * data is read 105 * 106 * @param memoryLimit memory usage limit in kibibytes (KiB) 107 * or <code>-1</code> to impose no 108 * memory usage limit 109 * 110 * @throws XZFormatException 111 * input is not in the XZ format 112 * 113 * @throws CorruptedInputException 114 * XZ header CRC32 doesn't match 115 * 116 * @throws UnsupportedOptionsException 117 * XZ header is valid but specifies options 118 * not supported by this implementation 119 * 120 * @throws EOFException 121 * less than 12 bytes of input was available 122 * from <code>in</code> 123 * 124 * @throws IOException may be thrown by <code>in</code> 125 */ 126 public SingleXZInputStream(InputStream in, int memoryLimit) 127 throws IOException { 128 this(in, memoryLimit, true, readStreamHeader(in)); 129 } 130 131 /** 132 * Creates a new XZ decompressor that decompresses exactly one 133 * XZ Stream from <code>in</code> with an optional memory usage limit 134 * and ability to disable verification of integrity checks. 135 * <p> 136 * This is identical to <code>SingleXZInputStream(InputStream,int)</code> 137 * except that this takes also the <code>verifyCheck</code> argument. 138 * <p> 139 * Note that integrity check verification should almost never be disabled. 140 * Possible reasons to disable integrity check verification: 141 * <ul> 142 * <li>Trying to recover data from a corrupt .xz file.</li> 143 * <li>Speeding up decompression. This matters mostly with SHA-256 144 * or with files that have compressed extremely well. It's recommended 145 * that integrity checking isn't disabled for performance reasons 146 * unless the file integrity is verified externally in some other 147 * way.</li> 148 * </ul> 149 * <p> 150 * <code>verifyCheck</code> only affects the integrity check of 151 * the actual compressed data. The CRC32 fields in the headers 152 * are always verified. 153 * 154 * @param in input stream from which XZ-compressed 155 * data is read 156 * 157 * @param memoryLimit memory usage limit in kibibytes (KiB) 158 * or <code>-1</code> to impose no 159 * memory usage limit 160 * 161 * @param verifyCheck if <code>true</code>, the integrity checks 162 * will be verified; this should almost never 163 * be set to <code>false</code> 164 * 165 * @throws XZFormatException 166 * input is not in the XZ format 167 * 168 * @throws CorruptedInputException 169 * XZ header CRC32 doesn't match 170 * 171 * @throws UnsupportedOptionsException 172 * XZ header is valid but specifies options 173 * not supported by this implementation 174 * 175 * @throws EOFException 176 * less than 12 bytes of input was available 177 * from <code>in</code> 178 * 179 * @throws IOException may be thrown by <code>in</code> 180 * 181 * @since 1.6 182 */ 183 public SingleXZInputStream(InputStream in, int memoryLimit, 184 boolean verifyCheck) throws IOException { 185 this(in, memoryLimit, verifyCheck, readStreamHeader(in)); 186 } 187 188 SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, 189 byte[] streamHeader) throws IOException { 190 this.in = in; 191 this.memoryLimit = memoryLimit; 192 this.verifyCheck = verifyCheck; 193 streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader); 194 check = Check.getInstance(streamHeaderFlags.checkType); 195 } 196 197 /** 198 * Gets the ID of the integrity check used in this XZ Stream. 199 * 200 * @return the Check ID specified in the XZ Stream Header 201 */ 202 public int getCheckType() { 203 return streamHeaderFlags.checkType; 204 } 205 206 /** 207 * Gets the name of the integrity check used in this XZ Stream. 208 * 209 * @return the name of the check specified in the XZ Stream Header 210 */ 211 public String getCheckName() { 212 return check.getName(); 213 } 214 215 /** 216 * Decompresses the next byte from this input stream. 217 * <p> 218 * Reading lots of data with <code>read()</code> from this input stream 219 * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} 220 * if you need to read lots of data one byte at a time. 221 * 222 * @return the next decompressed byte, or <code>-1</code> 223 * to indicate the end of the compressed stream 224 * 225 * @throws CorruptedInputException 226 * @throws UnsupportedOptionsException 227 * @throws MemoryLimitException 228 * 229 * @throws XZIOException if the stream has been closed 230 * 231 * @throws EOFException 232 * compressed input is truncated or corrupt 233 * 234 * @throws IOException may be thrown by <code>in</code> 235 */ 236 public int read() throws IOException { 237 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 238 } 239 240 /** 241 * Decompresses into an array of bytes. 242 * <p> 243 * If <code>len</code> is zero, no bytes are read and <code>0</code> 244 * is returned. Otherwise this will try to decompress <code>len</code> 245 * bytes of uncompressed data. Less than <code>len</code> bytes may 246 * be read only in the following situations: 247 * <ul> 248 * <li>The end of the compressed data was reached successfully.</li> 249 * <li>An error is detected after at least one but less <code>len</code> 250 * bytes have already been successfully decompressed. 251 * The next call with non-zero <code>len</code> will immediately 252 * throw the pending exception.</li> 253 * <li>An exception is thrown.</li> 254 * </ul> 255 * 256 * @param buf target buffer for uncompressed data 257 * @param off start offset in <code>buf</code> 258 * @param len maximum number of uncompressed bytes to read 259 * 260 * @return number of bytes read, or <code>-1</code> to indicate 261 * the end of the compressed stream 262 * 263 * @throws CorruptedInputException 264 * @throws UnsupportedOptionsException 265 * @throws MemoryLimitException 266 * 267 * @throws XZIOException if the stream has been closed 268 * 269 * @throws EOFException 270 * compressed input is truncated or corrupt 271 * 272 * @throws IOException may be thrown by <code>in</code> 273 */ 274 public int read(byte[] buf, int off, int len) throws IOException { 275 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 276 throw new IndexOutOfBoundsException(); 277 278 if (len == 0) 279 return 0; 280 281 if (in == null) 282 throw new XZIOException("Stream closed"); 283 284 if (exception != null) 285 throw exception; 286 287 if (endReached) 288 return -1; 289 290 int size = 0; 291 292 try { 293 while (len > 0) { 294 if (blockDecoder == null) { 295 try { 296 blockDecoder = new BlockInputStream( 297 in, check, verifyCheck, memoryLimit, -1, -1); 298 } catch (IndexIndicatorException e) { 299 indexHash.validate(in); 300 validateStreamFooter(); 301 endReached = true; 302 return size > 0 ? size : -1; 303 } 304 } 305 306 int ret = blockDecoder.read(buf, off, len); 307 308 if (ret > 0) { 309 size += ret; 310 off += ret; 311 len -= ret; 312 } else if (ret == -1) { 313 indexHash.add(blockDecoder.getUnpaddedSize(), 314 blockDecoder.getUncompressedSize()); 315 blockDecoder = null; 316 } 317 } 318 } catch (IOException e) { 319 exception = e; 320 if (size == 0) 321 throw e; 322 } 323 324 return size; 325 } 326 327 private void validateStreamFooter() throws IOException { 328 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 329 new DataInputStream(in).readFully(buf); 330 StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf); 331 332 if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags, 333 streamFooterFlags) 334 || indexHash.getIndexSize() != streamFooterFlags.backwardSize) 335 throw new CorruptedInputException( 336 "XZ Stream Footer does not match Stream Header"); 337 } 338 339 /** 340 * Returns the number of uncompressed bytes that can be read 341 * without blocking. The value is returned with an assumption 342 * that the compressed input data will be valid. If the compressed 343 * data is corrupt, <code>CorruptedInputException</code> may get 344 * thrown before the number of bytes claimed to be available have 345 * been read from this input stream. 346 * 347 * @return the number of uncompressed bytes that can be read 348 * without blocking 349 */ 350 public int available() throws IOException { 351 if (in == null) 352 throw new XZIOException("Stream closed"); 353 354 if (exception != null) 355 throw exception; 356 357 return blockDecoder == null ? 0 : blockDecoder.available(); 358 } 359 360 /** 361 * Closes the stream and calls <code>in.close()</code>. 362 * If the stream was already closed, this does nothing. 363 * 364 * @throws IOException if thrown by <code>in.close()</code> 365 */ 366 public void close() throws IOException { 367 if (in != null) { 368 try { 369 in.close(); 370 } finally { 371 in = null; 372 } 373 } 374 } 375 } 376