1 /* 2 * XZInputStream 3 * 4 * Author: Lasse Collin <lasse.collin (at) tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package org.tukaani.xz; 11 12 import java.io.InputStream; 13 import java.io.DataInputStream; 14 import java.io.IOException; 15 import java.io.EOFException; 16 import org.tukaani.xz.common.DecoderUtil; 17 18 /** 19 * Decompresses a .xz file in streamed mode (no seeking). 20 * <p> 21 * Use this to decompress regular standalone .xz files. This reads from 22 * its input stream until the end of the input or until an error occurs. 23 * This supports decompressing concatenated .xz files. 24 * 25 * <h4>Typical use cases</h4> 26 * <p> 27 * Getting an input stream to decompress a .xz file: 28 * <p><blockquote><pre> 29 * InputStream infile = new FileInputStream("foo.xz"); 30 * XZInputStream inxz = new XZInputStream(infile); 31 * </pre></blockquote> 32 * <p> 33 * It's important to keep in mind that decompressor memory usage depends 34 * on the settings used to compress the file. The worst-case memory usage 35 * of XZInputStream is currently 1.5 GiB. Still, very few files will 36 * require more than about 65 MiB because that's how much decompressing 37 * a file created with the highest preset level will need, and only a few 38 * people use settings other than the predefined presets. 39 * <p> 40 * It is possible to specify a memory usage limit for 41 * <code>XZInputStream</code>. If decompression requires more memory than 42 * the specified limit, MemoryLimitException will be thrown when reading 43 * from the stream. For example, the following sets the memory usage limit 44 * to 100 MiB: 45 * <p><blockquote><pre> 46 * InputStream infile = new FileInputStream("foo.xz"); 47 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024); 48 * </pre></blockquote> 49 * 50 * <h4>When uncompressed size is known beforehand</h4> 51 * <p> 52 * If you are decompressing complete files and your application knows 53 * exactly how much uncompressed data there should be, it is good to try 54 * reading one more byte by calling <code>read()</code> and checking 55 * that it returns <code>-1</code>. This way the decompressor will parse the 56 * file footers and verify the integrity checks, giving the caller more 57 * confidence that the uncompressed data is valid. (This advice seems to 58 * apply to 59 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.) 60 * 61 * @see SingleXZInputStream 62 */ 63 public class XZInputStream extends InputStream { 64 private final int memoryLimit; 65 private InputStream in; 66 private SingleXZInputStream xzIn; 67 private final boolean verifyCheck; 68 private boolean endReached = false; 69 private IOException exception = null; 70 71 private final byte[] tempBuf = new byte[1]; 72 73 /** 74 * Creates a new XZ decompressor without a memory usage limit. 75 * <p> 76 * This constructor reads and parses the XZ Stream Header (12 bytes) 77 * from <code>in</code>. The header of the first Block is not read 78 * until <code>read</code> is called. 79 * 80 * @param in input stream from which XZ-compressed 81 * data is read 82 * 83 * @throws XZFormatException 84 * input is not in the XZ format 85 * 86 * @throws CorruptedInputException 87 * XZ header CRC32 doesn't match 88 * 89 * @throws UnsupportedOptionsException 90 * XZ header is valid but specifies options 91 * not supported by this implementation 92 * 93 * @throws EOFException 94 * less than 12 bytes of input was available 95 * from <code>in</code> 96 * 97 * @throws IOException may be thrown by <code>in</code> 98 */ 99 public XZInputStream(InputStream in) throws IOException { 100 this(in, -1); 101 } 102 103 /** 104 * Creates a new XZ decompressor with an optional memory usage limit. 105 * <p> 106 * This is identical to <code>XZInputStream(InputStream)</code> except 107 * that this takes also the <code>memoryLimit</code> argument. 108 * 109 * @param in input stream from which XZ-compressed 110 * data is read 111 * 112 * @param memoryLimit memory usage limit in kibibytes (KiB) 113 * or <code>-1</code> to impose no 114 * memory usage limit 115 * 116 * @throws XZFormatException 117 * input is not in the XZ format 118 * 119 * @throws CorruptedInputException 120 * XZ header CRC32 doesn't match 121 * 122 * @throws UnsupportedOptionsException 123 * XZ header is valid but specifies options 124 * not supported by this implementation 125 * 126 * @throws EOFException 127 * less than 12 bytes of input was available 128 * from <code>in</code> 129 * 130 * @throws IOException may be thrown by <code>in</code> 131 */ 132 public XZInputStream(InputStream in, int memoryLimit) throws IOException { 133 this(in, memoryLimit, true); 134 } 135 136 /** 137 * Creates a new XZ decompressor with an optional memory usage limit 138 * and ability to disable verification of integrity checks. 139 * <p> 140 * This is identical to <code>XZInputStream(InputStream,int)</code> except 141 * that this takes also the <code>verifyCheck</code> argument. 142 * <p> 143 * Note that integrity check verification should almost never be disabled. 144 * Possible reasons to disable integrity check verification: 145 * <ul> 146 * <li>Trying to recover data from a corrupt .xz file.</li> 147 * <li>Speeding up decompression. This matters mostly with SHA-256 148 * or with files that have compressed extremely well. It's recommended 149 * that integrity checking isn't disabled for performance reasons 150 * unless the file integrity is verified externally in some other 151 * way.</li> 152 * </ul> 153 * <p> 154 * <code>verifyCheck</code> only affects the integrity check of 155 * the actual compressed data. The CRC32 fields in the headers 156 * are always verified. 157 * 158 * @param in input stream from which XZ-compressed 159 * data is read 160 * 161 * @param memoryLimit memory usage limit in kibibytes (KiB) 162 * or <code>-1</code> to impose no 163 * memory usage limit 164 * 165 * @param verifyCheck if <code>true</code>, the integrity checks 166 * will be verified; this should almost never 167 * be set to <code>false</code> 168 * 169 * @throws XZFormatException 170 * input is not in the XZ format 171 * 172 * @throws CorruptedInputException 173 * XZ header CRC32 doesn't match 174 * 175 * @throws UnsupportedOptionsException 176 * XZ header is valid but specifies options 177 * not supported by this implementation 178 * 179 * @throws EOFException 180 * less than 12 bytes of input was available 181 * from <code>in</code> 182 * 183 * @throws IOException may be thrown by <code>in</code> 184 * 185 * @since 1.6 186 */ 187 public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck) 188 throws IOException { 189 this.in = in; 190 this.memoryLimit = memoryLimit; 191 this.verifyCheck = verifyCheck; 192 this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck); 193 } 194 195 /** 196 * Decompresses the next byte from this input stream. 197 * <p> 198 * Reading lots of data with <code>read()</code> from this input stream 199 * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} 200 * if you need to read lots of data one byte at a time. 201 * 202 * @return the next decompressed byte, or <code>-1</code> 203 * to indicate the end of the compressed stream 204 * 205 * @throws CorruptedInputException 206 * @throws UnsupportedOptionsException 207 * @throws MemoryLimitException 208 * 209 * @throws XZIOException if the stream has been closed 210 * 211 * @throws EOFException 212 * compressed input is truncated or corrupt 213 * 214 * @throws IOException may be thrown by <code>in</code> 215 */ 216 public int read() throws IOException { 217 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 218 } 219 220 /** 221 * Decompresses into an array of bytes. 222 * <p> 223 * If <code>len</code> is zero, no bytes are read and <code>0</code> 224 * is returned. Otherwise this will try to decompress <code>len</code> 225 * bytes of uncompressed data. Less than <code>len</code> bytes may 226 * be read only in the following situations: 227 * <ul> 228 * <li>The end of the compressed data was reached successfully.</li> 229 * <li>An error is detected after at least one but less <code>len</code> 230 * bytes have already been successfully decompressed. 231 * The next call with non-zero <code>len</code> will immediately 232 * throw the pending exception.</li> 233 * <li>An exception is thrown.</li> 234 * </ul> 235 * 236 * @param buf target buffer for uncompressed data 237 * @param off start offset in <code>buf</code> 238 * @param len maximum number of uncompressed bytes to read 239 * 240 * @return number of bytes read, or <code>-1</code> to indicate 241 * the end of the compressed stream 242 * 243 * @throws CorruptedInputException 244 * @throws UnsupportedOptionsException 245 * @throws MemoryLimitException 246 * 247 * @throws XZIOException if the stream has been closed 248 * 249 * @throws EOFException 250 * compressed input is truncated or corrupt 251 * 252 * @throws IOException may be thrown by <code>in</code> 253 */ 254 public int read(byte[] buf, int off, int len) throws IOException { 255 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 256 throw new IndexOutOfBoundsException(); 257 258 if (len == 0) 259 return 0; 260 261 if (in == null) 262 throw new XZIOException("Stream closed"); 263 264 if (exception != null) 265 throw exception; 266 267 if (endReached) 268 return -1; 269 270 int size = 0; 271 272 try { 273 while (len > 0) { 274 if (xzIn == null) { 275 prepareNextStream(); 276 if (endReached) 277 return size == 0 ? -1 : size; 278 } 279 280 int ret = xzIn.read(buf, off, len); 281 282 if (ret > 0) { 283 size += ret; 284 off += ret; 285 len -= ret; 286 } else if (ret == -1) { 287 xzIn = null; 288 } 289 } 290 } catch (IOException e) { 291 exception = e; 292 if (size == 0) 293 throw e; 294 } 295 296 return size; 297 } 298 299 private void prepareNextStream() throws IOException { 300 DataInputStream inData = new DataInputStream(in); 301 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; 302 303 // The size of Stream Padding must be a multiple of four bytes, 304 // all bytes zero. 305 do { 306 // First try to read one byte to see if we have reached the end 307 // of the file. 308 int ret = inData.read(buf, 0, 1); 309 if (ret == -1) { 310 endReached = true; 311 return; 312 } 313 314 // Since we got one byte of input, there must be at least 315 // three more available in a valid file. 316 inData.readFully(buf, 1, 3); 317 318 } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0); 319 320 // Not all bytes are zero. In a valid Stream it indicates the 321 // beginning of the next Stream. Read the rest of the Stream Header 322 // and initialize the XZ decoder. 323 inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4); 324 325 try { 326 xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf); 327 } catch (XZFormatException e) { 328 // Since this isn't the first .xz Stream, it is more 329 // logical to tell that the data is corrupt. 330 throw new CorruptedInputException( 331 "Garbage after a valid XZ Stream"); 332 } 333 } 334 335 /** 336 * Returns the number of uncompressed bytes that can be read 337 * without blocking. The value is returned with an assumption 338 * that the compressed input data will be valid. If the compressed 339 * data is corrupt, <code>CorruptedInputException</code> may get 340 * thrown before the number of bytes claimed to be available have 341 * been read from this input stream. 342 * 343 * @return the number of uncompressed bytes that can be read 344 * without blocking 345 */ 346 public int available() throws IOException { 347 if (in == null) 348 throw new XZIOException("Stream closed"); 349 350 if (exception != null) 351 throw exception; 352 353 return xzIn == null ? 0 : xzIn.available(); 354 } 355 356 /** 357 * Closes the stream and calls <code>in.close()</code>. 358 * If the stream was already closed, this does nothing. 359 * 360 * @throws IOException if thrown by <code>in.close()</code> 361 */ 362 public void close() throws IOException { 363 if (in != null) { 364 try { 365 in.close(); 366 } finally { 367 in = null; 368 } 369 } 370 } 371 } 372