1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.util.zip; 19 20 import java.io.IOException; 21 import java.io.InputStream; 22 import java.io.PushbackInputStream; 23 import java.nio.ByteOrder; 24 import java.nio.charset.ModifiedUtf8; 25 import java.nio.charset.StandardCharsets; 26 import java.util.Arrays; 27 import libcore.io.Memory; 28 import libcore.io.Streams; 29 30 /** 31 * Used to read (decompress) the data from zip files. 32 * 33 * <p>A zip file (or "archive") is a collection of (possibly) compressed files. 34 * When reading from a {@code ZipInputStream}, you call {@link #getNextEntry} 35 * which returns a {@link ZipEntry} of metadata corresponding to the userdata that follows. 36 * When you appear to have hit the end of this stream (which is really just the end of the current 37 * entry's userdata), call {@code getNextEntry} again. When it returns null, 38 * there are no more entries in the input file. 39 * 40 * <p>Although {@code InflaterInputStream} can only read compressed zip 41 * entries, this class can read non-compressed entries as well. 42 * 43 * <p>Use {@link ZipFile} if you need random access to entries by name, but use this class 44 * if you just want to iterate over all entries. 45 * 46 * <h3>Example</h3> 47 * <p>Using {@code ZipInputStream} is a little more complicated than {@link GZIPInputStream} 48 * because zip files are containers that can contain multiple files. This code pulls all the 49 * files out of a zip file, similar to the {@code unzip(1)} utility. 50 * <pre> 51 * InputStream is = ... 52 * ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is)); 53 * try { 54 * ZipEntry ze; 55 * while ((ze = zis.getNextEntry()) != null) { 56 * ByteArrayOutputStream baos = new ByteArrayOutputStream(); 57 * byte[] buffer = new byte[1024]; 58 * int count; 59 * while ((count = zis.read(buffer)) != -1) { 60 * baos.write(buffer, 0, count); 61 * } 62 * String filename = ze.getName(); 63 * byte[] bytes = baos.toByteArray(); 64 * // do something with 'filename' and 'bytes'... 65 * } 66 * } finally { 67 * zis.close(); 68 * } 69 * </pre> 70 */ 71 public class ZipInputStream extends InflaterInputStream implements ZipConstants { 72 private static final int ZIPLocalHeaderVersionNeeded = 20; 73 74 private boolean entriesEnd = false; 75 76 private boolean hasDD = false; 77 78 private int entryIn = 0; 79 80 private int inRead, lastRead = 0; 81 82 private ZipEntry currentEntry; 83 84 private boolean currentEntryIsZip64; 85 86 private final byte[] hdrBuf = new byte[LOCHDR - LOCVER + 8]; 87 88 private final CRC32 crc = new CRC32(); 89 90 private byte[] stringBytesBuf = new byte[256]; 91 92 private char[] stringCharBuf = new char[256]; 93 94 /** 95 * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream. 96 * 97 * <p>UTF-8 is used to decode all strings in the file. 98 */ 99 public ZipInputStream(InputStream stream) { 100 super(new PushbackInputStream(stream, BUF_SIZE), new Inflater(true)); 101 if (stream == null) { 102 throw new NullPointerException("stream == null"); 103 } 104 } 105 106 /** 107 * Closes this {@code ZipInputStream}. 108 * 109 * @throws IOException 110 * if an {@code IOException} occurs. 111 */ 112 @Override 113 public void close() throws IOException { 114 if (!closed) { 115 closeEntry(); // Close the current entry 116 super.close(); 117 } 118 } 119 120 /** 121 * Closes the current zip entry and prepares to read the next entry. 122 * 123 * @throws IOException 124 * if an {@code IOException} occurs. 125 */ 126 public void closeEntry() throws IOException { 127 checkClosed(); 128 if (currentEntry == null) { 129 return; 130 } 131 132 /* 133 * The following code is careful to leave the ZipInputStream in a 134 * consistent state, even when close() results in an exception. It does 135 * so by: 136 * - pushing bytes back into the source stream 137 * - reading a data descriptor footer from the source stream 138 * - resetting fields that manage the entry being closed 139 */ 140 141 // Ensure all entry bytes are read 142 Exception failure = null; 143 try { 144 Streams.skipAll(this); 145 } catch (Exception e) { 146 failure = e; 147 } 148 149 int inB, out; 150 if (currentEntry.compressionMethod == ZipEntry.DEFLATED) { 151 inB = inf.getTotalIn(); 152 out = inf.getTotalOut(); 153 } else { 154 inB = inRead; 155 out = inRead; 156 } 157 int diff = entryIn - inB; 158 // Pushback any required bytes 159 if (diff != 0) { 160 ((PushbackInputStream) in).unread(buf, len - diff, diff); 161 } 162 163 try { 164 readAndVerifyDataDescriptor(inB, out, currentEntryIsZip64); 165 } catch (Exception e) { 166 if (failure == null) { // otherwise we're already going to throw 167 failure = e; 168 } 169 } 170 171 inf.reset(); 172 lastRead = inRead = entryIn = len = 0; 173 crc.reset(); 174 currentEntry = null; 175 176 if (failure != null) { 177 if (failure instanceof IOException) { 178 throw (IOException) failure; 179 } else if (failure instanceof RuntimeException) { 180 throw (RuntimeException) failure; 181 } 182 AssertionError error = new AssertionError(); 183 error.initCause(failure); 184 throw error; 185 } 186 } 187 188 private void readAndVerifyDataDescriptor(long inB, long out, boolean isZip64) throws IOException { 189 if (hasDD) { 190 if (isZip64) { 191 // 8 additional bytes since the compressed / uncompressed size fields 192 // in the extended header are 8 bytes each, instead of 4 bytes each. 193 Streams.readFully(in, hdrBuf, 0, EXTHDR + 8); 194 } else { 195 Streams.readFully(in, hdrBuf, 0, EXTHDR); 196 } 197 198 int sig = Memory.peekInt(hdrBuf, 0, ByteOrder.LITTLE_ENDIAN); 199 if (sig != (int) EXTSIG) { 200 throw new ZipException(String.format("unknown format (EXTSIG=%x)", sig)); 201 } 202 currentEntry.crc = ((long) Memory.peekInt(hdrBuf, EXTCRC, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 203 204 if (isZip64) { 205 currentEntry.compressedSize = Memory.peekLong(hdrBuf, EXTSIZ, ByteOrder.LITTLE_ENDIAN); 206 // Note that we apply an adjustment of 4 bytes to the offset of EXTLEN to account 207 // for the 8 byte size for zip64. 208 currentEntry.size = Memory.peekLong(hdrBuf, EXTLEN + 4, ByteOrder.LITTLE_ENDIAN); 209 } else { 210 currentEntry.compressedSize = ((long) Memory.peekInt(hdrBuf, EXTSIZ, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 211 currentEntry.size = ((long) Memory.peekInt(hdrBuf, EXTLEN, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 212 } 213 } 214 if (currentEntry.crc != crc.getValue()) { 215 throw new ZipException("CRC mismatch"); 216 } 217 if (currentEntry.compressedSize != inB || currentEntry.size != out) { 218 throw new ZipException("Size mismatch"); 219 } 220 } 221 222 /** 223 * Returns the next entry from this {@code ZipInputStream} or {@code null} if 224 * no more entries are present. 225 * 226 * @throws IOException if an {@code IOException} occurs. 227 */ 228 public ZipEntry getNextEntry() throws IOException { 229 closeEntry(); 230 if (entriesEnd) { 231 return null; 232 } 233 234 // Read the signature to see whether there's another local file header. 235 Streams.readFully(in, hdrBuf, 0, 4); 236 int hdr = Memory.peekInt(hdrBuf, 0, ByteOrder.LITTLE_ENDIAN); 237 if (hdr == CENSIG) { 238 entriesEnd = true; 239 return null; 240 } 241 if (hdr != LOCSIG) { 242 return null; 243 } 244 245 // Read the local file header. 246 Streams.readFully(in, hdrBuf, 0, (LOCHDR - LOCVER)); 247 int version = peekShort(0) & 0xff; 248 if (version > ZIPLocalHeaderVersionNeeded) { 249 throw new ZipException("Cannot read local header version " + version); 250 } 251 int flags = peekShort(LOCFLG - LOCVER); 252 if ((flags & ZipFile.GPBF_UNSUPPORTED_MASK) != 0) { 253 throw new ZipException("Invalid General Purpose Bit Flag: " + flags); 254 } 255 256 hasDD = ((flags & ZipFile.GPBF_DATA_DESCRIPTOR_FLAG) != 0); 257 int ceLastModifiedTime = peekShort(LOCTIM - LOCVER); 258 int ceLastModifiedDate = peekShort(LOCTIM - LOCVER + 2); 259 int ceCompressionMethod = peekShort(LOCHOW - LOCVER); 260 long ceCrc = 0, ceCompressedSize = 0, ceSize = -1; 261 if (!hasDD) { 262 ceCrc = ((long) Memory.peekInt(hdrBuf, LOCCRC - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 263 ceCompressedSize = ((long) Memory.peekInt(hdrBuf, LOCSIZ - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 264 ceSize = ((long) Memory.peekInt(hdrBuf, LOCLEN - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL; 265 } 266 int nameLength = peekShort(LOCNAM - LOCVER); 267 if (nameLength == 0) { 268 throw new ZipException("Entry is not named"); 269 } 270 int extraLength = peekShort(LOCEXT - LOCVER); 271 272 String name = readString(nameLength); 273 currentEntry = createZipEntry(name); 274 currentEntry.time = ceLastModifiedTime; 275 currentEntry.modDate = ceLastModifiedDate; 276 currentEntry.setMethod(ceCompressionMethod); 277 if (ceSize != -1) { 278 currentEntry.setCrc(ceCrc); 279 currentEntry.setSize(ceSize); 280 currentEntry.setCompressedSize(ceCompressedSize); 281 } 282 if (extraLength > 0) { 283 byte[] extraData = new byte[extraLength]; 284 Streams.readFully(in, extraData, 0, extraLength); 285 currentEntry.setExtra(extraData); 286 currentEntryIsZip64 = Zip64.parseZip64ExtendedInfo(currentEntry, false /* from central directory */); 287 } else { 288 currentEntryIsZip64 = false; 289 } 290 291 return currentEntry; 292 } 293 294 /** 295 * Reads bytes from the current stream position returning the string representation. 296 */ 297 private String readString(int byteLength) throws IOException { 298 if (byteLength > stringBytesBuf.length) { 299 stringBytesBuf = new byte[byteLength]; 300 } 301 Streams.readFully(in, stringBytesBuf, 0, byteLength); 302 // The number of chars will always be less than or equal to the number of bytes. It's 303 // fine if this buffer is too long. 304 if (byteLength > stringCharBuf.length) { 305 stringCharBuf = new char[byteLength]; 306 } 307 return ModifiedUtf8.decode(stringBytesBuf, stringCharBuf, 0, byteLength); 308 } 309 310 private int peekShort(int offset) { 311 return Memory.peekShort(hdrBuf, offset, ByteOrder.LITTLE_ENDIAN) & 0xffff; 312 } 313 314 /** 315 * Reads up to {@code byteCount} uncompressed bytes into the buffer 316 * starting at {@code byteOffset}. Returns the number of bytes actually read, or -1. 317 */ 318 @Override 319 public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException { 320 checkClosed(); 321 Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount); 322 323 if (inf.finished() || currentEntry == null) { 324 return -1; 325 } 326 327 if (currentEntry.compressionMethod == ZipEntry.STORED) { 328 int csize = (int) currentEntry.size; 329 if (inRead >= csize) { 330 return -1; 331 } 332 if (lastRead >= len) { 333 lastRead = 0; 334 if ((len = in.read(buf)) == -1) { 335 eof = true; 336 return -1; 337 } 338 entryIn += len; 339 } 340 int toRead = byteCount > (len - lastRead) ? len - lastRead : byteCount; 341 if ((csize - inRead) < toRead) { 342 toRead = csize - inRead; 343 } 344 System.arraycopy(buf, lastRead, buffer, byteOffset, toRead); 345 lastRead += toRead; 346 inRead += toRead; 347 crc.update(buffer, byteOffset, toRead); 348 return toRead; 349 } 350 if (inf.needsInput()) { 351 fill(); 352 if (len > 0) { 353 entryIn += len; 354 } 355 } 356 int read; 357 try { 358 read = inf.inflate(buffer, byteOffset, byteCount); 359 } catch (DataFormatException e) { 360 throw new ZipException(e.getMessage()); 361 } 362 if (read == 0 && inf.finished()) { 363 return -1; 364 } 365 crc.update(buffer, byteOffset, read); 366 return read; 367 } 368 369 @Override 370 public int available() throws IOException { 371 checkClosed(); 372 // The InflaterInputStream contract says we must only return 0 or 1. 373 return (currentEntry == null || inRead < currentEntry.size) ? 1 : 0; 374 } 375 376 /** 377 * creates a {@link ZipEntry } with the given name. 378 * 379 * @param name 380 * the name of the entry. 381 * @return the created {@code ZipEntry}. 382 */ 383 protected ZipEntry createZipEntry(String name) { 384 return new ZipEntry(name); 385 } 386 387 private void checkClosed() throws IOException { 388 if (closed) { 389 throw new IOException("Stream is closed"); 390 } 391 } 392 } 393