Home | History | Annotate | Download | only in zip
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package java.util.zip;
     19 
     20 import java.io.IOException;
     21 import java.io.InputStream;
     22 import java.io.PushbackInputStream;
     23 import java.nio.ByteOrder;
     24 import java.nio.charset.ModifiedUtf8;
     25 import java.nio.charset.StandardCharsets;
     26 import java.util.Arrays;
     27 import libcore.io.Memory;
     28 import libcore.io.Streams;
     29 
     30 /**
     31  * Used to read (decompress) the data from zip files.
     32  *
     33  * <p>A zip file (or "archive") is a collection of (possibly) compressed files.
     34  * When reading from a {@code ZipInputStream}, you call {@link #getNextEntry}
     35  * which returns a {@link ZipEntry} of metadata corresponding to the userdata that follows.
     36  * When you appear to have hit the end of this stream (which is really just the end of the current
     37  * entry's userdata), call {@code getNextEntry} again. When it returns null,
     38  * there are no more entries in the input file.
     39  *
     40  * <p>Although {@code InflaterInputStream} can only read compressed zip
     41  * entries, this class can read non-compressed entries as well.
     42  *
     43  * <p>Use {@link ZipFile} if you need random access to entries by name, but use this class
     44  * if you just want to iterate over all entries.
     45  *
     46  * <h3>Example</h3>
     47  * <p>Using {@code ZipInputStream} is a little more complicated than {@link GZIPInputStream}
     48  * because zip files are containers that can contain multiple files. This code pulls all the
     49  * files out of a zip file, similar to the {@code unzip(1)} utility.
     50  * <pre>
     51  * InputStream is = ...
     52  * ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is));
     53  * try {
     54  *     ZipEntry ze;
     55  *     while ((ze = zis.getNextEntry()) != null) {
     56  *         ByteArrayOutputStream baos = new ByteArrayOutputStream();
     57  *         byte[] buffer = new byte[1024];
     58  *         int count;
     59  *         while ((count = zis.read(buffer)) != -1) {
     60  *             baos.write(buffer, 0, count);
     61  *         }
     62  *         String filename = ze.getName();
     63  *         byte[] bytes = baos.toByteArray();
     64  *         // do something with 'filename' and 'bytes'...
     65  *     }
     66  * } finally {
     67  *     zis.close();
     68  * }
     69  * </pre>
     70  */
     71 public class ZipInputStream extends InflaterInputStream implements ZipConstants {
     72     private static final int ZIPLocalHeaderVersionNeeded = 20;
     73 
     74     private boolean entriesEnd = false;
     75 
     76     private boolean hasDD = false;
     77 
     78     private int entryIn = 0;
     79 
     80     private int inRead, lastRead = 0;
     81 
     82     private ZipEntry currentEntry;
     83 
     84     private boolean currentEntryIsZip64;
     85 
     86     private final byte[] hdrBuf = new byte[LOCHDR - LOCVER + 8];
     87 
     88     private final CRC32 crc = new CRC32();
     89 
     90     private byte[] stringBytesBuf = new byte[256];
     91 
     92     private char[] stringCharBuf = new char[256];
     93 
     94     /**
     95      * Constructs a new {@code ZipInputStream} to read zip entries from the given input stream.
     96      *
     97      * <p>UTF-8 is used to decode all strings in the file.
     98      */
     99     public ZipInputStream(InputStream stream) {
    100         super(new PushbackInputStream(stream, BUF_SIZE), new Inflater(true));
    101         if (stream == null) {
    102             throw new NullPointerException("stream == null");
    103         }
    104     }
    105 
    106     /**
    107      * Closes this {@code ZipInputStream}.
    108      *
    109      * @throws IOException
    110      *             if an {@code IOException} occurs.
    111      */
    112     @Override
    113     public void close() throws IOException {
    114         if (!closed) {
    115             closeEntry(); // Close the current entry
    116             super.close();
    117         }
    118     }
    119 
    120     /**
    121      * Closes the current zip entry and prepares to read the next entry.
    122      *
    123      * @throws IOException
    124      *             if an {@code IOException} occurs.
    125      */
    126     public void closeEntry() throws IOException {
    127         checkClosed();
    128         if (currentEntry == null) {
    129             return;
    130         }
    131 
    132         /*
    133          * The following code is careful to leave the ZipInputStream in a
    134          * consistent state, even when close() results in an exception. It does
    135          * so by:
    136          *  - pushing bytes back into the source stream
    137          *  - reading a data descriptor footer from the source stream
    138          *  - resetting fields that manage the entry being closed
    139          */
    140 
    141         // Ensure all entry bytes are read
    142         Exception failure = null;
    143         try {
    144             Streams.skipAll(this);
    145         } catch (Exception e) {
    146             failure = e;
    147         }
    148 
    149         int inB, out;
    150         if (currentEntry.compressionMethod == ZipEntry.DEFLATED) {
    151             inB = inf.getTotalIn();
    152             out = inf.getTotalOut();
    153         } else {
    154             inB = inRead;
    155             out = inRead;
    156         }
    157         int diff = entryIn - inB;
    158         // Pushback any required bytes
    159         if (diff != 0) {
    160             ((PushbackInputStream) in).unread(buf, len - diff, diff);
    161         }
    162 
    163         try {
    164             readAndVerifyDataDescriptor(inB, out, currentEntryIsZip64);
    165         } catch (Exception e) {
    166             if (failure == null) { // otherwise we're already going to throw
    167                 failure = e;
    168             }
    169         }
    170 
    171         inf.reset();
    172         lastRead = inRead = entryIn = len = 0;
    173         crc.reset();
    174         currentEntry = null;
    175 
    176         if (failure != null) {
    177             if (failure instanceof IOException) {
    178                 throw (IOException) failure;
    179             } else if (failure instanceof RuntimeException) {
    180                 throw (RuntimeException) failure;
    181             }
    182             AssertionError error = new AssertionError();
    183             error.initCause(failure);
    184             throw error;
    185         }
    186     }
    187 
    188     private void readAndVerifyDataDescriptor(long inB, long out, boolean isZip64) throws IOException {
    189         if (hasDD) {
    190             if (isZip64) {
    191                 // 8 additional bytes since the compressed / uncompressed size fields
    192                 // in the extended header are 8 bytes each, instead of 4 bytes each.
    193                 Streams.readFully(in, hdrBuf, 0, EXTHDR + 8);
    194             } else {
    195                 Streams.readFully(in, hdrBuf, 0, EXTHDR);
    196             }
    197 
    198             int sig = Memory.peekInt(hdrBuf, 0, ByteOrder.LITTLE_ENDIAN);
    199             if (sig != (int) EXTSIG) {
    200                 throw new ZipException(String.format("unknown format (EXTSIG=%x)", sig));
    201             }
    202             currentEntry.crc = ((long) Memory.peekInt(hdrBuf, EXTCRC, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    203 
    204             if (isZip64) {
    205                 currentEntry.compressedSize = Memory.peekLong(hdrBuf, EXTSIZ, ByteOrder.LITTLE_ENDIAN);
    206                 // Note that we apply an adjustment of 4 bytes to the offset of EXTLEN to account
    207                 // for the 8 byte size for zip64.
    208                 currentEntry.size = Memory.peekLong(hdrBuf, EXTLEN + 4, ByteOrder.LITTLE_ENDIAN);
    209             } else {
    210                 currentEntry.compressedSize = ((long) Memory.peekInt(hdrBuf, EXTSIZ, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    211                 currentEntry.size = ((long) Memory.peekInt(hdrBuf, EXTLEN, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    212             }
    213         }
    214         if (currentEntry.crc != crc.getValue()) {
    215             throw new ZipException("CRC mismatch");
    216         }
    217         if (currentEntry.compressedSize != inB || currentEntry.size != out) {
    218             throw new ZipException("Size mismatch");
    219         }
    220     }
    221 
    222     /**
    223      * Returns the next entry from this {@code ZipInputStream} or {@code null} if
    224      * no more entries are present.
    225      *
    226      * @throws IOException if an {@code IOException} occurs.
    227      */
    228     public ZipEntry getNextEntry() throws IOException {
    229         closeEntry();
    230         if (entriesEnd) {
    231             return null;
    232         }
    233 
    234         // Read the signature to see whether there's another local file header.
    235         Streams.readFully(in, hdrBuf, 0, 4);
    236         int hdr = Memory.peekInt(hdrBuf, 0, ByteOrder.LITTLE_ENDIAN);
    237         if (hdr == CENSIG) {
    238             entriesEnd = true;
    239             return null;
    240         }
    241         if (hdr != LOCSIG) {
    242             return null;
    243         }
    244 
    245         // Read the local file header.
    246         Streams.readFully(in, hdrBuf, 0, (LOCHDR - LOCVER));
    247         int version = peekShort(0) & 0xff;
    248         if (version > ZIPLocalHeaderVersionNeeded) {
    249             throw new ZipException("Cannot read local header version " + version);
    250         }
    251         int flags = peekShort(LOCFLG - LOCVER);
    252         if ((flags & ZipFile.GPBF_UNSUPPORTED_MASK) != 0) {
    253             throw new ZipException("Invalid General Purpose Bit Flag: " + flags);
    254         }
    255 
    256         hasDD = ((flags & ZipFile.GPBF_DATA_DESCRIPTOR_FLAG) != 0);
    257         int ceLastModifiedTime = peekShort(LOCTIM - LOCVER);
    258         int ceLastModifiedDate = peekShort(LOCTIM - LOCVER + 2);
    259         int ceCompressionMethod = peekShort(LOCHOW - LOCVER);
    260         long ceCrc = 0, ceCompressedSize = 0, ceSize = -1;
    261         if (!hasDD) {
    262             ceCrc = ((long) Memory.peekInt(hdrBuf, LOCCRC - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    263             ceCompressedSize = ((long) Memory.peekInt(hdrBuf, LOCSIZ - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    264             ceSize = ((long) Memory.peekInt(hdrBuf, LOCLEN - LOCVER, ByteOrder.LITTLE_ENDIAN)) & 0xffffffffL;
    265         }
    266         int nameLength = peekShort(LOCNAM - LOCVER);
    267         if (nameLength == 0) {
    268             throw new ZipException("Entry is not named");
    269         }
    270         int extraLength = peekShort(LOCEXT - LOCVER);
    271 
    272         String name = readString(nameLength);
    273         currentEntry = createZipEntry(name);
    274         currentEntry.time = ceLastModifiedTime;
    275         currentEntry.modDate = ceLastModifiedDate;
    276         currentEntry.setMethod(ceCompressionMethod);
    277         if (ceSize != -1) {
    278             currentEntry.setCrc(ceCrc);
    279             currentEntry.setSize(ceSize);
    280             currentEntry.setCompressedSize(ceCompressedSize);
    281         }
    282         if (extraLength > 0) {
    283             byte[] extraData = new byte[extraLength];
    284             Streams.readFully(in, extraData, 0, extraLength);
    285             currentEntry.setExtra(extraData);
    286             currentEntryIsZip64 = Zip64.parseZip64ExtendedInfo(currentEntry, false /* from central directory */);
    287         } else {
    288             currentEntryIsZip64 = false;
    289         }
    290 
    291         return currentEntry;
    292     }
    293 
    294     /**
    295      * Reads bytes from the current stream position returning the string representation.
    296      */
    297     private String readString(int byteLength) throws IOException {
    298         if (byteLength > stringBytesBuf.length) {
    299             stringBytesBuf = new byte[byteLength];
    300         }
    301         Streams.readFully(in, stringBytesBuf, 0, byteLength);
    302         // The number of chars will always be less than or equal to the number of bytes. It's
    303         // fine if this buffer is too long.
    304         if (byteLength > stringCharBuf.length) {
    305             stringCharBuf = new char[byteLength];
    306         }
    307         return ModifiedUtf8.decode(stringBytesBuf, stringCharBuf, 0, byteLength);
    308     }
    309 
    310     private int peekShort(int offset) {
    311         return Memory.peekShort(hdrBuf, offset, ByteOrder.LITTLE_ENDIAN) & 0xffff;
    312     }
    313 
    314     /**
    315      * Reads up to {@code byteCount} uncompressed bytes into the buffer
    316      * starting at {@code byteOffset}. Returns the number of bytes actually read, or -1.
    317      */
    318     @Override
    319     public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
    320         checkClosed();
    321         Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount);
    322 
    323         if (inf.finished() || currentEntry == null) {
    324             return -1;
    325         }
    326 
    327         if (currentEntry.compressionMethod == ZipEntry.STORED) {
    328             int csize = (int) currentEntry.size;
    329             if (inRead >= csize) {
    330                 return -1;
    331             }
    332             if (lastRead >= len) {
    333                 lastRead = 0;
    334                 if ((len = in.read(buf)) == -1) {
    335                     eof = true;
    336                     return -1;
    337                 }
    338                 entryIn += len;
    339             }
    340             int toRead = byteCount > (len - lastRead) ? len - lastRead : byteCount;
    341             if ((csize - inRead) < toRead) {
    342                 toRead = csize - inRead;
    343             }
    344             System.arraycopy(buf, lastRead, buffer, byteOffset, toRead);
    345             lastRead += toRead;
    346             inRead += toRead;
    347             crc.update(buffer, byteOffset, toRead);
    348             return toRead;
    349         }
    350         if (inf.needsInput()) {
    351             fill();
    352             if (len > 0) {
    353                 entryIn += len;
    354             }
    355         }
    356         int read;
    357         try {
    358             read = inf.inflate(buffer, byteOffset, byteCount);
    359         } catch (DataFormatException e) {
    360             throw new ZipException(e.getMessage());
    361         }
    362         if (read == 0 && inf.finished()) {
    363             return -1;
    364         }
    365         crc.update(buffer, byteOffset, read);
    366         return read;
    367     }
    368 
    369     @Override
    370     public int available() throws IOException {
    371         checkClosed();
    372         // The InflaterInputStream contract says we must only return 0 or 1.
    373         return (currentEntry == null || inRead < currentEntry.size) ? 1 : 0;
    374     }
    375 
    376     /**
    377      * creates a {@link ZipEntry } with the given name.
    378      *
    379      * @param name
    380      *            the name of the entry.
    381      * @return the created {@code ZipEntry}.
    382      */
    383     protected ZipEntry createZipEntry(String name) {
    384         return new ZipEntry(name);
    385     }
    386 
    387     private void checkClosed() throws IOException {
    388         if (closed) {
    389             throw new IOException("Stream is closed");
    390         }
    391     }
    392 }
    393