Home | History | Annotate | Download | only in zip
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package java.util.zip;
     19 
     20 import java.io.EOFException;
     21 import java.io.IOException;
     22 import java.io.InputStream;
     23 import java.io.PushbackInputStream;
     24 import java.nio.ByteOrder;
     25 import java.util.Arrays;
     26 import libcore.io.Memory;
     27 import libcore.io.Streams;
     28 
     29 /**
     30  * The {@code GZIPInputStream} class is used to read data stored in the GZIP
     31  * format, reading and decompressing GZIP data from the underlying stream into
     32  * its buffer.
     33  *
     34  * <h3>Example</h3>
     35  * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream}
     36  * because GZIP is only for compression, and is not a container for multiple files.
     37  * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility.
     38  * <pre>
     39  * InputStream is = ...
     40  * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is));
     41  * try {
     42  *     // Reading from 'zis' gets you the uncompressed bytes...
     43  *     processStream(zis);
     44  * } finally {
     45  *     zis.close();
     46  * }
     47  * </pre>
     48  *
     49  * <p>Note that this class ignores all remaining data at the end of the last
     50  * GZIP member.
     51  */
     52 public class GZIPInputStream extends InflaterInputStream {
     53     private static final int FCOMMENT = 16;
     54 
     55     private static final int FEXTRA = 4;
     56 
     57     private static final int FHCRC = 2;
     58 
     59     private static final int FNAME = 8;
     60 
     61     private static final int GZIP_TRAILER_SIZE = 8;
     62 
     63     /**
     64      * The magic header for the GZIP format.
     65      */
     66     public static final int GZIP_MAGIC = 0x8b1f;
     67 
     68     /**
     69      * The checksum algorithm used when handling uncompressed data.
     70      */
     71     protected CRC32 crc = new CRC32();
     72 
     73     /**
     74      * Indicates the end of the input stream.
     75      */
     76     protected boolean eos = false;
     77 
     78     /**
     79      * Construct a {@code GZIPInputStream} to read from GZIP data from the
     80      * underlying stream.
     81      *
     82      * @param is
     83      *            the {@code InputStream} to read data from.
     84      * @throws IOException
     85      *             if an {@code IOException} occurs.
     86      */
     87     public GZIPInputStream(InputStream is) throws IOException {
     88         this(is, BUF_SIZE);
     89     }
     90 
     91     /**
     92      * Construct a {@code GZIPInputStream} to read from GZIP data from the
     93      * underlying stream. Set the internal buffer size to {@code size}.
     94      *
     95      * @param is
     96      *            the {@code InputStream} to read data from.
     97      * @param size
     98      *            the internal read buffer size.
     99      * @throws IOException
    100      *             if an {@code IOException} occurs.
    101      */
    102     public GZIPInputStream(InputStream is, int size) throws IOException {
    103         super(is, new Inflater(true), size);
    104 
    105         try {
    106             byte[] header = readHeader(is);
    107             final short magic = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
    108             if (magic != (short) GZIP_MAGIC) {
    109                 throw new IOException(String.format("unknown format (magic number %x)", magic));
    110             }
    111 
    112             parseGzipHeader(is, header, crc, buf);
    113         } catch (IOException e) {
    114             close(); // release the inflater
    115             throw e;
    116         }
    117     }
    118 
    119     /**
    120      * Closes this stream and any underlying streams.
    121      */
    122     @Override
    123     public void close() throws IOException {
    124         eos = true;
    125         super.close();
    126     }
    127 
    128     @Override
    129     public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
    130         if (closed) {
    131             throw new IOException("Stream is closed");
    132         }
    133         if (eos) {
    134             return -1;
    135         }
    136         Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount);
    137 
    138         int bytesRead;
    139         try {
    140             bytesRead = super.read(buffer, byteOffset, byteCount);
    141         } finally {
    142             eos = eof; // update eos after every read(), even when it throws
    143         }
    144 
    145         if (bytesRead != -1) {
    146             crc.update(buffer, byteOffset, bytesRead);
    147         }
    148 
    149         if (eos) {
    150             verifyCrc();
    151             eos = maybeReadNextMember();
    152             if (!eos) {
    153                 crc.reset();
    154                 inf.reset();
    155                 eof = false;
    156                 len = 0;
    157             }
    158         }
    159 
    160         return bytesRead;
    161     }
    162 
    163     private boolean maybeReadNextMember() throws IOException {
    164         // If we have any unconsumed data in the inflater buffer, we have to
    165         // scan that first. The fact that we've reached here implies we've
    166         // successfully consumed the GZIP trailer.
    167         final int remaining = inf.getRemaining() - GZIP_TRAILER_SIZE;
    168         if (remaining > 0) {
    169             // NOTE: We make sure we create a pushback stream exactly once,
    170             // even if the input stream contains multiple members.
    171             //
    172             // The push back stream we create must therefore be able to contain
    173             // (worst case) the entire buffer even though there may be fewer bytes
    174             // remaining when it is first created.
    175             if (!(in instanceof PushbackInputStream)) {
    176                 in = new PushbackInputStream(in, buf.length);
    177             }
    178             ((PushbackInputStream) in).unread(buf,
    179                     inf.getCurrentOffset() + GZIP_TRAILER_SIZE, remaining);
    180         }
    181 
    182         final byte[] buffer;
    183         try {
    184             buffer = readHeader(in);
    185         } catch (EOFException eof) {
    186             // We've reached the end of the stream and there are no more members
    187             // to read. Note that we might also hit this if there are fewer than
    188             // GZIP_HEADER_LENGTH bytes at the end of a member. We don't care
    189             // because we're specified to ignore all data at the end of the last
    190             // gzip record.
    191             return true;
    192         }
    193 
    194         final short magic = Memory.peekShort(buffer, 0, ByteOrder.LITTLE_ENDIAN);
    195         if (magic != (short) GZIP_MAGIC) {
    196             // Don't throw here because we've already read one valid member
    197             // from this stream.
    198             return true;
    199         }
    200 
    201         // We've encountered the gzip magic number, so we assume there's another
    202         // member in the stream.
    203         parseGzipHeader(in, buffer, crc, buf);
    204         return false;
    205     }
    206 
    207     private static byte[] readHeader(InputStream in) throws IOException {
    208         byte[] header = new byte[10];
    209         Streams.readFully(in, header, 0, header.length);
    210         return header;
    211     }
    212 
    213     private static void parseGzipHeader(InputStream in, byte[] header,
    214             CRC32 crc, byte[] scratch) throws IOException {
    215         final byte flags = header[3];
    216         final boolean hcrc = (flags & FHCRC) != 0;
    217         if (hcrc) {
    218             crc.update(header, 0, header.length);
    219         }
    220         if ((flags & FEXTRA) != 0) {
    221             Streams.readFully(in, header, 0, 2);
    222             if (hcrc) {
    223                 crc.update(header, 0, 2);
    224             }
    225             int length = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff;
    226             while (length > 0) {
    227                 int max = length > scratch.length ? scratch.length : length;
    228                 int result = in.read(scratch, 0, max);
    229                 if (result == -1) {
    230                     throw new EOFException();
    231                 }
    232                 if (hcrc) {
    233                     crc.update(scratch, 0, result);
    234                 }
    235                 length -= result;
    236             }
    237         }
    238         if ((flags & FNAME) != 0) {
    239             readZeroTerminated(in, crc, hcrc);
    240         }
    241         if ((flags & FCOMMENT) != 0) {
    242             readZeroTerminated(in, crc, hcrc);
    243         }
    244         if (hcrc) {
    245             Streams.readFully(in, header, 0, 2);
    246             short crc16 = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN);
    247             if ((short) crc.getValue() != crc16) {
    248                 throw new IOException("CRC mismatch");
    249             }
    250             crc.reset();
    251         }
    252     }
    253 
    254     private void verifyCrc() throws IOException {
    255         // Get non-compressed bytes read by fill
    256         int size = inf.getRemaining();
    257         final int trailerSize = 8; // crc (4 bytes) + total out (4 bytes)
    258         byte[] b = new byte[trailerSize];
    259         int copySize = (size > trailerSize) ? trailerSize : size;
    260 
    261         System.arraycopy(buf, len - size, b, 0, copySize);
    262         Streams.readFully(in, b, copySize, trailerSize - copySize);
    263 
    264         if (Memory.peekInt(b, 0, ByteOrder.LITTLE_ENDIAN) != (int) crc.getValue()) {
    265             throw new IOException("CRC mismatch");
    266         }
    267         if (Memory.peekInt(b, 4, ByteOrder.LITTLE_ENDIAN) != inf.getTotalOut()) {
    268             throw new IOException("Size mismatch");
    269         }
    270     }
    271 
    272     private static void readZeroTerminated(InputStream in, CRC32 crc, boolean hcrc)
    273             throws IOException {
    274         int result;
    275         // TODO: Fix these single byte reads. This method is used to consume the
    276         // header FNAME & FCOMMENT which aren't widely used in gzip files.
    277         while ((result = in.read()) > 0) {
    278             if (hcrc) {
    279                 crc.update(result);
    280             }
    281         }
    282         if (result == -1) {
    283             throw new EOFException();
    284         }
    285         // Add the zero
    286         if (hcrc) {
    287             crc.update(result);
    288         }
    289     }
    290 }
    291