Home | History | Annotate | Download | only in okio
      1 /*
      2  * Copyright (C) 2014 Square, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package okio;
     17 
     18 import java.io.EOFException;
     19 import java.io.IOException;
     20 import java.util.zip.CRC32;
     21 import java.util.zip.Inflater;
     22 
     23 /**
     24  * A source that uses <a href="http://www.ietf.org/rfc/rfc1952.txt">GZIP</a> to
     25  * decompress data read from another source.
     26  */
     27 public final class GzipSource implements Source {
     28   private static final byte FHCRC = 1;
     29   private static final byte FEXTRA = 2;
     30   private static final byte FNAME = 3;
     31   private static final byte FCOMMENT = 4;
     32 
     33   private static final byte SECTION_HEADER = 0;
     34   private static final byte SECTION_BODY = 1;
     35   private static final byte SECTION_TRAILER = 2;
     36   private static final byte SECTION_DONE = 3;
     37 
     38   /** The current section. Always progresses forward. */
     39   private int section = SECTION_HEADER;
     40 
     41   /**
     42    * Our source should yield a GZIP header (which we consume directly), followed
     43    * by deflated bytes (which we consume via an InflaterSource), followed by a
     44    * GZIP trailer (which we also consume directly).
     45    */
     46   private final BufferedSource source;
     47 
     48   /** The inflater used to decompress the deflated body. */
     49   private final Inflater inflater;
     50 
     51   /**
     52    * The inflater source takes care of moving data between compressed source and
     53    * decompressed sink buffers.
     54    */
     55   private final InflaterSource inflaterSource;
     56 
     57   /** Checksum used to check both the GZIP header and decompressed body. */
     58   private final CRC32 crc = new CRC32();
     59 
     60   public GzipSource(Source source) {
     61     if (source == null) throw new IllegalArgumentException("source == null");
     62     this.inflater = new Inflater(true);
     63     this.source = Okio.buffer(source);
     64     this.inflaterSource = new InflaterSource(this.source, inflater);
     65   }
     66 
     67   @Override public long read(Buffer sink, long byteCount) throws IOException {
     68     if (byteCount < 0) throw new IllegalArgumentException("byteCount < 0: " + byteCount);
     69     if (byteCount == 0) return 0;
     70 
     71     // If we haven't consumed the header, we must consume it before anything else.
     72     if (section == SECTION_HEADER) {
     73       consumeHeader();
     74       section = SECTION_BODY;
     75     }
     76 
     77     // Attempt to read at least a byte of the body. If we do, we're done.
     78     if (section == SECTION_BODY) {
     79       long offset = sink.size;
     80       long result = inflaterSource.read(sink, byteCount);
     81       if (result != -1) {
     82         updateCrc(sink, offset, result);
     83         return result;
     84       }
     85       section = SECTION_TRAILER;
     86     }
     87 
     88     // The body is exhausted; time to read the trailer. We always consume the
     89     // trailer before returning a -1 exhausted result; that way if you read to
     90     // the end of a GzipSource you guarantee that the CRC has been checked.
     91     if (section == SECTION_TRAILER) {
     92       consumeTrailer();
     93       section = SECTION_DONE;
     94 
     95       // Gzip streams self-terminate: they return -1 before their underlying
     96       // source returns -1. Here we attempt to force the underlying stream to
     97       // return -1 which may trigger it to release its resources. If it doesn't
     98       // return -1, then our Gzip data finished prematurely!
     99       if (!source.exhausted()) {
    100         throw new IOException("gzip finished without exhausting source");
    101       }
    102     }
    103 
    104     return -1;
    105   }
    106 
    107   private void consumeHeader() throws IOException {
    108     // Read the 10-byte header. We peek at the flags byte first so we know if we
    109     // need to CRC the entire header. Then we read the magic ID1ID2 sequence.
    110     // We can skip everything else in the first 10 bytes.
    111     // +---+---+---+---+---+---+---+---+---+---+
    112     // |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
    113     // +---+---+---+---+---+---+---+---+---+---+
    114     source.require(10);
    115     byte flags = source.buffer().getByte(3);
    116     boolean fhcrc = ((flags >> FHCRC) & 1) == 1;
    117     if (fhcrc) updateCrc(source.buffer(), 0, 10);
    118 
    119     short id1id2 = source.readShort();
    120     checkEqual("ID1ID2", (short) 0x1f8b, id1id2);
    121     source.skip(8);
    122 
    123     // Skip optional extra fields.
    124     // +---+---+=================================+
    125     // | XLEN  |...XLEN bytes of "extra field"...| (more-->)
    126     // +---+---+=================================+
    127     if (((flags >> FEXTRA) & 1) == 1) {
    128       source.require(2);
    129       if (fhcrc) updateCrc(source.buffer(), 0, 2);
    130       int xlen = source.buffer().readShortLe();
    131       source.require(xlen);
    132       if (fhcrc) updateCrc(source.buffer(), 0, xlen);
    133       source.skip(xlen);
    134     }
    135 
    136     // Skip an optional 0-terminated name.
    137     // +=========================================+
    138     // |...original file name, zero-terminated...| (more-->)
    139     // +=========================================+
    140     if (((flags >> FNAME) & 1) == 1) {
    141       long index = source.indexOf((byte) 0);
    142       if (index == -1) throw new EOFException();
    143       if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
    144       source.skip(index + 1);
    145     }
    146 
    147     // Skip an optional 0-terminated comment.
    148     // +===================================+
    149     // |...file comment, zero-terminated...| (more-->)
    150     // +===================================+
    151     if (((flags >> FCOMMENT) & 1) == 1) {
    152       long index = source.indexOf((byte) 0);
    153       if (index == -1) throw new EOFException();
    154       if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
    155       source.skip(index + 1);
    156     }
    157 
    158     // Confirm the optional header CRC.
    159     // +---+---+
    160     // | CRC16 |
    161     // +---+---+
    162     if (fhcrc) {
    163       checkEqual("FHCRC", source.readShortLe(), (short) crc.getValue());
    164       crc.reset();
    165     }
    166   }
    167 
    168   private void consumeTrailer() throws IOException {
    169     // Read the eight-byte trailer. Confirm the body's CRC and size.
    170     // +---+---+---+---+---+---+---+---+
    171     // |     CRC32     |     ISIZE     |
    172     // +---+---+---+---+---+---+---+---+
    173     checkEqual("CRC", source.readIntLe(), (int) crc.getValue());
    174     checkEqual("ISIZE", source.readIntLe(), inflater.getTotalOut());
    175   }
    176 
    177   @Override public Timeout timeout() {
    178     return source.timeout();
    179   }
    180 
    181   @Override public void close() throws IOException {
    182     inflaterSource.close();
    183   }
    184 
    185   /** Updates the CRC with the given bytes. */
    186   private void updateCrc(Buffer buffer, long offset, long byteCount) {
    187     // Skip segments that we aren't checksumming.
    188     Segment s = buffer.head;
    189     for (; offset >= (s.limit - s.pos); s = s.next) {
    190       offset -= (s.limit - s.pos);
    191     }
    192 
    193     // Checksum one segment at a time.
    194     for (; byteCount > 0; s = s.next) {
    195       int pos = (int) (s.pos + offset);
    196       int toUpdate = (int) Math.min(s.limit - pos, byteCount);
    197       crc.update(s.data, pos, toUpdate);
    198       byteCount -= toUpdate;
    199       offset = 0;
    200     }
    201   }
    202 
    203   private void checkEqual(String name, int expected, int actual) throws IOException {
    204     if (actual != expected) {
    205       throw new IOException(String.format(
    206           "%s: actual 0x%08x != expected 0x%08x", name, actual, expected));
    207     }
    208   }
    209 }
    210