Home | History | Annotate | Download | only in io
      1 /*
      2  * Copyright (C) 2012 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.io;
     18 
     19 import static com.google.common.base.Preconditions.checkArgument;
     20 import static com.google.common.base.Preconditions.checkNotNull;
     21 
     22 import com.google.common.annotations.Beta;
     23 import com.google.common.base.Ascii;
     24 import com.google.common.collect.ImmutableList;
     25 import com.google.common.hash.Funnels;
     26 import com.google.common.hash.HashCode;
     27 import com.google.common.hash.HashFunction;
     28 import com.google.common.hash.Hasher;
     29 
     30 import java.io.BufferedInputStream;
     31 import java.io.ByteArrayInputStream;
     32 import java.io.IOException;
     33 import java.io.InputStream;
     34 import java.io.InputStreamReader;
     35 import java.io.OutputStream;
     36 import java.io.Reader;
     37 import java.nio.charset.Charset;
     38 import java.util.Arrays;
     39 import java.util.Iterator;
     40 
     41 /**
     42  * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
     43  * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
     44  * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
     45  *
     46  * <p>{@code ByteSource} provides two kinds of methods:
     47  * <ul>
     48  *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
     49  *   instance each time they are called. The caller is responsible for ensuring that the returned
     50  *   stream is closed.
     51  *   <li><b>Convenience methods:</b> These are implementations of common operations that are
     52  *   typically implemented by opening a stream using one of the methods in the first category, doing
     53  *   something and finally closing the stream that was opened.
     54  * </ul>
     55  *
     56  * @since 14.0
     57  * @author Colin Decker
     58  */
     59 public abstract class ByteSource {
     60 
     61   private static final int BUF_SIZE = 0x1000; // 4K
     62 
     63   /**
     64    * Constructor for use by subclasses.
     65    */
     66   protected ByteSource() {}
     67 
     68   /**
     69    * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
     70    * as characters using the given {@link Charset}.
     71    */
     72   public CharSource asCharSource(Charset charset) {
     73     return new AsCharSource(charset);
     74   }
     75 
     76   /**
     77    * Opens a new {@link InputStream} for reading from this source. This method should return a new,
     78    * independent stream each time it is called.
     79    *
     80    * <p>The caller is responsible for ensuring that the returned stream is closed.
     81    *
     82    * @throws IOException if an I/O error occurs in the process of opening the stream
     83    */
     84   public abstract InputStream openStream() throws IOException;
     85 
     86   /**
     87    * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
     88    * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
     89    * delegate to {@link #openStream()} when the stream returned by that method does not benefit
     90    * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
     91    * return a new, independent stream each time it is called.
     92    *
     93    * <p>The caller is responsible for ensuring that the returned stream is closed.
     94    *
     95    * @throws IOException if an I/O error occurs in the process of opening the stream
     96    * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
     97    */
     98   public InputStream openBufferedStream() throws IOException {
     99     InputStream in = openStream();
    100     return (in instanceof BufferedInputStream)
    101         ? (BufferedInputStream) in
    102         : new BufferedInputStream(in);
    103   }
    104 
    105   /**
    106    * Returns a view of a slice of this byte source that is at most {@code length} bytes long
    107    * starting at the given {@code offset}.
    108    *
    109    * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
    110    */
    111   public ByteSource slice(long offset, long length) {
    112     return new SlicedByteSource(offset, length);
    113   }
    114 
    115   /**
    116    * Returns whether the source has zero bytes. The default implementation is to open a stream and
    117    * check for EOF.
    118    *
    119    * @throws IOException if an I/O error occurs
    120    * @since 15.0
    121    */
    122   public boolean isEmpty() throws IOException {
    123     Closer closer = Closer.create();
    124     try {
    125       InputStream in = closer.register(openStream());
    126       return in.read() == -1;
    127     } catch (Throwable e) {
    128       throw closer.rethrow(e);
    129     } finally {
    130       closer.close();
    131     }
    132   }
    133 
    134   /**
    135    * Returns the size of this source in bytes. For most implementations, this is a heavyweight
    136    * operation that will open a stream, read (or {@link InputStream#skip(long) skip}, if possible)
    137    * to the end of the stream and return the total number of bytes that were read.
    138    *
    139    * <p>For some sources, such as a file, this method may use a more efficient implementation. Note
    140    * that in such cases, it is <i>possible</i> that this method will return a different number of
    141    * bytes than would be returned by reading all of the bytes (for example, some special files may
    142    * return a size of 0 despite actually having content when read).
    143    *
    144    * <p>In either case, if this is a mutable source such as a file, the size it returns may not be
    145    * the same number of bytes a subsequent read would return.
    146    *
    147    * @throws IOException if an I/O error occurs in the process of reading the size of this source
    148    */
    149   public long size() throws IOException {
    150     Closer closer = Closer.create();
    151     try {
    152       InputStream in = closer.register(openStream());
    153       return countBySkipping(in);
    154     } catch (IOException e) {
    155       // skip may not be supported... at any rate, try reading
    156     } finally {
    157       closer.close();
    158     }
    159 
    160     closer = Closer.create();
    161     try {
    162       InputStream in = closer.register(openStream());
    163       return countByReading(in);
    164     } catch (Throwable e) {
    165       throw closer.rethrow(e);
    166     } finally {
    167       closer.close();
    168     }
    169   }
    170 
    171   /**
    172    * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
    173    * first call to skip threw, in which case skip may just not be supported.
    174    */
    175   private long countBySkipping(InputStream in) throws IOException {
    176     long count = 0;
    177     while (true) {
    178       // don't try to skip more than available()
    179       // things may work really wrong with FileInputStream otherwise
    180       long skipped = in.skip(Math.min(in.available(), Integer.MAX_VALUE));
    181       if (skipped <= 0) {
    182         if (in.read() == -1) {
    183           return count;
    184         } else if (count == 0 && in.available() == 0) {
    185           // if available is still zero after reading a single byte, it
    186           // will probably always be zero, so we should countByReading
    187           throw new IOException();
    188         }
    189         count++;
    190       } else {
    191         count += skipped;
    192       }
    193     }
    194   }
    195 
    196   private static final byte[] countBuffer = new byte[BUF_SIZE];
    197 
    198   private long countByReading(InputStream in) throws IOException {
    199     long count = 0;
    200     long read;
    201     while ((read = in.read(countBuffer)) != -1) {
    202       count += read;
    203     }
    204     return count;
    205   }
    206 
    207   /**
    208    * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
    209    * {@code output}.
    210    *
    211    * @throws IOException if an I/O error occurs in the process of reading from this source or
    212    *     writing to {@code output}
    213    */
    214   public long copyTo(OutputStream output) throws IOException {
    215     checkNotNull(output);
    216 
    217     Closer closer = Closer.create();
    218     try {
    219       InputStream in = closer.register(openStream());
    220       return ByteStreams.copy(in, output);
    221     } catch (Throwable e) {
    222       throw closer.rethrow(e);
    223     } finally {
    224       closer.close();
    225     }
    226   }
    227 
    228   /**
    229    * Copies the contents of this byte source to the given {@code ByteSink}.
    230    *
    231    * @throws IOException if an I/O error occurs in the process of reading from this source or
    232    *     writing to {@code sink}
    233    */
    234   public long copyTo(ByteSink sink) throws IOException {
    235     checkNotNull(sink);
    236 
    237     Closer closer = Closer.create();
    238     try {
    239       InputStream in = closer.register(openStream());
    240       OutputStream out = closer.register(sink.openStream());
    241       return ByteStreams.copy(in, out);
    242     } catch (Throwable e) {
    243       throw closer.rethrow(e);
    244     } finally {
    245       closer.close();
    246     }
    247   }
    248 
    249   /**
    250    * Reads the full contents of this byte source as a byte array.
    251    *
    252    * @throws IOException if an I/O error occurs in the process of reading from this source
    253    */
    254   public byte[] read() throws IOException {
    255     Closer closer = Closer.create();
    256     try {
    257       InputStream in = closer.register(openStream());
    258       return ByteStreams.toByteArray(in);
    259     } catch (Throwable e) {
    260       throw closer.rethrow(e);
    261     } finally {
    262       closer.close();
    263     }
    264   }
    265 
    266   /**
    267    * Reads the contents of this byte source using the given {@code processor} to process bytes as
    268    * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
    269    * Returns the result produced by the processor.
    270    *
    271    * @throws IOException if an I/O error occurs in the process of reading from this source or if
    272    *     {@code processor} throws an {@code IOException}
    273    * @since 16.0
    274    */
    275   @Beta
    276   public <T> T read(ByteProcessor<T> processor) throws IOException {
    277     checkNotNull(processor);
    278 
    279     Closer closer = Closer.create();
    280     try {
    281       InputStream in = closer.register(openStream());
    282       return ByteStreams.readBytes(in, processor);
    283     } catch (Throwable e) {
    284       throw closer.rethrow(e);
    285     } finally {
    286       closer.close();
    287     }
    288   }
    289 
    290   /**
    291    * Hashes the contents of this byte source using the given hash function.
    292    *
    293    * @throws IOException if an I/O error occurs in the process of reading from this source
    294    */
    295   public HashCode hash(HashFunction hashFunction) throws IOException {
    296     Hasher hasher = hashFunction.newHasher();
    297     copyTo(Funnels.asOutputStream(hasher));
    298     return hasher.hash();
    299   }
    300 
    301   /**
    302    * Checks that the contents of this byte source are equal to the contents of the given byte
    303    * source.
    304    *
    305    * @throws IOException if an I/O error occurs in the process of reading from this source or
    306    *     {@code other}
    307    */
    308   public boolean contentEquals(ByteSource other) throws IOException {
    309     checkNotNull(other);
    310 
    311     byte[] buf1 = new byte[BUF_SIZE];
    312     byte[] buf2 = new byte[BUF_SIZE];
    313 
    314     Closer closer = Closer.create();
    315     try {
    316       InputStream in1 = closer.register(openStream());
    317       InputStream in2 = closer.register(other.openStream());
    318       while (true) {
    319         int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
    320         int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
    321         if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
    322           return false;
    323         } else if (read1 != BUF_SIZE) {
    324           return true;
    325         }
    326       }
    327     } catch (Throwable e) {
    328       throw closer.rethrow(e);
    329     } finally {
    330       closer.close();
    331     }
    332   }
    333 
    334   /**
    335    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
    336    * the source will contain the concatenated data from the streams of the underlying sources.
    337    *
    338    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
    339    * close the open underlying stream.
    340    *
    341    * @param sources the sources to concatenate
    342    * @return a {@code ByteSource} containing the concatenated data
    343    * @since 15.0
    344    */
    345   public static ByteSource concat(Iterable<? extends ByteSource> sources) {
    346     return new ConcatenatedByteSource(sources);
    347   }
    348 
    349   /**
    350    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
    351    * the source will contain the concatenated data from the streams of the underlying sources.
    352    *
    353    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
    354    * close the open underlying stream.
    355    *
    356    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
    357    * method is called. This will fail if the iterator is infinite and may cause problems if the
    358    * iterator eagerly fetches data for each source when iterated (rather than producing sources
    359    * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
    360    * overload if possible.
    361    *
    362    * @param sources the sources to concatenate
    363    * @return a {@code ByteSource} containing the concatenated data
    364    * @throws NullPointerException if any of {@code sources} is {@code null}
    365    * @since 15.0
    366    */
    367   public static ByteSource concat(Iterator<? extends ByteSource> sources) {
    368     return concat(ImmutableList.copyOf(sources));
    369   }
    370 
    371   /**
    372    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
    373    * the source will contain the concatenated data from the streams of the underlying sources.
    374    *
    375    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
    376    * close the open underlying stream.
    377    *
    378    * @param sources the sources to concatenate
    379    * @return a {@code ByteSource} containing the concatenated data
    380    * @throws NullPointerException if any of {@code sources} is {@code null}
    381    * @since 15.0
    382    */
    383   public static ByteSource concat(ByteSource... sources) {
    384     return concat(ImmutableList.copyOf(sources));
    385   }
    386 
    387   /**
    388    * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
    389    * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
    390    *
    391    * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
    392    */
    393   public static ByteSource wrap(byte[] b) {
    394     return new ByteArrayByteSource(b);
    395   }
    396 
    397   /**
    398    * Returns an immutable {@link ByteSource} that contains no bytes.
    399    *
    400    * @since 15.0
    401    */
    402   public static ByteSource empty() {
    403     return EmptyByteSource.INSTANCE;
    404   }
    405 
    406   /**
    407    * A char source that reads bytes from this source and decodes them as characters using a
    408    * charset.
    409    */
    410   private final class AsCharSource extends CharSource {
    411 
    412     private final Charset charset;
    413 
    414     private AsCharSource(Charset charset) {
    415       this.charset = checkNotNull(charset);
    416     }
    417 
    418     @Override
    419     public Reader openStream() throws IOException {
    420       return new InputStreamReader(ByteSource.this.openStream(), charset);
    421     }
    422 
    423     @Override
    424     public String toString() {
    425       return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
    426     }
    427   }
    428 
    429   /**
    430    * A view of a subsection of the containing byte source.
    431    */
    432   private final class SlicedByteSource extends ByteSource {
    433 
    434     private final long offset;
    435     private final long length;
    436 
    437     private SlicedByteSource(long offset, long length) {
    438       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
    439       checkArgument(length >= 0, "length (%s) may not be negative", length);
    440       this.offset = offset;
    441       this.length = length;
    442     }
    443 
    444     @Override
    445     public InputStream openStream() throws IOException {
    446       return sliceStream(ByteSource.this.openStream());
    447     }
    448 
    449     @Override
    450     public InputStream openBufferedStream() throws IOException {
    451       return sliceStream(ByteSource.this.openBufferedStream());
    452     }
    453 
    454     private InputStream sliceStream(InputStream in) throws IOException {
    455       if (offset > 0) {
    456         try {
    457           ByteStreams.skipFully(in, offset);
    458         } catch (Throwable e) {
    459           Closer closer = Closer.create();
    460           closer.register(in);
    461           try {
    462             throw closer.rethrow(e);
    463           } finally {
    464             closer.close();
    465           }
    466         }
    467       }
    468       return ByteStreams.limit(in, length);
    469     }
    470 
    471     @Override
    472     public ByteSource slice(long offset, long length) {
    473       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
    474       checkArgument(length >= 0, "length (%s) may not be negative", length);
    475       long maxLength = this.length - offset;
    476       return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
    477     }
    478 
    479     @Override
    480     public boolean isEmpty() throws IOException {
    481       return length == 0 || super.isEmpty();
    482     }
    483 
    484     @Override
    485     public String toString() {
    486       return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
    487     }
    488   }
    489 
    490   private static class ByteArrayByteSource extends ByteSource {
    491 
    492     protected final byte[] bytes;
    493 
    494     protected ByteArrayByteSource(byte[] bytes) {
    495       this.bytes = checkNotNull(bytes);
    496     }
    497 
    498     @Override
    499     public InputStream openStream() {
    500       return new ByteArrayInputStream(bytes);
    501     }
    502 
    503     @Override
    504     public InputStream openBufferedStream() throws IOException {
    505       return openStream();
    506     }
    507 
    508     @Override
    509     public boolean isEmpty() {
    510       return bytes.length == 0;
    511     }
    512 
    513     @Override
    514     public long size() {
    515       return bytes.length;
    516     }
    517 
    518     @Override
    519     public byte[] read() {
    520       return bytes.clone();
    521     }
    522 
    523     @Override
    524     public long copyTo(OutputStream output) throws IOException {
    525       output.write(bytes);
    526       return bytes.length;
    527     }
    528 
    529     @Override
    530     public <T> T read(ByteProcessor<T> processor) throws IOException {
    531       processor.processBytes(bytes, 0, bytes.length);
    532       return processor.getResult();
    533     }
    534 
    535     @Override
    536     public HashCode hash(HashFunction hashFunction) throws IOException {
    537       return hashFunction.hashBytes(bytes);
    538     }
    539 
    540     // TODO(user): Possibly override slice()
    541 
    542     @Override
    543     public String toString() {
    544       return "ByteSource.wrap("
    545           + Ascii.truncate(BaseEncoding.base16().encode(bytes), 30, "...") + ")";
    546     }
    547   }
    548 
    549   private static final class EmptyByteSource extends ByteArrayByteSource {
    550 
    551     private static final EmptyByteSource INSTANCE = new EmptyByteSource();
    552 
    553     private EmptyByteSource() {
    554       super(new byte[0]);
    555     }
    556 
    557     @Override
    558     public CharSource asCharSource(Charset charset) {
    559       checkNotNull(charset);
    560       return CharSource.empty();
    561     }
    562 
    563     @Override
    564     public byte[] read() {
    565       return bytes; // length is 0, no need to clone
    566     }
    567 
    568     @Override
    569     public String toString() {
    570       return "ByteSource.empty()";
    571     }
    572   }
    573 
    574   private static final class ConcatenatedByteSource extends ByteSource {
    575 
    576     private final Iterable<? extends ByteSource> sources;
    577 
    578     ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
    579       this.sources = checkNotNull(sources);
    580     }
    581 
    582     @Override
    583     public InputStream openStream() throws IOException {
    584       return new MultiInputStream(sources.iterator());
    585     }
    586 
    587     @Override
    588     public boolean isEmpty() throws IOException {
    589       for (ByteSource source : sources) {
    590         if (!source.isEmpty()) {
    591           return false;
    592         }
    593       }
    594       return true;
    595     }
    596 
    597     @Override
    598     public long size() throws IOException {
    599       long result = 0L;
    600       for (ByteSource source : sources) {
    601         result += source.size();
    602       }
    603       return result;
    604     }
    605 
    606     @Override
    607     public String toString() {
    608       return "ByteSource.concat(" + sources + ")";
    609     }
    610   }
    611 }
    612