Home | History | Annotate | Download | only in io
      1 /*
      2  * Copyright (C) 2012 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.common.io;
     18 
     19 import static com.google.common.base.Preconditions.checkNotNull;
     20 
     21 import com.google.common.annotations.Beta;
     22 import com.google.common.base.Ascii;
     23 import com.google.common.base.Splitter;
     24 import com.google.common.collect.AbstractIterator;
     25 import com.google.common.collect.ImmutableList;
     26 import com.google.common.collect.Lists;
     27 
     28 import java.io.BufferedReader;
     29 import java.io.IOException;
     30 import java.io.Reader;
     31 import java.io.Writer;
     32 import java.nio.charset.Charset;
     33 import java.util.Iterator;
     34 import java.util.List;
     35 import java.util.regex.Pattern;
     36 
     37 import javax.annotation.Nullable;
     38 
     39 /**
     40  * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
     41  * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
     42  * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
     43  *
     44  * <p>{@code CharSource} provides two kinds of methods:
     45  * <ul>
     46  *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
     47  *   instance each time they are called. The caller is responsible for ensuring that the returned
     48  *   reader is closed.
     49  *   <li><b>Convenience methods:</b> These are implementations of common operations that are
     50  *   typically implemented by opening a reader using one of the methods in the first category,
     51  *   doing something and finally closing the reader that was opened.
     52  * </ul>
     53  *
     54  * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
     55  * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
     56  * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
     57  * there to be an empty line at the end if the contents are terminated with a line separator.
     58  *
     59  * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
     60  * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
     61  *
     62  * @since 14.0
     63  * @author Colin Decker
     64  */
     65 public abstract class CharSource {
     66 
  /**
   * Constructor for use by subclasses. It takes no arguments and has an empty body.
   */
  protected CharSource() {}
     71 
  /**
   * Opens a new {@link Reader} for reading from this source. This method should return a new,
   * independent reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * <p>The convenience methods of this class obtain their readers through this method (directly
   * or via {@link #openBufferedStream()}), so it is the single extension point subclasses must
   * implement.
   *
   * @return a reader over the contents of this source
   * @throws IOException if an I/O error occurs in the process of opening the reader
   */
  public abstract Reader openStream() throws IOException;
     81 
     82   /**
     83    * Opens a new {@link BufferedReader} for reading from this source. This method should return a
     84    * new, independent reader each time it is called.
     85    *
     86    * <p>The caller is responsible for ensuring that the returned reader is closed.
     87    *
     88    * @throws IOException if an I/O error occurs in the process of opening the reader
     89    */
     90   public BufferedReader openBufferedStream() throws IOException {
     91     Reader reader = openStream();
     92     return (reader instanceof BufferedReader)
     93         ? (BufferedReader) reader
     94         : new BufferedReader(reader);
     95   }
     96 
     97   /**
     98    * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
     99    * Does not close {@code appendable} if it is {@code Closeable}.
    100    *
    101    * @throws IOException if an I/O error occurs in the process of reading from this source or
    102    *     writing to {@code appendable}
    103    */
    104   public long copyTo(Appendable appendable) throws IOException {
    105     checkNotNull(appendable);
    106 
    107     Closer closer = Closer.create();
    108     try {
    109       Reader reader = closer.register(openStream());
    110       return CharStreams.copy(reader, appendable);
    111     } catch (Throwable e) {
    112       throw closer.rethrow(e);
    113     } finally {
    114       closer.close();
    115     }
    116   }
    117 
    118   /**
    119    * Copies the contents of this source to the given sink.
    120    *
    121    * @throws IOException if an I/O error occurs in the process of reading from this source or
    122    *     writing to {@code sink}
    123    */
    124   public long copyTo(CharSink sink) throws IOException {
    125     checkNotNull(sink);
    126 
    127     Closer closer = Closer.create();
    128     try {
    129       Reader reader = closer.register(openStream());
    130       Writer writer = closer.register(sink.openStream());
    131       return CharStreams.copy(reader, writer);
    132     } catch (Throwable e) {
    133       throw closer.rethrow(e);
    134     } finally {
    135       closer.close();
    136     }
    137   }
    138 
    139   /**
    140    * Reads the contents of this source as a string.
    141    *
    142    * @throws IOException if an I/O error occurs in the process of reading from this source
    143    */
    144   public String read() throws IOException {
    145     Closer closer = Closer.create();
    146     try {
    147       Reader reader = closer.register(openStream());
    148       return CharStreams.toString(reader);
    149     } catch (Throwable e) {
    150       throw closer.rethrow(e);
    151     } finally {
    152       closer.close();
    153     }
    154   }
    155 
    156   /**
    157    * Reads the first link of this source as a string. Returns {@code null} if this source is empty.
    158    *
    159    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
    160    * {@code \r\n}, does not include the line separator in the returned line and does not consider
    161    * there to be an extra empty line at the end if the content is terminated with a line separator.
    162    *
    163    * @throws IOException if an I/O error occurs in the process of reading from this source
    164    */
    165   public @Nullable String readFirstLine() throws IOException {
    166     Closer closer = Closer.create();
    167     try {
    168       BufferedReader reader = closer.register(openBufferedStream());
    169       return reader.readLine();
    170     } catch (Throwable e) {
    171       throw closer.rethrow(e);
    172     } finally {
    173       closer.close();
    174     }
    175   }
    176 
    177   /**
    178    * Reads all the lines of this source as a list of strings. The returned list will be empty if
    179    * this source is empty.
    180    *
    181    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
    182    * {@code \r\n}, does not include the line separator in the returned lines and does not consider
    183    * there to be an extra empty line at the end if the content is terminated with a line separator.
    184    *
    185    * @throws IOException if an I/O error occurs in the process of reading from this source
    186    */
    187   public ImmutableList<String> readLines() throws IOException {
    188     Closer closer = Closer.create();
    189     try {
    190       BufferedReader reader = closer.register(openBufferedStream());
    191       List<String> result = Lists.newArrayList();
    192       String line;
    193       while ((line = reader.readLine()) != null) {
    194         result.add(line);
    195       }
    196       return ImmutableList.copyOf(result);
    197     } catch (Throwable e) {
    198       throw closer.rethrow(e);
    199     } finally {
    200       closer.close();
    201     }
    202   }
    203 
    204   /**
    205    * Reads lines of text from this source, processing each line as it is read using the given
    206    * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
    207    * returns {@code false} and returns the result produced by the processor.
    208    *
    209    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
    210    * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
    211    * and does not consider there to be an extra empty line at the end if the content is terminated
    212    * with a line separator.
    213    *
    214    * @throws IOException if an I/O error occurs in the process of reading from this source or if
    215    *     {@code processor} throws an {@code IOException}
    216    * @since 16.0
    217    */
    218   @Beta
    219   public <T> T readLines(LineProcessor<T> processor) throws IOException {
    220     checkNotNull(processor);
    221 
    222     Closer closer = Closer.create();
    223     try {
    224       Reader reader = closer.register(openStream());
    225       return CharStreams.readLines(reader, processor);
    226     } catch (Throwable e) {
    227       throw closer.rethrow(e);
    228     } finally {
    229       closer.close();
    230     }
    231   }
    232 
    233   /**
    234    * Returns whether the source has zero chars. The default implementation is to open a stream and
    235    * check for EOF.
    236    *
    237    * @throws IOException if an I/O error occurs
    238    * @since 15.0
    239    */
    240   public boolean isEmpty() throws IOException {
    241     Closer closer = Closer.create();
    242     try {
    243       Reader reader = closer.register(openStream());
    244       return reader.read() == -1;
    245     } catch (Throwable e) {
    246       throw closer.rethrow(e);
    247     } finally {
    248       closer.close();
    249     }
    250   }
    251 
    252   /**
    253    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
    254    * the source will contain the concatenated data from the streams of the underlying sources.
    255    *
    256    * <p>Only one underlying stream will be open at a time. Closing the  concatenated stream will
    257    * close the open underlying stream.
    258    *
    259    * @param sources the sources to concatenate
    260    * @return a {@code CharSource} containing the concatenated data
    261    * @since 15.0
    262    */
    263   public static CharSource concat(Iterable<? extends CharSource> sources) {
    264     return new ConcatenatedCharSource(sources);
    265   }
    266 
    267   /**
    268    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
    269    * the source will contain the concatenated data from the streams of the underlying sources.
    270    *
    271    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
    272    * close the open underlying stream.
    273    *
    274    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
    275    * method is called. This will fail if the iterator is infinite and may cause problems if the
    276    * iterator eagerly fetches data for each source when iterated (rather than producing sources
    277    * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
    278    * overload if possible.
    279    *
    280    * @param sources the sources to concatenate
    281    * @return a {@code CharSource} containing the concatenated data
    282    * @throws NullPointerException if any of {@code sources} is {@code null}
    283    * @since 15.0
    284    */
    285   public static CharSource concat(Iterator<? extends CharSource> sources) {
    286     return concat(ImmutableList.copyOf(sources));
    287   }
    288 
    289   /**
    290    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
    291    * the source will contain the concatenated data from the streams of the underlying sources.
    292    *
    293    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
    294    * close the open underlying stream.
    295    *
    296    * @param sources the sources to concatenate
    297    * @return a {@code CharSource} containing the concatenated data
    298    * @throws NullPointerException if any of {@code sources} is {@code null}
    299    * @since 15.0
    300    */
    301   public static CharSource concat(CharSource... sources) {
    302     return concat(ImmutableList.copyOf(sources));
    303   }
    304 
    305   /**
    306    * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
    307    * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
    308    * the {@code charSequence} is mutated while it is being read, so don't do that.
    309    *
    310    * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
    311    */
    312   public static CharSource wrap(CharSequence charSequence) {
    313     return new CharSequenceCharSource(charSequence);
    314   }
    315 
    316   /**
    317    * Returns an immutable {@link CharSource} that contains no characters.
    318    *
    319    * @since 15.0
    320    */
    321   public static CharSource empty() {
    322     return EmptyCharSource.INSTANCE;
    323   }
    324 
    325   private static class CharSequenceCharSource extends CharSource {
    326 
    327     private static final Splitter LINE_SPLITTER
    328         = Splitter.on(Pattern.compile("\r\n|\n|\r"));
    329 
    330     private final CharSequence seq;
    331 
    332     protected CharSequenceCharSource(CharSequence seq) {
    333       this.seq = checkNotNull(seq);
    334     }
    335 
    336     @Override
    337     public Reader openStream() {
    338       return new CharSequenceReader(seq);
    339     }
    340 
    341     @Override
    342     public String read() {
    343       return seq.toString();
    344     }
    345 
    346     @Override
    347     public boolean isEmpty() {
    348       return seq.length() == 0;
    349     }
    350 
    351     /**
    352      * Returns an iterable over the lines in the string. If the string ends in
    353      * a newline, a final empty string is not included to match the behavior of
    354      * BufferedReader/LineReader.readLine().
    355      */
    356     private Iterable<String> lines() {
    357       return new Iterable<String>() {
    358         @Override
    359         public Iterator<String> iterator() {
    360           return new AbstractIterator<String>() {
    361             Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
    362 
    363             @Override
    364             protected String computeNext() {
    365               if (lines.hasNext()) {
    366                 String next = lines.next();
    367                 // skip last line if it's empty
    368                 if (lines.hasNext() || !next.isEmpty()) {
    369                   return next;
    370                 }
    371               }
    372               return endOfData();
    373             }
    374           };
    375         }
    376       };
    377     }
    378 
    379     @Override
    380     public String readFirstLine() {
    381       Iterator<String> lines = lines().iterator();
    382       return lines.hasNext() ? lines.next() : null;
    383     }
    384 
    385     @Override
    386     public ImmutableList<String> readLines() {
    387       return ImmutableList.copyOf(lines());
    388     }
    389 
    390     @Override
    391     public <T> T readLines(LineProcessor<T> processor) throws IOException {
    392       for (String line : lines()) {
    393         if (!processor.processLine(line)) {
    394           break;
    395         }
    396       }
    397       return processor.getResult();
    398     }
    399 
    400     @Override
    401     public String toString() {
    402       return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
    403     }
    404   }
    405 
  /**
   * The {@link CharSource} returned by {@link CharSource#empty()}: a
   * {@code CharSequenceCharSource} over the empty string with its own {@code toString()}.
   */
  private static final class EmptyCharSource extends CharSequenceCharSource {

    /** The single shared instance; the constructor is private so no other can be created. */
    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      // Wrap the empty string so all inherited operations see a zero-length sequence.
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }
    419 
    420   private static final class ConcatenatedCharSource extends CharSource {
    421 
    422     private final Iterable<? extends CharSource> sources;
    423 
    424     ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
    425       this.sources = checkNotNull(sources);
    426     }
    427 
    428     @Override
    429     public Reader openStream() throws IOException {
    430       return new MultiReader(sources.iterator());
    431     }
    432 
    433     @Override
    434     public boolean isEmpty() throws IOException {
    435       for (CharSource source : sources) {
    436         if (!source.isEmpty()) {
    437           return false;
    438         }
    439       }
    440       return true;
    441     }
    442 
    443     @Override
    444     public String toString() {
    445       return "CharSource.concat(" + sources + ")";
    446     }
    447   }
    448 }
    449