Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  *   - Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  *
     11  *   - Redistributions in binary form must reproduce the above copyright
     12  *     notice, this list of conditions and the following disclaimer in the
     13  *     documentation and/or other materials provided with the distribution.
     14  *
     15  *   - Neither the name of Oracle nor the names of its
     16  *     contributors may be used to endorse or promote products derived
     17  *     from this software without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
     20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
     21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
     23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * This source code is provided to illustrate the usage of a given feature
     34  * or technique and has been deliberately simplified. Additional steps
     35  * required for a production-quality application, such as security checks,
     36  * input validation, and proper error handling, might not be present in
     37  * this sample code.
     38  */
     39 
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
     46 
     47 /**
     48  * WC - Prints newline, word, and character counts for each file. See
     49  * the {@link #usage} method for instructions and command line parameters. This
     50  * sample shows usages of:
     51  * <ul>
     52  * <li>Lambda and bulk operations. Shows how to create a custom collector to
     53  * gather custom statistics. Implements the collection of statistics using a
     54  * built-in API.</li>
     55  * <li>Constructor reference.</li>
     56  * <li>Try-with-resources feature.</li>
     57  * </ul>
     58  *
     59  */
     60 public class WC {
     61 
     62     //The number of characters that may be read.
     63     private static final int READ_AHEAD_LIMIT = 100_000_000;
     64 
     65     //The pattern for splitting strings by non word characters to get words.
     66     private static final Pattern nonWordPattern = Pattern.compile("\\W");
     67 
     68     /**
     69      * The main method for the WC program. Run the program with an empty
     70      * argument list to see possible arguments.
     71      *
     72      * @param args the argument list for WC
     73      * @throws java.io.IOException If an input exception occurred.
     74      */
     75     public static void main(String[] args) throws IOException {
     76 
     77         if (args.length != 1) {
     78             usage();
     79             return;
     80         }
     81 
     82         try (BufferedReader reader = new BufferedReader(
     83                 new FileReader(args[0]))) {
     84             reader.mark(READ_AHEAD_LIMIT);
     85             /*
     86              * Statistics can be gathered in four passes using a built-in API.
     87              * The method demonstrates how separate operations can be
     88              * implemented using a built-in API.
     89              */
     90             collectInFourPasses(reader);
     91             /*
     92              * Usage of several passes to collect data is not the best way.
     93              * Statistics can be gathered by a custom collector in one pass.
     94              */
     95             reader.reset();
     96             collectInOnePass(reader);
     97         } catch (FileNotFoundException e) {
     98             usage();
     99             System.err.println(e);
    100         }
    101     }
    102 
    103     private static void collectInFourPasses(BufferedReader reader)
    104             throws IOException {
    105         /*
    106          * Input is read as a stream of lines by lines().
    107          * Every line is turned into a stream of chars by the flatMapToInt(...)
    108          * method.
    109          * Length of the stream is counted by count().
    110          */
    111         System.out.println("Character count = "
    112                 + reader.lines().flatMapToInt(String::chars).count());
    113         /*
    114          * Input is read as a stream of lines by lines().
    115          * Every line is split by nonWordPattern into words by flatMap(...)
    116          * method.
    117          * Empty lines are removed by the filter(...) method.
    118          * Length of the stream is counted by count().
    119          */
    120         reader.reset();
    121         System.out.println("Word count = "
    122                 + reader.lines()
    123                 .flatMap(nonWordPattern::splitAsStream)
    124                 .filter(str -> !str.isEmpty()).count());
    125 
    126         reader.reset();
    127         System.out.println("Newline count = " + reader.lines().count());
    128         /*
    129          * Input is read as a stream of lines by lines().
    130          * Every line is mapped to its length.
    131          * Maximum of the lengths is calculated.
    132          */
    133         reader.reset();
    134         System.out.println("Max line length = "
    135                 + reader.lines().mapToInt(String::length).max().getAsInt());
    136     }
    137 
    138     private static void collectInOnePass(BufferedReader reader) {
    139         /*
    140          * The collect() method has three parameters:
    141          * The first parameter is the {@code WCStatistic} constructor reference.
    142          * collect() will create {@code WCStatistics} instances, where
    143          * statistics will be aggregated.
    144          * The second parameter shows how {@code WCStatistics} will process
    145          * String.
    146          * The third parameter shows how to merge two {@code WCStatistic}
    147          * instances.
    148          *
    149          * Also {@code Collector} can be used, which would be more reusable
    150          * solution. See {@code CSVProcessor} example for how {@code Collector}
    151          * can be implemented.
    152          *
    153          * Note that the any performance increase when going parallel will
    154          * depend on the size of the input (lines) and the cost per-element.
    155          */
    156         WCStatistics wc = reader.lines().parallel()
    157                 .collect(WCStatistics::new,
    158                         WCStatistics::accept,
    159                         WCStatistics::combine);
    160         System.out.println(wc);
    161     }
    162 
    163     private static void usage() {
    164         System.out.println("Usage: " + WC.class.getSimpleName() + " FILE");
    165         System.out.println("Print newline, word,"
    166                 + "  character counts and max line length for FILE.");
    167     }
    168 
    169     private static class WCStatistics implements Consumer<String> {
    170         /*
    171          * @implNote This implementation does not need to be thread safe because
    172          * the parallel implementation of
    173          * {@link java.util.stream.Stream#collect Stream.collect()}
    174          * provides the necessary partitioning and isolation for safe parallel
    175          * execution.
    176          */
    177 
    178         private long characterCount;
    179         private long lineCount;
    180         private long wordCount;
    181         private long maxLineLength;
    182 
    183 
    184         /*
    185          * Processes line.
    186          */
    187         @Override
    188         public void accept(String line) {
    189             characterCount += line.length();
    190             lineCount++;
    191             wordCount += nonWordPattern.splitAsStream(line)
    192                     .filter(str -> !str.isEmpty()).count();
    193             maxLineLength = Math.max(maxLineLength, line.length());
    194         }
    195 
    196         /*
    197          * Merges two WCStatistics.
    198          */
    199         public void combine(WCStatistics stat) {
    200             wordCount += stat.wordCount;
    201             lineCount += stat.lineCount;
    202             characterCount += stat.characterCount;
    203             maxLineLength = Math.max(maxLineLength, stat.maxLineLength);
    204         }
    205 
    206         @Override
    207         public String toString() {
    208             StringBuilder sb = new StringBuilder();
    209             sb.append("#------WCStatistic------#\n");
    210             sb.append("Character count = ").append(characterCount).append('\n');
    211             sb.append("Word count = ").append(wordCount).append('\n');
    212             sb.append("Newline count = ").append(lineCount).append('\n');
    213             sb.append("Max line length = ").append(maxLineLength).append('\n');
    214             return sb.toString();
    215         }
    216     }
    217 }
    218