/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of Oracle nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This source code is provided to illustrate the usage of a given feature
 * or technique and has been deliberately simplified. Additional steps
 * required for a production-quality application, such as security checks,
 * input validation, and proper error handling, might not be present in
 * this sample code.
 */

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.function.Consumer;
import java.util.regex.Pattern;

/**
 * WC - Prints newline, word, and character counts and the maximum line
 * length for a file. See the {@link #usage} method for instructions and
 * command line parameters. This sample shows usages of:
 * <ul>
 * <li>Lambda and bulk operations. Shows how to create a custom collector to
 * gather custom statistics. Implements the collection of statistics using a
 * built-in API.</li>
 * <li>Constructor reference.</li>
 * <li>Try-with-resources feature.</li>
 * </ul>
 *
 */
public class WC {

    /*
     * The number of characters that may be read while still being able to
     * rewind the reader with reset(). Per the BufferedReader.mark() contract,
     * a limit larger than the reader's buffer causes a buffer of at least
     * this size to be allocated, so this value trades memory for the largest
     * input that can be processed in several passes.
     */
    private static final int READ_AHEAD_LIMIT = 100_000_000;

    // The pattern for splitting strings by non word characters to get words.
    private static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W");

    /**
     * The main method for the WC program. Run the program with an empty
     * argument list to see possible arguments.
     *
     * <p>The statistics are printed twice: first computed with four separate
     * stream pipelines, then with a single pass using a custom reduction.
     * If the file cannot be opened, the usage text is printed to standard
     * output and the exception to standard error.
     *
     * @param args the argument list for WC; exactly one file name is expected
     * @throws java.io.IOException If an input exception occurred.
     */
    public static void main(String[] args) throws IOException {

        if (args.length != 1) {
            usage();
            return;
        }

        try (BufferedReader reader = new BufferedReader(
                new FileReader(args[0]))) {
            // Remember the start of the input so every pass can rewind to it.
            reader.mark(READ_AHEAD_LIMIT);
            /*
             * Statistics can be gathered in four passes using a built-in API.
             * The method demonstrates how separate operations can be
             * implemented using a built-in API.
             */
            collectInFourPasses(reader);
            /*
             * Usage of several passes to collect data is not the best way.
             * Statistics can be gathered by a custom collector in one pass.
             */
            reader.reset();
            collectInOnePass(reader);
        } catch (FileNotFoundException e) {
            usage();
            System.err.println(e);
        }
    }

    /**
     * Prints the character, word and line counts and the maximum line length,
     * reading the whole input once per statistic.
     *
     * @param reader a reader positioned at a mark set by the caller; it is
     *               rewound to that mark with {@code reset()} between passes
     * @throws IOException if the reader cannot be reset to its mark
     */
    private static void collectInFourPasses(BufferedReader reader)
            throws IOException {
        /*
         * Input is read as a stream of lines by lines().
         * Every line is turned into a stream of chars by the flatMapToInt(...)
         * method.
         * Length of the stream is counted by count().
         * Line terminators are not part of the lines, so they are not counted.
         */
        System.out.println("Character count = "
                + reader.lines().flatMapToInt(String::chars).count());
        /*
         * Input is read as a stream of lines by lines().
         * Every line is split by NON_WORD_PATTERN into words by flatMap(...)
         * method.
         * Empty strings produced by the split are removed by the filter(...)
         * method.
         * Length of the stream is counted by count().
         */
        reader.reset();
        System.out.println("Word count = "
                + reader.lines()
                        .flatMap(NON_WORD_PATTERN::splitAsStream)
                        .filter(str -> !str.isEmpty()).count());

        reader.reset();
        System.out.println("Newline count = " + reader.lines().count());
        /*
         * Input is read as a stream of lines by lines().
         * Every line is mapped to its length.
         * Maximum of the lengths is calculated.
         * An input without any line has no maximum; 0 is reported in that
         * case, which matches the initial value used by WCStatistics in the
         * one-pass variant.
         */
        reader.reset();
        System.out.println("Max line length = "
                + reader.lines().mapToInt(String::length).max().orElse(0));
    }

    /**
     * Prints all statistics after reading the input a single time.
     *
     * @param reader the reader to consume from its current position
     */
    private static void collectInOnePass(BufferedReader reader) {
        /*
         * The collect() method has three parameters:
         * The first parameter is the {@code WCStatistics} constructor
         * reference. collect() will create {@code WCStatistics} instances,
         * where statistics will be aggregated.
         * The second parameter shows how {@code WCStatistics} will process
         * String.
         * The third parameter shows how to merge two {@code WCStatistics}
         * instances.
         *
         * Also {@code Collector} can be used, which would be more reusable
         * solution. See {@code CSVProcessor} example for how {@code Collector}
         * can be implemented.
         *
         * Note that any performance increase when going parallel will
         * depend on the size of the input (lines) and the cost per-element.
         */
        WCStatistics wc = reader.lines().parallel()
                .collect(WCStatistics::new,
                        WCStatistics::accept,
                        WCStatistics::combine);
        System.out.println(wc);
    }

    /**
     * Prints the command line synopsis to standard output.
     */
    private static void usage() {
        System.out.println("Usage: " + WC.class.getSimpleName() + " FILE");
        System.out.println("Print newline, word,"
                + " character counts and max line length for FILE.");
    }

    /**
     * Mutable accumulator of the statistics; one instance aggregates any
     * number of lines and can absorb the totals of another instance.
     */
    private static class WCStatistics implements Consumer<String> {
        /*
         * @implNote This implementation does not need to be thread safe because
         * the parallel implementation of
         * {@link java.util.stream.Stream#collect Stream.collect()}
         * provides the necessary partitioning and isolation for safe parallel
         * execution.
         */

        private long characterCount;
        private long lineCount;
        private long wordCount;
        private long maxLineLength;


        /*
         * Processes line: adds its length, counts it as one line, counts its
         * non-empty tokens as words and updates the maximum line length.
         */
        @Override
        public void accept(String line) {
            characterCount += line.length();
            lineCount++;
            wordCount += NON_WORD_PATTERN.splitAsStream(line)
                    .filter(str -> !str.isEmpty()).count();
            maxLineLength = Math.max(maxLineLength, line.length());
        }

        /*
         * Merges two WCStatistics: counts are summed and the larger maximum
         * line length is kept.
         */
        public void combine(WCStatistics stat) {
            wordCount += stat.wordCount;
            lineCount += stat.lineCount;
            characterCount += stat.characterCount;
            maxLineLength = Math.max(maxLineLength, stat.maxLineLength);
        }

        /*
         * Renders the report: a header line followed by one line per
         * statistic, each terminated by '\n'.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("#------WCStatistic------#\n");
            sb.append("Character count = ").append(characterCount).append('\n');
            sb.append("Word count = ").append(wordCount).append('\n');
            sb.append("Newline count = ").append(lineCount).append('\n');
            sb.append("Max line length = ").append(maxLineLength).append('\n');
            return sb.toString();
        }
    }
}