1 /* 2 * Copyright (C) 2010 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package examples; 18 19 20 import com.google.caliper.BeforeExperiment; 21 import com.google.caliper.Benchmark; 22 23 import java.util.BitSet; 24 import java.util.Random; 25 26 /** 27 * A simple example of a benchmark for BitSet showing some of the issues with 28 * micro-benchmarking. 29 * 30 * <p>The following is a discussion of how the benchmarks evolved and what they 31 * may (or may not) tell us. This discussion is based on the following set of 32 * results: 33 * 34 * <p><pre> 35 * 0% Scenario{vm=java, benchmark=SetBitSetX64} 233.45ns; =0.31ns @ 3 trials 36 * 20% Scenario{vm=java, benchmark=SetMaskX64} 116.62ns; =0.09ns @ 3 trials 37 * 40% Scenario{vm=java, benchmark=CharsToBitSet} 748.40ns; =23.52ns @ 10 trials 38 * 60% Scenario{vm=java, benchmark=CharsToMask} 198.55ns; =9.46ns @ 10 trials 39 * 80% Scenario{vm=java, benchmark=BaselineIteration} 67.85ns; =0.44ns @ 3 trials 40 * 41 * benchmark ns logarithmic runtime 42 * SetBitSetX64 233 XXXXXXXXX||||||||||||||| 43 * SetMaskX64 117 XXXX||||||||||||||||| 44 * CharsToBitSet 748 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 45 * CharsToMask 199 XXXXXXX|||||||||||||||| 46 * BaselineIteration 68 XX||||||||||||||||| 47 * </pre> 48 * 49 * <p>Initially things look simple. The {@link #setBitSetX64(int)} benchmark 50 * takes approximately twice as long as {@link #setMaskX64(int)}. However 51 * the inner loops in these benchmarks have almost no content, so a more 52 * 'real world' benchmark was devised in an attempt to back up these results. 53 * 54 * <p>The {@link #charsToMask(int)} and {@link #charsToBitSet(int)} 55 * benchmarks convert a simple char[] of '1's and '0's to a corresponding BitSet 56 * or bit mask. These also processes 64 bits per iteration and so appears to be 57 * doing the same amount of work as the first benchmarks. 58 * 59 * <p>Additionally the {@link BitSetBenchmark#baselineIteration(int)} 60 * benchmark attempts to measure the raw cost of looping through and reading the 61 * source data. 62 * 63 * <p>When comparing the benchmarks that use bit masking, we see that the 64 * measured time of the SetMaskX64 benchmark (117ns) is roughly the same 65 * as the CharsToMask benchmark (199ns) with the BaselineIteration time (68ms) 66 * subtracted from it. This gives us some confidence that both benchmarks are 67 * resulting in the same underlying work on the CPU. 68 * 69 * <p>However the CharsToBitSet and the SetBitSetX64 benchmarks differ very 70 * significantly (approximately 3x) even when accounting for the 71 * BaselineIteration result. This suggests that the performance of 72 * {@link BitSet#set} is quite dependent on the surrounding code and how 73 * it is optimized by the JVM. 74 * 75 * <p>The conclusions we can draw from this are: 76 * 77 * <p><b>1:</b> Using BitSet is slower than using bit masks directly. At best it 78 * seems about 2x slower than a bit mask, but could easily be 5x slower in real 79 * applications. 80 * 81 * <p>While these are only estimates, we can conclude that when performance is 82 * important and where bit set operations occur in tight loops, bit masks 83 * should be used in favor of BitSets. 84 * 85 * <p><b>2:</b>Overly simplistic benchmarks can give a very false impression of 86 * performance. 87 */ 88 public class BitSetBenchmark { 89 private BitSet bitSet; 90 private char[] bitString; 91 92 @BeforeExperiment void setUp() throws Exception { 93 bitSet = new BitSet(64); 94 bitString = new char[64]; 95 Random r = new Random(); 96 for (int n = 0; n < 64; n++) { 97 bitString[n] = r.nextBoolean() ? '1' : '0'; 98 } 99 } 100 101 /** 102 * This benchmark attempts to measure performance of {@link BitSet#set}. 103 */ 104 @Benchmark int setBitSetX64(int reps) { 105 long count = 64L * reps; 106 for (int i = 0; i < count; i++) { 107 bitSet.set(i & 0x3F, true); 108 } 109 return bitSet.hashCode(); 110 } 111 112 /** 113 * This benchmark attempts to measure performance of direct bit-manipulation. 114 */ 115 @Benchmark long setMaskX64(int reps) { 116 long count = 64L * reps; 117 long bitMask = 0L; 118 for (int i = 0; i < count; i++) { 119 bitMask |= 1 << (i & 0x3F); 120 } 121 return bitMask; 122 } 123 124 /** 125 * This benchmark parses a char[] of 1's and 0's into a BitSet. Results from 126 * this benchmark should be comparable with those from 127 * {@link #charsToMask(int)}. 128 */ 129 @Benchmark String charsToBitSet(int reps) { 130 /* 131 * This benchmark now measures the complete parsing of a char[] rather than 132 * a single invocation of {@link BitSet#set}. However this fine because 133 * it is intended to be a comparative benchmark. 134 */ 135 for (int i = 0; i < reps; i++) { 136 for (int n = 0; n < bitString.length; n++) { 137 bitSet.set(n, bitString[n] == '1'); 138 } 139 } 140 return bitSet.toString(); 141 } 142 143 /** 144 * This benchmark parses a char[] of 1's and 0's into a bit mask. Results from 145 * this benchmark should be comparable with those from 146 * {@link #charsToBitSet(int)}. 147 */ 148 @Benchmark long charsToMask(int reps) { 149 /* 150 * Comparing results we see a far more realistic sounding result whereby 151 * using a bit mask is a little over 4x faster than using BitSet. 152 */ 153 long bitMask = 0; 154 for (int i = 0; i < reps; i++) { 155 for (int n = 0; n < bitString.length; n++) { 156 long m = 1 << n; 157 if (bitString[n] == '1') { 158 bitMask |= m; 159 } else { 160 bitMask &= ~m; 161 } 162 } 163 } 164 return bitMask; 165 } 166 167 /** 168 * This benchmark attempts to measure the baseline cost of both 169 * {@link #charsToBitSet(int)} and {@link #charsToMask(int)}. 170 * It does this by unconditionally summing the character values of the char[]. 171 * This is as close to a no-op case as we can expect to get without unwanted 172 * over-optimization. 173 */ 174 @Benchmark long baselineIteration(int reps) { 175 int badHash = 0; 176 for (int i = 0; i < reps; i++) { 177 for (int n = 0; n < bitString.length; n++) { 178 badHash += bitString[n]; 179 } 180 } 181 return badHash; 182 } 183 } 184