/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.inference;

import java.util.Collection;

import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.distribution.FDistribution;
import org.apache.commons.math.distribution.FDistributionImpl;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;


/**
 * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
 * interface.
 *
 * <p>Uses the
 * {@link org.apache.commons.math.distribution.FDistribution
 * commons-math F Distribution implementation} to estimate exact p-values.</p>
 *
 * <p>This implementation is based on a description at
 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
 * <pre>
 * Abbreviations: bg = between groups,
 *                wg = within groups,
 *                ss = sum squared deviations
 * </pre>
 *
 * @since 1.2
 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (Tue, 10 Aug 2010) $
 */
public class OneWayAnovaImpl implements OneWayAnova {

    /**
     * Default constructor.
     */
    public OneWayAnovaImpl() {
    }

    /**
     * {@inheritDoc}<p>
     * This implementation computes the F statistic using the definitional
     * formula<pre>
     *   F = msbg/mswg</pre>
     * where<pre>
     *  msbg = between group mean square
     *  mswg = within group mean square</pre>
     * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
     * here</a></p>
     */
    public double anovaFValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        return anovaStats(categoryData).F;
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     */
    public double anovaPValue(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {
        final AnovaStats a = anovaStats(categoryData);
        // Numerator df = between groups, denominator df = within groups.
        final FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
        return 1.0 - fdist.cumulativeProbability(a.F);
    }

    /**
     * {@inheritDoc}<p>
     * This implementation uses the
     * {@link org.apache.commons.math.distribution.FDistribution
     * commons-math F Distribution implementation} to estimate the exact
     * p-value, using the formula<pre>
     *   p = 1 - cumulativeProbability(F)</pre>
     * where <code>F</code> is the F value and <code>cumulativeProbability</code>
     * is the commons-math implementation of the F distribution.</p>
     * <p>True is returned iff the estimated p-value is less than alpha.</p>
     * <p><code>alpha</code> must satisfy <code>0 &lt; alpha &lt;= 0.5</code>;
     * any other value, including <code>NaN</code>, is rejected with an
     * <code>IllegalArgumentException</code> before any statistic is
     * computed.</p>
     */
    public boolean anovaTest(Collection<double[]> categoryData, double alpha)
        throws IllegalArgumentException, MathException {
        // Expressed as a negated "in range" test: every comparison involving
        // NaN is false, so this form rejects NaN as well as values outside
        // (0, 0.5].
        if (!(alpha > 0 && alpha <= 0.5)) {
            throw MathRuntimeException.createIllegalArgumentException(
                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
                  alpha, 0, 0.5);
        }
        return anovaPValue(categoryData) < alpha;
    }


    /**
     * This method actually does the calculations (except P-value).
     *
     * <p>The input is validated first: there must be at least two categories
     * and every category must hold at least two values.</p>
     *
     * @param categoryData <code>Collection</code> of <code>double[]</code>
     * arrays each containing data for one category
     * @return computed AnovaStats (between/within degrees of freedom and F)
     * @throws IllegalArgumentException if fewer than two categories are
     * supplied or any category contains fewer than two values
     * @throws MathException if an error occurs computing the Anova stats
     */
    private AnovaStats anovaStats(Collection<double[]> categoryData)
        throws IllegalArgumentException, MathException {

        // check if we have enough categories
        if (categoryData.size() < 2) {
            throw MathRuntimeException.createIllegalArgumentException(
                  LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                  categoryData.size());
        }

        // check if each category has enough data
        for (double[] array : categoryData) {
            if (array.length <= 1) {
                throw MathRuntimeException.createIllegalArgumentException(
                      LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                      array.length);
            }
        }

        int dfwg = 0;
        double sswg = 0;
        final Sum totsum = new Sum();
        final SumOfSquares totsumsq = new SumOfSquares();
        int totnum = 0;

        for (double[] data : categoryData) {

            final Sum sum = new Sum();
            final SumOfSquares sumsq = new SumOfSquares();

            for (double val : data) {
                // within category
                sum.increment(val);
                sumsq.increment(val);

                // for all categories
                totsum.increment(val);
                totsumsq.increment(val);
            }

            // The number of observations is simply the array length.
            final int num = data.length;
            totnum += num;
            dfwg += num - 1;

            // Sum of squared deviations via sum(x^2) - (sum x)^2 / n.
            // NOTE(review): this one-pass form can lose precision when the
            // values are large relative to their spread -- confirm it is
            // acceptable for the expected data.
            final double ss = sumsq.getResult() - sum.getResult() *
                sum.getResult() / num;
            sswg += ss;
        }

        // Total sum of squares over all observations, same formula.
        final double sst = totsumsq.getResult() - totsum.getResult() *
            totsum.getResult() / totnum;
        final double ssbg = sst - sswg;
        final int dfbg = categoryData.size() - 1;
        final double msbg = ssbg / dfbg;
        final double mswg = sswg / dfwg;
        final double F = msbg / mswg;

        return new AnovaStats(dfbg, dfwg, F);
    }

    /**
        Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
        No get/set methods provided; instances are immutable.
     */
    private static class AnovaStats {

        /** Degrees of freedom in numerator (between groups). */
        private final int dfbg;

        /** Degrees of freedom in denominator (within groups). */
        private final int dfwg;

        /** Statistic. */
        private final double F;

        /**
         * Constructor
         * @param dfbg degrees of freedom in numerator (between groups)
         * @param dfwg degrees of freedom in denominator (within groups)
         * @param F statistic
         */
        private AnovaStats(int dfbg, int dfwg, double F) {
            this.dfbg = dfbg;
            this.dfwg = dfwg;
            this.F = F;
        }
    }

}