1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat.inference; 18 19 import org.apache.commons.math.MathException; 20 21 /** 22 * An interface for Chi-Square tests. 23 * <p>This interface handles only known distributions. If the distribution is 24 * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest 25 * UnknownDistributionChiSquareTest} extended interface should be used instead.</p> 26 * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $ 27 */ 28 public interface ChiSquareTest { 29 30 /** 31 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 32 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code> 33 * frequency counts. 34 * <p> 35 * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that 36 * the observed counts follow the expected distribution.</p> 37 * <p> 38 * <strong>Preconditions</strong>: <ul> 39 * <li>Expected counts must all be positive. 40 * </li> 41 * <li>Observed counts must all be >= 0. 42 * </li> 43 * <li>The observed and expected arrays must have the same length and 44 * their common length must be at least 2. 45 * </li></ul></p><p> 46 * If any of the preconditions are not met, an 47 * <code>IllegalArgumentException</code> is thrown.</p> 48 * 49 * @param observed array of observed frequency counts 50 * @param expected array of expected frequency counts 51 * @return chiSquare statistic 52 * @throws IllegalArgumentException if preconditions are not met 53 */ 54 double chiSquare(double[] expected, long[] observed) 55 throws IllegalArgumentException; 56 57 /** 58 * Returns the <i>observed significance level</i>, or <a href= 59 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 60 * p-value</a>, associated with a 61 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 62 * Chi-square goodness of fit test</a> comparing the <code>observed</code> 63 * frequency counts to those in the <code>expected</code> array. 64 * <p> 65 * The number returned is the smallest significance level at which one can reject 66 * the null hypothesis that the observed counts conform to the frequency distribution 67 * described by the expected counts.</p> 68 * <p> 69 * <strong>Preconditions</strong>: <ul> 70 * <li>Expected counts must all be positive. 71 * </li> 72 * <li>Observed counts must all be >= 0. 73 * </li> 74 * <li>The observed and expected arrays must have the same length and 75 * their common length must be at least 2. 76 * </li></ul></p><p> 77 * If any of the preconditions are not met, an 78 * <code>IllegalArgumentException</code> is thrown.</p> 79 * 80 * @param observed array of observed frequency counts 81 * @param expected array of expected frequency counts 82 * @return p-value 83 * @throws IllegalArgumentException if preconditions are not met 84 * @throws MathException if an error occurs computing the p-value 85 */ 86 double chiSquareTest(double[] expected, long[] observed) 87 throws IllegalArgumentException, MathException; 88 89 /** 90 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 91 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts 92 * conform to the frequency distribution described by the expected counts, with 93 * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected 94 * with 100 * (1 - alpha) percent confidence. 95 * <p> 96 * <strong>Example:</strong><br> 97 * To test the hypothesis that <code>observed</code> follows 98 * <code>expected</code> at the 99% level, use </p><p> 99 * <code>chiSquareTest(expected, observed, 0.01) </code></p> 100 * <p> 101 * <strong>Preconditions</strong>: <ul> 102 * <li>Expected counts must all be positive. 103 * </li> 104 * <li>Observed counts must all be >= 0. 105 * </li> 106 * <li>The observed and expected arrays must have the same length and 107 * their common length must be at least 2. 108 * <li> <code> 0 < alpha < 0.5 </code> 109 * </li></ul></p><p> 110 * If any of the preconditions are not met, an 111 * <code>IllegalArgumentException</code> is thrown.</p> 112 * 113 * @param observed array of observed frequency counts 114 * @param expected array of expected frequency counts 115 * @param alpha significance level of the test 116 * @return true iff null hypothesis can be rejected with confidence 117 * 1 - alpha 118 * @throws IllegalArgumentException if preconditions are not met 119 * @throws MathException if an error occurs performing the test 120 */ 121 boolean chiSquareTest(double[] expected, long[] observed, double alpha) 122 throws IllegalArgumentException, MathException; 123 124 /** 125 * Computes the Chi-Square statistic associated with a 126 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 127 * chi-square test of independence</a> based on the input <code>counts</code> 128 * array, viewed as a two-way table. 129 * <p> 130 * The rows of the 2-way table are 131 * <code>count[0], ... , count[count.length - 1] </code></p> 132 * <p> 133 * <strong>Preconditions</strong>: <ul> 134 * <li>All counts must be >= 0. 135 * </li> 136 * <li>The count array must be rectangular (i.e. all count[i] subarrays 137 * must have the same length). 138 * </li> 139 * <li>The 2-way table represented by <code>counts</code> must have at 140 * least 2 columns and at least 2 rows. 141 * </li> 142 * </li></ul></p><p> 143 * If any of the preconditions are not met, an 144 * <code>IllegalArgumentException</code> is thrown.</p> 145 * 146 * @param counts array representation of 2-way table 147 * @return chiSquare statistic 148 * @throws IllegalArgumentException if preconditions are not met 149 */ 150 double chiSquare(long[][] counts) 151 throws IllegalArgumentException; 152 153 /** 154 * Returns the <i>observed significance level</i>, or <a href= 155 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 156 * p-value</a>, associated with a 157 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 158 * chi-square test of independence</a> based on the input <code>counts</code> 159 * array, viewed as a two-way table. 160 * <p> 161 * The rows of the 2-way table are 162 * <code>count[0], ... , count[count.length - 1] </code></p> 163 * <p> 164 * <strong>Preconditions</strong>: <ul> 165 * <li>All counts must be >= 0. 166 * </li> 167 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length). 168 * </li> 169 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 170 * at least 2 rows. 171 * </li> 172 * </li></ul></p><p> 173 * If any of the preconditions are not met, an 174 * <code>IllegalArgumentException</code> is thrown.</p> 175 * 176 * @param counts array representation of 2-way table 177 * @return p-value 178 * @throws IllegalArgumentException if preconditions are not met 179 * @throws MathException if an error occurs computing the p-value 180 */ 181 double chiSquareTest(long[][] counts) 182 throws IllegalArgumentException, MathException; 183 184 /** 185 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 186 * chi-square test of independence</a> evaluating the null hypothesis that the classifications 187 * represented by the counts in the columns of the input 2-way table are independent of the rows, 188 * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected 189 * with 100 * (1 - alpha) percent confidence. 190 * <p> 191 * The rows of the 2-way table are 192 * <code>count[0], ... , count[count.length - 1] </code></p> 193 * <p> 194 * <strong>Example:</strong><br> 195 * To test the null hypothesis that the counts in 196 * <code>count[0], ... , count[count.length - 1] </code> 197 * all correspond to the same underlying probability distribution at the 99% level, use </p><p> 198 * <code>chiSquareTest(counts, 0.01) </code></p> 199 * <p> 200 * <strong>Preconditions</strong>: <ul> 201 * <li>All counts must be >= 0. 202 * </li> 203 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length). 204 * </li> 205 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 206 * at least 2 rows. 207 * </li> 208 * </li></ul></p><p> 209 * If any of the preconditions are not met, an 210 * <code>IllegalArgumentException</code> is thrown.</p> 211 * 212 * @param counts array representation of 2-way table 213 * @param alpha significance level of the test 214 * @return true iff null hypothesis can be rejected with confidence 215 * 1 - alpha 216 * @throws IllegalArgumentException if preconditions are not met 217 * @throws MathException if an error occurs performing the test 218 */ 219 boolean chiSquareTest(long[][] counts, double alpha) 220 throws IllegalArgumentException, MathException; 221 222 } 223