Home | History | Annotate | Download | only in inference
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 package org.apache.commons.math.stat.inference;
     18 
     19 import org.apache.commons.math.MathException;
     20 
     21 /**
     22  * An interface for Chi-Square tests.
     23  * <p>This interface handles only known distributions. If the distribution is
     24  * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
     25  * UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
     26  * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
     27  */
     28 public interface ChiSquareTest {
     29 
     30      /**
     31      * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
     32      * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
     33      * frequency counts.
     34      * <p>
     35      * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
     36      * the observed counts follow the expected distribution.</p>
     37      * <p>
     38      * <strong>Preconditions</strong>:</p><ul>
     39      * <li>Expected counts must all be positive.
     40      * </li>
     41      * <li>Observed counts must all be &gt;= 0.
     42      * </li>
     43      * <li>The observed and expected arrays must have the same length and
     44      * their common length must be at least 2.
     45      * </li></ul><p>
     46      * If any of the preconditions are not met, an
     47      * <code>IllegalArgumentException</code> is thrown.</p>
     48      *
     49      * @param expected array of expected frequency counts
     50      * @param observed array of observed frequency counts
     51      * @return chiSquare statistic
     52      * @throws IllegalArgumentException if preconditions are not met
     53      */
     54     double chiSquare(double[] expected, long[] observed)
     55         throws IllegalArgumentException;
     56 
     57     /**
     58      * Returns the <i>observed significance level</i>, or <a href=
     59      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
     60      * p-value</a>, associated with a
     61      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
     62      * Chi-square goodness of fit test</a> comparing the <code>observed</code>
     63      * frequency counts to those in the <code>expected</code> array.
     64      * <p>
     65      * The number returned is the smallest significance level at which one can reject
     66      * the null hypothesis that the observed counts conform to the frequency distribution
     67      * described by the expected counts.</p>
     68      * <p>
     69      * <strong>Preconditions</strong>:</p><ul>
     70      * <li>Expected counts must all be positive.
     71      * </li>
     72      * <li>Observed counts must all be &gt;= 0.
     73      * </li>
     74      * <li>The observed and expected arrays must have the same length and
     75      * their common length must be at least 2.
     76      * </li></ul><p>
     77      * If any of the preconditions are not met, an
     78      * <code>IllegalArgumentException</code> is thrown.</p>
     79      *
     80      * @param expected array of expected frequency counts
     81      * @param observed array of observed frequency counts
     82      * @return p-value
     83      * @throws IllegalArgumentException if preconditions are not met
     84      * @throws MathException if an error occurs computing the p-value
     85      */
     86     double chiSquareTest(double[] expected, long[] observed)
     87         throws IllegalArgumentException, MathException;
     88 
     89     /**
     90      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
     91      * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
     92      * conform to the frequency distribution described by the expected counts, with
     93      * significance level <code>alpha</code>.  Returns true iff the null hypothesis can be rejected
     94      * with 100 * (1 - alpha) percent confidence.
     95      * <p>
     96      * <strong>Example:</strong><br>
     97      * To test the hypothesis that <code>observed</code> follows
     98      * <code>expected</code> at the 99% level, use </p><p>
     99      * <code>chiSquareTest(expected, observed, 0.01) </code></p>
    100      * <p>
    101      * <strong>Preconditions</strong>:</p><ul>
    102      * <li>Expected counts must all be positive.
    103      * </li>
    104      * <li>Observed counts must all be &gt;= 0.
    105      * </li>
    106      * <li>The observed and expected arrays must have the same length and
    107      * their common length must be at least 2.</li>
    108      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
    109      * </li></ul><p>
    110      * If any of the preconditions are not met, an
    111      * <code>IllegalArgumentException</code> is thrown.</p>
    112      *
    113      * @param expected array of expected frequency counts
    114      * @param observed array of observed frequency counts
    115      * @param alpha significance level of the test
    116      * @return true iff null hypothesis can be rejected with confidence
    117      * 1 - alpha
    118      * @throws IllegalArgumentException if preconditions are not met
    119      * @throws MathException if an error occurs performing the test
    120      */
    121     boolean chiSquareTest(double[] expected, long[] observed, double alpha)
    122         throws IllegalArgumentException, MathException;
    123 
    124     /**
    125      * Computes the Chi-Square statistic associated with a
    126      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
    127      * chi-square test of independence</a> based on the input <code>counts</code>
    128      * array, viewed as a two-way table.
    129      * <p>
    130      * The rows of the 2-way table are
    131      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
    132      * <p>
    133      * <strong>Preconditions</strong>:</p><ul>
    134      * <li>All counts must be &gt;= 0.
    135      * </li>
    136      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays
    137      * must have the same length).
    138      * </li>
    139      * <li>The 2-way table represented by <code>counts</code> must have at
    140      * least 2 columns and at least 2 rows.
    141      * </li>
    142      * </ul><p>
    143      * If any of the preconditions are not met, an
    144      * <code>IllegalArgumentException</code> is thrown.</p>
    145      *
    146      * @param counts array representation of 2-way table
    147      * @return chiSquare statistic
    148      * @throws IllegalArgumentException if preconditions are not met
    149      */
    150     double chiSquare(long[][] counts)
    151     throws IllegalArgumentException;
    152 
    153     /**
    154      * Returns the <i>observed significance level</i>, or <a href=
    155      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
    156      * p-value</a>, associated with a
    157      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
    158      * chi-square test of independence</a> based on the input <code>counts</code>
    159      * array, viewed as a two-way table.
    160      * <p>
    161      * The rows of the 2-way table are
    162      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
    163      * <p>
    164      * <strong>Preconditions</strong>:</p><ul>
    165      * <li>All counts must be &gt;= 0.
    166      * </li>
    167      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have the same length).
    168      * </li>
    169      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
    170      *        at least 2 rows.
    171      * </li>
    172      * </ul><p>
    173      * If any of the preconditions are not met, an
    174      * <code>IllegalArgumentException</code> is thrown.</p>
    175      *
    176      * @param counts array representation of 2-way table
    177      * @return p-value
    178      * @throws IllegalArgumentException if preconditions are not met
    179      * @throws MathException if an error occurs computing the p-value
    180      */
    181     double chiSquareTest(long[][] counts)
    182     throws IllegalArgumentException, MathException;
    183 
    184     /**
    185      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
    186      * chi-square test of independence</a> evaluating the null hypothesis that the classifications
    187      * represented by the counts in the columns of the input 2-way table are independent of the rows,
    188      * with significance level <code>alpha</code>.  Returns true iff the null hypothesis can be rejected
    189      * with 100 * (1 - alpha) percent confidence.
    190      * <p>
    191      * The rows of the 2-way table are
    192      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
    193      * <p>
    194      * <strong>Example:</strong><br>
    195      * To test the null hypothesis that the counts in
    196      * <code>counts[0], ... , counts[counts.length - 1] </code>
    197      * all correspond to the same underlying probability distribution at the 99% level, use </p><p>
    198      * <code>chiSquareTest(counts, 0.01) </code></p>
    199      * <p>
    200      * <strong>Preconditions</strong>:</p><ul>
    201      * <li>All counts must be &gt;= 0.
    202      * </li>
    203      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have the same length).
    204      * </li>
    205      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
    206      *        at least 2 rows.
    207      * </li>
    208      * </ul><p>
    209      * If any of the preconditions are not met, an
    210      * <code>IllegalArgumentException</code> is thrown.</p>
    211      *
    212      * @param counts array representation of 2-way table
    213      * @param alpha significance level of the test
    214      * @return true iff null hypothesis can be rejected with confidence
    215      * 1 - alpha
    216      * @throws IllegalArgumentException if preconditions are not met
    217      * @throws MathException if an error occurs performing the test
    218      */
    219     boolean chiSquareTest(long[][] counts, double alpha)
    220     throws IllegalArgumentException, MathException;
    221 
    222 }
    223