Home | History | Annotate | Download | only in inference
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 package org.apache.commons.math.stat.inference;
     18 
     19 import org.apache.commons.math.MathException;
     20 import org.apache.commons.math.MathRuntimeException;
     21 import org.apache.commons.math.distribution.TDistribution;
     22 import org.apache.commons.math.distribution.TDistributionImpl;
     23 import org.apache.commons.math.exception.util.LocalizedFormats;
     24 import org.apache.commons.math.stat.StatUtils;
     25 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
     26 import org.apache.commons.math.util.FastMath;
     27 
     28 /**
     29  * Implements t-test statistics defined in the {@link TTest} interface.
     30  * <p>
     31  * Uses commons-math {@link org.apache.commons.math.distribution.TDistributionImpl}
     32  * implementation to estimate exact p-values.</p>
     33  *
     34  * @version $Revision: 1042336 $ $Date: 2010-12-05 13:40:48 +0100 (dim. 05 dc. 2010) $
     35  */
     36 public class TTestImpl implements TTest  {
     37 
    /**
     * Distribution used to compute inference statistics.
     * NOTE(review): presumably assigned by <code>setDistribution</code>, which
     * the constructor {@link #TTestImpl(TDistribution)} calls -- confirm.
     * @deprecated in 2.2 (to be removed in 3.0).
     */
    @Deprecated
    private TDistribution distribution;
     43 
     44     /**
     45      * Default constructor.
     46      */
     47     public TTestImpl() {
     48         this(new TDistributionImpl(1.0));
     49     }
     50 
     51     /**
     52      * Create a test instance using the given distribution for computing
     53      * inference statistics.
     54      * @param t distribution used to compute inference statistics.
     55      * @since 1.2
     56      * @deprecated in 2.2 (to be removed in 3.0).
     57      */
     58     @Deprecated
     59     public TTestImpl(TDistribution t) {
     60         super();
     61         setDistribution(t);
     62     }
     63 
     64     /**
     65      * Computes a paired, 2-sample t-statistic based on the data in the input
     66      * arrays.  The t-statistic returned is equivalent to what would be returned by
     67      * computing the one-sample t-statistic {@link #t(double, double[])}, with
     68      * <code>mu = 0</code> and the sample array consisting of the (signed)
     69      * differences between corresponding entries in <code>sample1</code> and
     70      * <code>sample2.</code>
     71      * <p>
     72      * <strong>Preconditions</strong>: <ul>
     73      * <li>The input arrays must have the same length and their common length
     74      * must be at least 2.
     75      * </li></ul></p>
     76      *
     77      * @param sample1 array of sample data values
     78      * @param sample2 array of sample data values
     79      * @return t statistic
     80      * @throws IllegalArgumentException if the precondition is not met
     81      * @throws MathException if the statistic can not be computed do to a
     82      *         convergence or other numerical error.
     83      */
     84     public double pairedT(double[] sample1, double[] sample2)
     85         throws IllegalArgumentException, MathException {
     86         checkSampleData(sample1);
     87         checkSampleData(sample2);
     88         double meanDifference = StatUtils.meanDifference(sample1, sample2);
     89         return t(meanDifference, 0,
     90                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
     91                 sample1.length);
     92     }
     93 
     94      /**
     95      * Returns the <i>observed significance level</i>, or
     96      * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
     97      * based on the data in the input arrays.
     98      * <p>
     99      * The number returned is the smallest significance level
    100      * at which one can reject the null hypothesis that the mean of the paired
    101      * differences is 0 in favor of the two-sided alternative that the mean paired
    102      * difference is not equal to 0. For a one-sided test, divide the returned
    103      * value by 2.</p>
    104      * <p>
    105      * This test is equivalent to a one-sample t-test computed using
    106      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
    107      * array consisting of the signed differences between corresponding elements of
    108      * <code>sample1</code> and <code>sample2.</code></p>
    109      * <p>
    110      * <strong>Usage Note:</strong><br>
    111      * The validity of the p-value depends on the assumptions of the parametric
    112      * t-test procedure, as discussed
    113      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    114      * here</a></p>
    115      * <p>
    116      * <strong>Preconditions</strong>: <ul>
    117      * <li>The input array lengths must be the same and their common length must
    118      * be at least 2.
    119      * </li></ul></p>
    120      *
    121      * @param sample1 array of sample data values
    122      * @param sample2 array of sample data values
    123      * @return p-value for t-test
    124      * @throws IllegalArgumentException if the precondition is not met
    125      * @throws MathException if an error occurs computing the p-value
    126      */
    127     public double pairedTTest(double[] sample1, double[] sample2)
    128         throws IllegalArgumentException, MathException {
    129         double meanDifference = StatUtils.meanDifference(sample1, sample2);
    130         return tTest(meanDifference, 0,
    131                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
    132                 sample1.length);
    133     }
    134 
    135      /**
    136      * Performs a paired t-test evaluating the null hypothesis that the
    137      * mean of the paired differences between <code>sample1</code> and
    138      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
    139      * mean paired difference is not equal to 0, with significance level
    140      * <code>alpha</code>.
    141      * <p>
    142      * Returns <code>true</code> iff the null hypothesis can be rejected with
    143      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
    144      * <code>alpha * 2</code></p>
    145      * <p>
    146      * <strong>Usage Note:</strong><br>
    147      * The validity of the test depends on the assumptions of the parametric
    148      * t-test procedure, as discussed
    149      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    150      * here</a></p>
    151      * <p>
    152      * <strong>Preconditions</strong>: <ul>
    153      * <li>The input array lengths must be the same and their common length
    154      * must be at least 2.
    155      * </li>
    156      * <li> <code> 0 < alpha < 0.5 </code>
    157      * </li></ul></p>
    158      *
    159      * @param sample1 array of sample data values
    160      * @param sample2 array of sample data values
    161      * @param alpha significance level of the test
    162      * @return true if the null hypothesis can be rejected with
    163      * confidence 1 - alpha
    164      * @throws IllegalArgumentException if the preconditions are not met
    165      * @throws MathException if an error occurs performing the test
    166      */
    167     public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
    168         throws IllegalArgumentException, MathException {
    169         checkSignificanceLevel(alpha);
    170         return pairedTTest(sample1, sample2) < alpha;
    171     }
    172 
    173     /**
    174      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
    175      * t statistic </a> given observed values and a comparison constant.
    176      * <p>
    177      * This statistic can be used to perform a one sample t-test for the mean.
    178      * </p><p>
    179      * <strong>Preconditions</strong>: <ul>
    180      * <li>The observed array length must be at least 2.
    181      * </li></ul></p>
    182      *
    183      * @param mu comparison constant
    184      * @param observed array of values
    185      * @return t statistic
    186      * @throws IllegalArgumentException if input array length is less than 2
    187      */
    188     public double t(double mu, double[] observed)
    189     throws IllegalArgumentException {
    190         checkSampleData(observed);
    191         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
    192                 observed.length);
    193     }
    194 
    195     /**
    196      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
    197      * t statistic </a> to use in comparing the mean of the dataset described by
    198      * <code>sampleStats</code> to <code>mu</code>.
    199      * <p>
    200      * This statistic can be used to perform a one sample t-test for the mean.
    201      * </p><p>
    202      * <strong>Preconditions</strong>: <ul>
    203      * <li><code>observed.getN() > = 2</code>.
    204      * </li></ul></p>
    205      *
    206      * @param mu comparison constant
    207      * @param sampleStats DescriptiveStatistics holding sample summary statitstics
    208      * @return t statistic
    209      * @throws IllegalArgumentException if the precondition is not met
    210      */
    211     public double t(double mu, StatisticalSummary sampleStats)
    212     throws IllegalArgumentException {
    213         checkSampleData(sampleStats);
    214         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
    215                 sampleStats.getN());
    216     }
    217 
    218     /**
    219      * Computes a 2-sample t statistic,  under the hypothesis of equal
    220      * subpopulation variances.  To compute a t-statistic without the
    221      * equal variances hypothesis, use {@link #t(double[], double[])}.
    222      * <p>
    223      * This statistic can be used to perform a (homoscedastic) two-sample
    224      * t-test to compare sample means.</p>
    225      * <p>
    226      * The t-statisitc is</p>
    227      * <p>
    228      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
    229      * </p><p>
    230      * where <strong><code>n1</code></strong> is the size of first sample;
    231      * <strong><code> n2</code></strong> is the size of second sample;
    232      * <strong><code> m1</code></strong> is the mean of first sample;
    233      * <strong><code> m2</code></strong> is the mean of second sample</li>
    234      * </ul>
    235      * and <strong><code>var</code></strong> is the pooled variance estimate:
    236      * </p><p>
    237      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
    238      * </p><p>
    239      * with <strong><code>var1<code></strong> the variance of the first sample and
    240      * <strong><code>var2</code></strong> the variance of the second sample.
    241      * </p><p>
    242      * <strong>Preconditions</strong>: <ul>
    243      * <li>The observed array lengths must both be at least 2.
    244      * </li></ul></p>
    245      *
    246      * @param sample1 array of sample data values
    247      * @param sample2 array of sample data values
    248      * @return t statistic
    249      * @throws IllegalArgumentException if the precondition is not met
    250      */
    251     public double homoscedasticT(double[] sample1, double[] sample2)
    252     throws IllegalArgumentException {
    253         checkSampleData(sample1);
    254         checkSampleData(sample2);
    255         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
    256                 StatUtils.variance(sample1), StatUtils.variance(sample2),
    257                 sample1.length, sample2.length);
    258     }
    259 
    260     /**
    261      * Computes a 2-sample t statistic, without the hypothesis of equal
    262      * subpopulation variances.  To compute a t-statistic assuming equal
    263      * variances, use {@link #homoscedasticT(double[], double[])}.
    264      * <p>
    265      * This statistic can be used to perform a two-sample t-test to compare
    266      * sample means.</p>
    267      * <p>
    268      * The t-statisitc is</p>
    269      * <p>
    270      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
    271      * </p><p>
    272      *  where <strong><code>n1</code></strong> is the size of the first sample
    273      * <strong><code> n2</code></strong> is the size of the second sample;
    274      * <strong><code> m1</code></strong> is the mean of the first sample;
    275      * <strong><code> m2</code></strong> is the mean of the second sample;
    276      * <strong><code> var1</code></strong> is the variance of the first sample;
    277      * <strong><code> var2</code></strong> is the variance of the second sample;
    278      * </p><p>
    279      * <strong>Preconditions</strong>: <ul>
    280      * <li>The observed array lengths must both be at least 2.
    281      * </li></ul></p>
    282      *
    283      * @param sample1 array of sample data values
    284      * @param sample2 array of sample data values
    285      * @return t statistic
    286      * @throws IllegalArgumentException if the precondition is not met
    287      */
    288     public double t(double[] sample1, double[] sample2)
    289     throws IllegalArgumentException {
    290         checkSampleData(sample1);
    291         checkSampleData(sample2);
    292         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
    293                 StatUtils.variance(sample1), StatUtils.variance(sample2),
    294                 sample1.length, sample2.length);
    295     }
    296 
    297     /**
    298      * Computes a 2-sample t statistic </a>, comparing the means of the datasets
    299      * described by two {@link StatisticalSummary} instances, without the
    300      * assumption of equal subpopulation variances.  Use
    301      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
    302      * compute a t-statistic under the equal variances assumption.
    303      * <p>
    304      * This statistic can be used to perform a two-sample t-test to compare
    305      * sample means.</p>
    306      * <p>
    307       * The returned  t-statisitc is</p>
    308      * <p>
    309      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
    310      * </p><p>
    311      * where <strong><code>n1</code></strong> is the size of the first sample;
    312      * <strong><code> n2</code></strong> is the size of the second sample;
    313      * <strong><code> m1</code></strong> is the mean of the first sample;
    314      * <strong><code> m2</code></strong> is the mean of the second sample
    315      * <strong><code> var1</code></strong> is the variance of the first sample;
    316      * <strong><code> var2</code></strong> is the variance of the second sample
    317      * </p><p>
    318      * <strong>Preconditions</strong>: <ul>
    319      * <li>The datasets described by the two Univariates must each contain
    320      * at least 2 observations.
    321      * </li></ul></p>
    322      *
    323      * @param sampleStats1 StatisticalSummary describing data from the first sample
    324      * @param sampleStats2 StatisticalSummary describing data from the second sample
    325      * @return t statistic
    326      * @throws IllegalArgumentException if the precondition is not met
    327      */
    328     public double t(StatisticalSummary sampleStats1,
    329                     StatisticalSummary sampleStats2)
    330     throws IllegalArgumentException {
    331         checkSampleData(sampleStats1);
    332         checkSampleData(sampleStats2);
    333         return t(sampleStats1.getMean(), sampleStats2.getMean(),
    334                 sampleStats1.getVariance(), sampleStats2.getVariance(),
    335                 sampleStats1.getN(), sampleStats2.getN());
    336     }
    337 
    338     /**
    339      * Computes a 2-sample t statistic, comparing the means of the datasets
    340      * described by two {@link StatisticalSummary} instances, under the
    341      * assumption of equal subpopulation variances.  To compute a t-statistic
    342      * without the equal variances assumption, use
    343      * {@link #t(StatisticalSummary, StatisticalSummary)}.
    344      * <p>
    345      * This statistic can be used to perform a (homoscedastic) two-sample
    346      * t-test to compare sample means.</p>
    347      * <p>
    348      * The t-statisitc returned is</p>
    349      * <p>
    350      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
    351      * </p><p>
    352      * where <strong><code>n1</code></strong> is the size of first sample;
    353      * <strong><code> n2</code></strong> is the size of second sample;
    354      * <strong><code> m1</code></strong> is the mean of first sample;
    355      * <strong><code> m2</code></strong> is the mean of second sample
    356      * and <strong><code>var</code></strong> is the pooled variance estimate:
    357      * </p><p>
    358      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
    359      * <p>
    360      * with <strong><code>var1<code></strong> the variance of the first sample and
    361      * <strong><code>var2</code></strong> the variance of the second sample.
    362      * </p><p>
    363      * <strong>Preconditions</strong>: <ul>
    364      * <li>The datasets described by the two Univariates must each contain
    365      * at least 2 observations.
    366      * </li></ul></p>
    367      *
    368      * @param sampleStats1 StatisticalSummary describing data from the first sample
    369      * @param sampleStats2 StatisticalSummary describing data from the second sample
    370      * @return t statistic
    371      * @throws IllegalArgumentException if the precondition is not met
    372      */
    373     public double homoscedasticT(StatisticalSummary sampleStats1,
    374             StatisticalSummary sampleStats2)
    375     throws IllegalArgumentException {
    376         checkSampleData(sampleStats1);
    377         checkSampleData(sampleStats2);
    378         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
    379                 sampleStats1.getVariance(), sampleStats2.getVariance(),
    380                 sampleStats1.getN(), sampleStats2.getN());
    381     }
    382 
    383      /**
    384      * Returns the <i>observed significance level</i>, or
    385      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
    386      * comparing the mean of the input array with the constant <code>mu</code>.
    387      * <p>
    388      * The number returned is the smallest significance level
    389      * at which one can reject the null hypothesis that the mean equals
    390      * <code>mu</code> in favor of the two-sided alternative that the mean
    391      * is different from <code>mu</code>. For a one-sided test, divide the
    392      * returned value by 2.</p>
    393      * <p>
    394      * <strong>Usage Note:</strong><br>
    395      * The validity of the test depends on the assumptions of the parametric
    396      * t-test procedure, as discussed
    397      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
    398      * </p><p>
    399      * <strong>Preconditions</strong>: <ul>
    400      * <li>The observed array length must be at least 2.
    401      * </li></ul></p>
    402      *
    403      * @param mu constant value to compare sample mean against
    404      * @param sample array of sample data values
    405      * @return p-value
    406      * @throws IllegalArgumentException if the precondition is not met
    407      * @throws MathException if an error occurs computing the p-value
    408      */
    409     public double tTest(double mu, double[] sample)
    410     throws IllegalArgumentException, MathException {
    411         checkSampleData(sample);
    412         return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
    413                 sample.length);
    414     }
    415 
    416     /**
    417      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
    418      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
    419      * which <code>sample</code> is drawn equals <code>mu</code>.
    420      * <p>
    421      * Returns <code>true</code> iff the null hypothesis can be
    422      * rejected with confidence <code>1 - alpha</code>.  To
    423      * perform a 1-sided test, use <code>alpha * 2</code>
    424      * </p><p>
    425      * <strong>Examples:</strong><br><ol>
    426      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
    427      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
    428      * </li>
    429      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
    430      * at the 99% level, first verify that the measured sample mean is less
    431      * than <code>mu</code> and then use
    432      * <br><code>tTest(mu, sample, 0.02) </code>
    433      * </li></ol></p>
    434      * <p>
    435      * <strong>Usage Note:</strong><br>
    436      * The validity of the test depends on the assumptions of the one-sample
    437      * parametric t-test procedure, as discussed
    438      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
    439      * </p><p>
    440      * <strong>Preconditions</strong>: <ul>
    441      * <li>The observed array length must be at least 2.
    442      * </li></ul></p>
    443      *
    444      * @param mu constant value to compare sample mean against
    445      * @param sample array of sample data values
    446      * @param alpha significance level of the test
    447      * @return p-value
    448      * @throws IllegalArgumentException if the precondition is not met
    449      * @throws MathException if an error computing the p-value
    450      */
    451     public boolean tTest(double mu, double[] sample, double alpha)
    452     throws IllegalArgumentException, MathException {
    453         checkSignificanceLevel(alpha);
    454         return tTest(mu, sample) < alpha;
    455     }
    456 
    457     /**
    458      * Returns the <i>observed significance level</i>, or
    459      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
    460      * comparing the mean of the dataset described by <code>sampleStats</code>
    461      * with the constant <code>mu</code>.
    462      * <p>
    463      * The number returned is the smallest significance level
    464      * at which one can reject the null hypothesis that the mean equals
    465      * <code>mu</code> in favor of the two-sided alternative that the mean
    466      * is different from <code>mu</code>. For a one-sided test, divide the
    467      * returned value by 2.</p>
    468      * <p>
    469      * <strong>Usage Note:</strong><br>
    470      * The validity of the test depends on the assumptions of the parametric
    471      * t-test procedure, as discussed
    472      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    473      * here</a></p>
    474      * <p>
    475      * <strong>Preconditions</strong>: <ul>
    476      * <li>The sample must contain at least 2 observations.
    477      * </li></ul></p>
    478      *
    479      * @param mu constant value to compare sample mean against
    480      * @param sampleStats StatisticalSummary describing sample data
    481      * @return p-value
    482      * @throws IllegalArgumentException if the precondition is not met
    483      * @throws MathException if an error occurs computing the p-value
    484      */
    485     public double tTest(double mu, StatisticalSummary sampleStats)
    486     throws IllegalArgumentException, MathException {
    487         checkSampleData(sampleStats);
    488         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
    489                 sampleStats.getN());
    490     }
    491 
    492      /**
    493      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
    494      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
    495      * population from which the dataset described by <code>stats</code> is
    496      * drawn equals <code>mu</code>.
    497      * <p>
    498      * Returns <code>true</code> iff the null hypothesis can be rejected with
    499      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
    500      * <code>alpha * 2.</code></p>
    501      * <p>
    502      * <strong>Examples:</strong><br><ol>
    503      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
    504      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
    505      * </li>
    506      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
    507      * at the 99% level, first verify that the measured sample mean is less
    508      * than <code>mu</code> and then use
    509      * <br><code>tTest(mu, sampleStats, 0.02) </code>
    510      * </li></ol></p>
    511      * <p>
    512      * <strong>Usage Note:</strong><br>
    513      * The validity of the test depends on the assumptions of the one-sample
    514      * parametric t-test procedure, as discussed
    515      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
    516      * </p><p>
    517      * <strong>Preconditions</strong>: <ul>
    518      * <li>The sample must include at least 2 observations.
    519      * </li></ul></p>
    520      *
    521      * @param mu constant value to compare sample mean against
    522      * @param sampleStats StatisticalSummary describing sample data values
    523      * @param alpha significance level of the test
    524      * @return p-value
    525      * @throws IllegalArgumentException if the precondition is not met
    526      * @throws MathException if an error occurs computing the p-value
    527      */
    528     public boolean tTest( double mu, StatisticalSummary sampleStats,
    529             double alpha)
    530     throws IllegalArgumentException, MathException {
    531         checkSignificanceLevel(alpha);
    532         return tTest(mu, sampleStats) < alpha;
    533     }
    534 
    535     /**
    536      * Returns the <i>observed significance level</i>, or
    537      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
    538      * comparing the means of the input arrays.
    539      * <p>
    540      * The number returned is the smallest significance level
    541      * at which one can reject the null hypothesis that the two means are
    542      * equal in favor of the two-sided alternative that they are different.
    543      * For a one-sided test, divide the returned value by 2.</p>
    544      * <p>
    545      * The test does not assume that the underlying popuation variances are
    546      * equal  and it uses approximated degrees of freedom computed from the
    547      * sample data to compute the p-value.  The t-statistic used is as defined in
    548      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
    549      * to the degrees of freedom is used,
    550      * as described
    551      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
    552      * here.</a>  To perform the test under the assumption of equal subpopulation
    553      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
    554      * <p>
    555      * <strong>Usage Note:</strong><br>
    556      * The validity of the p-value depends on the assumptions of the parametric
    557      * t-test procedure, as discussed
    558      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    559      * here</a></p>
    560      * <p>
    561      * <strong>Preconditions</strong>: <ul>
    562      * <li>The observed array lengths must both be at least 2.
    563      * </li></ul></p>
    564      *
    565      * @param sample1 array of sample data values
    566      * @param sample2 array of sample data values
    567      * @return p-value for t-test
    568      * @throws IllegalArgumentException if the precondition is not met
    569      * @throws MathException if an error occurs computing the p-value
    570      */
    571     public double tTest(double[] sample1, double[] sample2)
    572     throws IllegalArgumentException, MathException {
    573         checkSampleData(sample1);
    574         checkSampleData(sample2);
    575         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
    576                 StatUtils.variance(sample1), StatUtils.variance(sample2),
    577                 sample1.length, sample2.length);
    578     }
    579 
    580     /**
    581      * Returns the <i>observed significance level</i>, or
    582      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
    583      * comparing the means of the input arrays, under the assumption that
    584      * the two samples are drawn from subpopulations with equal variances.
    585      * To perform the test without the equal variances assumption, use
    586      * {@link #tTest(double[], double[])}.
    587      * <p>
    588      * The number returned is the smallest significance level
    589      * at which one can reject the null hypothesis that the two means are
    590      * equal in favor of the two-sided alternative that they are different.
    591      * For a one-sided test, divide the returned value by 2.</p>
    592      * <p>
    593      * A pooled variance estimate is used to compute the t-statistic.  See
    594      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
    595      * minus 2 is used as the degrees of freedom.</p>
    596      * <p>
    597      * <strong>Usage Note:</strong><br>
    598      * The validity of the p-value depends on the assumptions of the parametric
    599      * t-test procedure, as discussed
    600      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    601      * here</a></p>
    602      * <p>
    603      * <strong>Preconditions</strong>: <ul>
    604      * <li>The observed array lengths must both be at least 2.
    605      * </li></ul></p>
    606      *
    607      * @param sample1 array of sample data values
    608      * @param sample2 array of sample data values
    609      * @return p-value for t-test
    610      * @throws IllegalArgumentException if the precondition is not met
    611      * @throws MathException if an error occurs computing the p-value
    612      */
    613     public double homoscedasticTTest(double[] sample1, double[] sample2)
    614     throws IllegalArgumentException, MathException {
    615         checkSampleData(sample1);
    616         checkSampleData(sample2);
    617         return homoscedasticTTest(StatUtils.mean(sample1),
    618                 StatUtils.mean(sample2), StatUtils.variance(sample1),
    619                 StatUtils.variance(sample2), sample1.length,
    620                 sample2.length);
    621     }
    622 
    623 
    624      /**
    625      * Performs a
    626      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
    627      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
    628      * and <code>sample2</code> are drawn from populations with the same mean,
    629      * with significance level <code>alpha</code>.  This test does not assume
    630      * that the subpopulation variances are equal.  To perform the test assuming
    631      * equal variances, use
    632      * {@link #homoscedasticTTest(double[], double[], double)}.
    633      * <p>
    634      * Returns <code>true</code> iff the null hypothesis that the means are
    635      * equal can be rejected with confidence <code>1 - alpha</code>.  To
    636      * perform a 1-sided test, use <code>alpha * 2</code></p>
    637      * <p>
    638      * See {@link #t(double[], double[])} for the formula used to compute the
    639      * t-statistic.  Degrees of freedom are approximated using the
    640      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
    641      * Welch-Satterthwaite approximation.</a></p>
    642 
    643      * <p>
    644      * <strong>Examples:</strong><br><ol>
    645      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
    646      * the 95% level,  use
    647      * <br><code>tTest(sample1, sample2, 0.05). </code>
    648      * </li>
    649      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
    650      * the 99% level, first verify that the measured  mean of <code>sample 1</code>
    651      * is less than the mean of <code>sample 2</code> and then use
    652      * <br><code>tTest(sample1, sample2, 0.02) </code>
    653      * </li></ol></p>
    654      * <p>
    655      * <strong>Usage Note:</strong><br>
    656      * The validity of the test depends on the assumptions of the parametric
    657      * t-test procedure, as discussed
    658      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    659      * here</a></p>
    660      * <p>
    661      * <strong>Preconditions</strong>: <ul>
    662      * <li>The observed array lengths must both be at least 2.
    663      * </li>
    664      * <li> <code> 0 < alpha < 0.5 </code>
    665      * </li></ul></p>
    666      *
    667      * @param sample1 array of sample data values
    668      * @param sample2 array of sample data values
    669      * @param alpha significance level of the test
    670      * @return true if the null hypothesis can be rejected with
    671      * confidence 1 - alpha
    672      * @throws IllegalArgumentException if the preconditions are not met
    673      * @throws MathException if an error occurs performing the test
    674      */
    675     public boolean tTest(double[] sample1, double[] sample2,
    676             double alpha)
    677     throws IllegalArgumentException, MathException {
    678         checkSignificanceLevel(alpha);
    679         return tTest(sample1, sample2) < alpha;
    680     }
    681 
    682     /**
    683      * Performs a
    684      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
    685      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
    686      * and <code>sample2</code> are drawn from populations with the same mean,
    687      * with significance level <code>alpha</code>,  assuming that the
    688      * subpopulation variances are equal.  Use
    689      * {@link #tTest(double[], double[], double)} to perform the test without
    690      * the assumption of equal variances.
    691      * <p>
    692      * Returns <code>true</code> iff the null hypothesis that the means are
    693      * equal can be rejected with confidence <code>1 - alpha</code>.  To
    694      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
    695      * without the assumption of equal subpopulation variances, use
    696      * {@link #tTest(double[], double[], double)}.</p>
    697      * <p>
    698      * A pooled variance estimate is used to compute the t-statistic. See
    699      * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
    700      * sizes minus 2 is used as the degrees of freedom.</p>
    701      * <p>
    702      * <strong>Examples:</strong><br><ol>
    703      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
    704      * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
    705      * </li>
    706      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
    707      * at the 99% level, first verify that the measured mean of
    708      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
    709      * and then use
    710      * <br><code>tTest(sample1, sample2, 0.02) </code>
    711      * </li></ol></p>
    712      * <p>
    713      * <strong>Usage Note:</strong><br>
    714      * The validity of the test depends on the assumptions of the parametric
    715      * t-test procedure, as discussed
    716      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    717      * here</a></p>
    718      * <p>
    719      * <strong>Preconditions</strong>: <ul>
    720      * <li>The observed array lengths must both be at least 2.
    721      * </li>
    722      * <li> <code> 0 < alpha < 0.5 </code>
    723      * </li></ul></p>
    724      *
    725      * @param sample1 array of sample data values
    726      * @param sample2 array of sample data values
    727      * @param alpha significance level of the test
    728      * @return true if the null hypothesis can be rejected with
    729      * confidence 1 - alpha
    730      * @throws IllegalArgumentException if the preconditions are not met
    731      * @throws MathException if an error occurs performing the test
    732      */
    733     public boolean homoscedasticTTest(double[] sample1, double[] sample2,
    734             double alpha)
    735     throws IllegalArgumentException, MathException {
    736         checkSignificanceLevel(alpha);
    737         return homoscedasticTTest(sample1, sample2) < alpha;
    738     }
    739 
    740      /**
    741      * Returns the <i>observed significance level</i>, or
    742      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
    743      * comparing the means of the datasets described by two StatisticalSummary
    744      * instances.
    745      * <p>
    746      * The number returned is the smallest significance level
    747      * at which one can reject the null hypothesis that the two means are
    748      * equal in favor of the two-sided alternative that they are different.
    749      * For a one-sided test, divide the returned value by 2.</p>
    750      * <p>
    751      * The test does not assume that the underlying population variances are
    752      * equal  and it uses approximated degrees of freedom computed from the
    753      * sample data to compute the p-value.   To perform the test assuming
    754      * equal variances, use
    755      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
    756      * <p>
    757      * <strong>Usage Note:</strong><br>
    758      * The validity of the p-value depends on the assumptions of the parametric
    759      * t-test procedure, as discussed
    760      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    761      * here</a></p>
    762      * <p>
    763      * <strong>Preconditions</strong>: <ul>
    764      * <li>The datasets described by the two Univariates must each contain
    765      * at least 2 observations.
    766      * </li></ul></p>
    767      *
    768      * @param sampleStats1  StatisticalSummary describing data from the first sample
    769      * @param sampleStats2  StatisticalSummary describing data from the second sample
    770      * @return p-value for t-test
    771      * @throws IllegalArgumentException if the precondition is not met
    772      * @throws MathException if an error occurs computing the p-value
    773      */
    774     public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
    775     throws IllegalArgumentException, MathException {
    776         checkSampleData(sampleStats1);
    777         checkSampleData(sampleStats2);
    778         return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
    779                 sampleStats2.getVariance(), sampleStats1.getN(),
    780                 sampleStats2.getN());
    781     }
    782 
    783     /**
    784      * Returns the <i>observed significance level</i>, or
    785      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
    786      * comparing the means of the datasets described by two StatisticalSummary
    787      * instances, under the hypothesis of equal subpopulation variances. To
    788      * perform a test without the equal variances assumption, use
    789      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
    790      * <p>
    791      * The number returned is the smallest significance level
    792      * at which one can reject the null hypothesis that the two means are
    793      * equal in favor of the two-sided alternative that they are different.
    794      * For a one-sided test, divide the returned value by 2.</p>
    795      * <p>
    796      * See {@link #homoscedasticT(double[], double[])} for the formula used to
    797      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
    798      * the degrees of freedom.</p>
    799      * <p>
    800      * <strong>Usage Note:</strong><br>
    801      * The validity of the p-value depends on the assumptions of the parametric
    802      * t-test procedure, as discussed
    803      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
    804      * </p><p>
    805      * <strong>Preconditions</strong>: <ul>
    806      * <li>The datasets described by the two Univariates must each contain
    807      * at least 2 observations.
    808      * </li></ul></p>
    809      *
    810      * @param sampleStats1  StatisticalSummary describing data from the first sample
    811      * @param sampleStats2  StatisticalSummary describing data from the second sample
    812      * @return p-value for t-test
    813      * @throws IllegalArgumentException if the precondition is not met
    814      * @throws MathException if an error occurs computing the p-value
    815      */
    816     public double homoscedasticTTest(StatisticalSummary sampleStats1,
    817                                      StatisticalSummary sampleStats2)
    818     throws IllegalArgumentException, MathException {
    819         checkSampleData(sampleStats1);
    820         checkSampleData(sampleStats2);
    821         return homoscedasticTTest(sampleStats1.getMean(),
    822                 sampleStats2.getMean(), sampleStats1.getVariance(),
    823                 sampleStats2.getVariance(), sampleStats1.getN(),
    824                 sampleStats2.getN());
    825     }
    826 
    827     /**
    828      * Performs a
    829      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
    830      * two-sided t-test</a> evaluating the null hypothesis that
    831      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
    832      * datasets drawn from populations with the same mean, with significance
    833      * level <code>alpha</code>.   This test does not assume that the
    834      * subpopulation variances are equal.  To perform the test under the equal
    835      * variances assumption, use
    836      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
    837      * <p>
    838      * Returns <code>true</code> iff the null hypothesis that the means are
    839      * equal can be rejected with confidence <code>1 - alpha</code>.  To
    840      * perform a 1-sided test, use <code>alpha * 2</code></p>
    841      * <p>
    842      * See {@link #t(double[], double[])} for the formula used to compute the
    843      * t-statistic.  Degrees of freedom are approximated using the
    844      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
    845      * Welch-Satterthwaite approximation.</a></p>
    846      * <p>
    847      * <strong>Examples:</strong><br><ol>
    848      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
    849      * the 95% level, use
    850      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
    851      * </li>
    852      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
    853      * at the 99% level,  first verify that the measured mean of
    854      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
    855      * and then use
    856      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
    857      * </li></ol></p>
    858      * <p>
    859      * <strong>Usage Note:</strong><br>
    860      * The validity of the test depends on the assumptions of the parametric
    861      * t-test procedure, as discussed
    862      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
    863      * here</a></p>
    864      * <p>
    865      * <strong>Preconditions</strong>: <ul>
    866      * <li>The datasets described by the two Univariates must each contain
    867      * at least 2 observations.
    868      * </li>
    869      * <li> <code> 0 < alpha < 0.5 </code>
    870      * </li></ul></p>
    871      *
    872      * @param sampleStats1 StatisticalSummary describing sample data values
    873      * @param sampleStats2 StatisticalSummary describing sample data values
    874      * @param alpha significance level of the test
    875      * @return true if the null hypothesis can be rejected with
    876      * confidence 1 - alpha
    877      * @throws IllegalArgumentException if the preconditions are not met
    878      * @throws MathException if an error occurs performing the test
    879      */
    880     public boolean tTest(StatisticalSummary sampleStats1,
    881             StatisticalSummary sampleStats2, double alpha)
    882     throws IllegalArgumentException, MathException {
    883         checkSignificanceLevel(alpha);
    884         return tTest(sampleStats1, sampleStats2) < alpha;
    885     }
    886 
    887     //----------------------------------------------- Protected methods
    888 
    889     /**
    890      * Computes approximate degrees of freedom for 2-sample t-test.
    891      *
    892      * @param v1 first sample variance
    893      * @param v2 second sample variance
    894      * @param n1 first sample n
    895      * @param n2 second sample n
    896      * @return approximate degrees of freedom
    897      */
    898     protected double df(double v1, double v2, double n1, double n2) {
    899         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
    900         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
    901                 (n2 * n2 * (n2 - 1d)));
    902     }
    903 
    904     /**
    905      * Computes t test statistic for 1-sample t-test.
    906      *
    907      * @param m sample mean
    908      * @param mu constant to test against
    909      * @param v sample variance
    910      * @param n sample n
    911      * @return t test statistic
    912      */
    913     protected double t(double m, double mu, double v, double n) {
    914         return (m - mu) / FastMath.sqrt(v / n);
    915     }
    916 
    917     /**
    918      * Computes t test statistic for 2-sample t-test.
    919      * <p>
    920      * Does not assume that subpopulation variances are equal.</p>
    921      *
    922      * @param m1 first sample mean
    923      * @param m2 second sample mean
    924      * @param v1 first sample variance
    925      * @param v2 second sample variance
    926      * @param n1 first sample n
    927      * @param n2 second sample n
    928      * @return t test statistic
    929      */
    930     protected double t(double m1, double m2,  double v1, double v2, double n1,
    931             double n2)  {
    932             return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
    933     }
    934 
    935     /**
    936      * Computes t test statistic for 2-sample t-test under the hypothesis
    937      * of equal subpopulation variances.
    938      *
    939      * @param m1 first sample mean
    940      * @param m2 second sample mean
    941      * @param v1 first sample variance
    942      * @param v2 second sample variance
    943      * @param n1 first sample n
    944      * @param n2 second sample n
    945      * @return t test statistic
    946      */
    947     protected double homoscedasticT(double m1, double m2,  double v1,
    948             double v2, double n1, double n2)  {
    949             double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
    950             return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
    951     }
    952 
    953     /**
    954      * Computes p-value for 2-sided, 1-sample t-test.
    955      *
    956      * @param m sample mean
    957      * @param mu constant to test against
    958      * @param v sample variance
    959      * @param n sample n
    960      * @return p-value
    961      * @throws MathException if an error occurs computing the p-value
    962      */
    963     protected double tTest(double m, double mu, double v, double n)
    964     throws MathException {
    965         double t = FastMath.abs(t(m, mu, v, n));
    966         distribution.setDegreesOfFreedom(n - 1);
    967         return 2.0 * distribution.cumulativeProbability(-t);
    968     }
    969 
    970     /**
    971      * Computes p-value for 2-sided, 2-sample t-test.
    972      * <p>
    973      * Does not assume subpopulation variances are equal. Degrees of freedom
    974      * are estimated from the data.</p>
    975      *
    976      * @param m1 first sample mean
    977      * @param m2 second sample mean
    978      * @param v1 first sample variance
    979      * @param v2 second sample variance
    980      * @param n1 first sample n
    981      * @param n2 second sample n
    982      * @return p-value
    983      * @throws MathException if an error occurs computing the p-value
    984      */
    985     protected double tTest(double m1, double m2, double v1, double v2,
    986             double n1, double n2)
    987     throws MathException {
    988         double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
    989         double degreesOfFreedom = 0;
    990         degreesOfFreedom = df(v1, v2, n1, n2);
    991         distribution.setDegreesOfFreedom(degreesOfFreedom);
    992         return 2.0 * distribution.cumulativeProbability(-t);
    993     }
    994 
    995     /**
    996      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
    997      * of equal subpopulation variances.
    998      * <p>
    999      * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
   1000      *
   1001      * @param m1 first sample mean
   1002      * @param m2 second sample mean
   1003      * @param v1 first sample variance
   1004      * @param v2 second sample variance
   1005      * @param n1 first sample n
   1006      * @param n2 second sample n
   1007      * @return p-value
   1008      * @throws MathException if an error occurs computing the p-value
   1009      */
   1010     protected double homoscedasticTTest(double m1, double m2, double v1,
   1011             double v2, double n1, double n2)
   1012     throws MathException {
   1013         double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
   1014         double degreesOfFreedom = n1 + n2 - 2;
   1015         distribution.setDegreesOfFreedom(degreesOfFreedom);
   1016         return 2.0 * distribution.cumulativeProbability(-t);
   1017     }
   1018 
   1019     /**
   1020      * Modify the distribution used to compute inference statistics.
   1021      * @param value the new distribution
   1022      * @since 1.2
   1023      * @deprecated in 2.2 (to be removed in 3.0).
   1024      */
   1025     @Deprecated
   1026     public void setDistribution(TDistribution value) {
   1027         distribution = value;
   1028     }
   1029 
   1030     /** Check significance level.
   1031      * @param alpha significance level
   1032      * @exception IllegalArgumentException if significance level is out of bounds
   1033      */
   1034     private void checkSignificanceLevel(final double alpha)
   1035         throws IllegalArgumentException {
   1036         if ((alpha <= 0) || (alpha > 0.5)) {
   1037             throw MathRuntimeException.createIllegalArgumentException(
   1038                   LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
   1039                   alpha, 0.0, 0.5);
   1040         }
   1041     }
   1042 
   1043     /** Check sample data.
   1044      * @param data sample data
   1045      * @exception IllegalArgumentException if there is not enough sample data
   1046      */
   1047     private void checkSampleData(final double[] data)
   1048         throws IllegalArgumentException {
   1049         if ((data == null) || (data.length < 2)) {
   1050             throw MathRuntimeException.createIllegalArgumentException(
   1051                   LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
   1052                   (data == null) ? 0 : data.length);
   1053         }
   1054     }
   1055 
   1056     /** Check sample data.
   1057      * @param stat statistical summary
   1058      * @exception IllegalArgumentException if there is not enough sample data
   1059      */
   1060     private void checkSampleData(final StatisticalSummary stat)
   1061         throws IllegalArgumentException {
   1062         if ((stat == null) || (stat.getN() < 2)) {
   1063             throw MathRuntimeException.createIllegalArgumentException(
   1064                   LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
   1065                   (stat == null) ? 0 : stat.getN());
   1066         }
   1067     }
   1068 
   1069 }
   1070