Home | History | Annotate | Download | only in distribution
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package org.apache.commons.math.distribution;
     19 
     20 import java.io.Serializable;
     21 
     22 import org.apache.commons.math.MathRuntimeException;
     23 import org.apache.commons.math.exception.util.LocalizedFormats;
     24 import org.apache.commons.math.util.MathUtils;
     25 import org.apache.commons.math.util.FastMath;
     26 
     27 /**
     28  * The default implementation of {@link HypergeometricDistribution}.
     29  *
     30  * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
     31  */
     32 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
     33         implements HypergeometricDistribution, Serializable {
     34 
     35     /** Serializable version identifier */
     36     private static final long serialVersionUID = -436928820673516179L;
     37 
     38     /** The number of successes in the population. */
     39     private int numberOfSuccesses;
     40 
     41     /** The population size. */
     42     private int populationSize;
     43 
     44     /** The sample size. */
     45     private int sampleSize;
     46 
     47     /**
     48      * Construct a new hypergeometric distribution with the given the population
     49      * size, the number of successes in the population, and the sample size.
     50      *
     51      * @param populationSize the population size.
     52      * @param numberOfSuccesses number of successes in the population.
     53      * @param sampleSize the sample size.
     54      */
     55     public HypergeometricDistributionImpl(int populationSize,
     56             int numberOfSuccesses, int sampleSize) {
     57         super();
     58         if (numberOfSuccesses > populationSize) {
     59             throw MathRuntimeException
     60                     .createIllegalArgumentException(
     61                             LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
     62                             numberOfSuccesses, populationSize);
     63         }
     64         if (sampleSize > populationSize) {
     65             throw MathRuntimeException
     66                     .createIllegalArgumentException(
     67                             LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
     68                             sampleSize, populationSize);
     69         }
     70 
     71         setPopulationSizeInternal(populationSize);
     72         setSampleSizeInternal(sampleSize);
     73         setNumberOfSuccessesInternal(numberOfSuccesses);
     74     }
     75 
     76     /**
     77      * For this distribution, X, this method returns P(X ≤ x).
     78      *
     79      * @param x the value at which the PDF is evaluated.
     80      * @return PDF for this distribution.
     81      */
     82     @Override
     83     public double cumulativeProbability(int x) {
     84         double ret;
     85 
     86         int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
     87         if (x < domain[0]) {
     88             ret = 0.0;
     89         } else if (x >= domain[1]) {
     90             ret = 1.0;
     91         } else {
     92             ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
     93                                              numberOfSuccesses, sampleSize);
     94         }
     95 
     96         return ret;
     97     }
     98 
     99     /**
    100      * Return the domain for the given hypergeometric distribution parameters.
    101      *
    102      * @param n the population size.
    103      * @param m number of successes in the population.
    104      * @param k the sample size.
    105      * @return a two element array containing the lower and upper bounds of the
    106      *         hypergeometric distribution.
    107      */
    108     private int[] getDomain(int n, int m, int k) {
    109         return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
    110     }
    111 
    112     /**
    113      * Access the domain value lower bound, based on <code>p</code>, used to
    114      * bracket a PDF root.
    115      *
    116      * @param p the desired probability for the critical value
    117      * @return domain value lower bound, i.e. P(X &lt; <i>lower bound</i>) &lt;
    118      *         <code>p</code>
    119      */
    120     @Override
    121     protected int getDomainLowerBound(double p) {
    122         return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
    123     }
    124 
    125     /**
    126      * Access the domain value upper bound, based on <code>p</code>, used to
    127      * bracket a PDF root.
    128      *
    129      * @param p the desired probability for the critical value
    130      * @return domain value upper bound, i.e. P(X &lt; <i>upper bound</i>) &gt;
    131      *         <code>p</code>
    132      */
    133     @Override
    134     protected int getDomainUpperBound(double p) {
    135         return getUpperDomain(sampleSize, numberOfSuccesses);
    136     }
    137 
    138     /**
    139      * Return the lowest domain value for the given hypergeometric distribution
    140      * parameters.
    141      *
    142      * @param n the population size.
    143      * @param m number of successes in the population.
    144      * @param k the sample size.
    145      * @return the lowest domain value of the hypergeometric distribution.
    146      */
    147     private int getLowerDomain(int n, int m, int k) {
    148         return FastMath.max(0, m - (n - k));
    149     }
    150 
    151     /**
    152      * Access the number of successes.
    153      *
    154      * @return the number of successes.
    155      */
    156     public int getNumberOfSuccesses() {
    157         return numberOfSuccesses;
    158     }
    159 
    160     /**
    161      * Access the population size.
    162      *
    163      * @return the population size.
    164      */
    165     public int getPopulationSize() {
    166         return populationSize;
    167     }
    168 
    169     /**
    170      * Access the sample size.
    171      *
    172      * @return the sample size.
    173      */
    174     public int getSampleSize() {
    175         return sampleSize;
    176     }
    177 
    178     /**
    179      * Return the highest domain value for the given hypergeometric distribution
    180      * parameters.
    181      *
    182      * @param m number of successes in the population.
    183      * @param k the sample size.
    184      * @return the highest domain value of the hypergeometric distribution.
    185      */
    186     private int getUpperDomain(int m, int k) {
    187         return FastMath.min(k, m);
    188     }
    189 
    190     /**
    191      * For this distribution, X, this method returns P(X = x).
    192      *
    193      * @param x the value at which the PMF is evaluated.
    194      * @return PMF for this distribution.
    195      */
    196     public double probability(int x) {
    197         double ret;
    198 
    199         int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
    200         if (x < domain[0] || x > domain[1]) {
    201             ret = 0.0;
    202         } else {
    203             double p = (double) sampleSize / (double) populationSize;
    204             double q = (double) (populationSize - sampleSize) / (double) populationSize;
    205             double p1 = SaddlePointExpansion.logBinomialProbability(x,
    206                     numberOfSuccesses, p, q);
    207             double p2 =
    208                 SaddlePointExpansion.logBinomialProbability(sampleSize - x,
    209                     populationSize - numberOfSuccesses, p, q);
    210             double p3 =
    211                 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
    212             ret = FastMath.exp(p1 + p2 - p3);
    213         }
    214 
    215         return ret;
    216     }
    217 
    218     /**
    219      * For the distribution, X, defined by the given hypergeometric distribution
    220      * parameters, this method returns P(X = x).
    221      *
    222      * @param n the population size.
    223      * @param m number of successes in the population.
    224      * @param k the sample size.
    225      * @param x the value at which the PMF is evaluated.
    226      * @return PMF for the distribution.
    227      */
    228     private double probability(int n, int m, int k, int x) {
    229         return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) +
    230                MathUtils.binomialCoefficientLog(n - m, k - x) -
    231                MathUtils.binomialCoefficientLog(n, k));
    232     }
    233 
    234     /**
    235      * Modify the number of successes.
    236      *
    237      * @param num the new number of successes.
    238      * @throws IllegalArgumentException if <code>num</code> is negative.
    239      * @deprecated as of 2.1 (class will become immutable in 3.0)
    240      */
    241     @Deprecated
    242     public void setNumberOfSuccesses(int num) {
    243         setNumberOfSuccessesInternal(num);
    244     }
    245 
    246     /**
    247      * Modify the number of successes.
    248      *
    249      * @param num the new number of successes.
    250      * @throws IllegalArgumentException if <code>num</code> is negative.
    251      */
    252     private void setNumberOfSuccessesInternal(int num) {
    253         if (num < 0) {
    254             throw MathRuntimeException.createIllegalArgumentException(
    255                     LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num);
    256         }
    257         numberOfSuccesses = num;
    258     }
    259 
    260     /**
    261      * Modify the population size.
    262      *
    263      * @param size the new population size.
    264      * @throws IllegalArgumentException if <code>size</code> is not positive.
    265      * @deprecated as of 2.1 (class will become immutable in 3.0)
    266      */
    267     @Deprecated
    268     public void setPopulationSize(int size) {
    269         setPopulationSizeInternal(size);
    270     }
    271 
    272     /**
    273      * Modify the population size.
    274      *
    275      * @param size the new population size.
    276      * @throws IllegalArgumentException if <code>size</code> is not positive.
    277      */
    278     private void setPopulationSizeInternal(int size) {
    279         if (size <= 0) {
    280             throw MathRuntimeException.createIllegalArgumentException(
    281                     LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size);
    282         }
    283         populationSize = size;
    284     }
    285 
    286     /**
    287      * Modify the sample size.
    288      *
    289      * @param size the new sample size.
    290      * @throws IllegalArgumentException if <code>size</code> is negative.
    291      * @deprecated as of 2.1 (class will become immutable in 3.0)
    292      */
    293     @Deprecated
    294     public void setSampleSize(int size) {
    295         setSampleSizeInternal(size);
    296     }
    297     /**
    298      * Modify the sample size.
    299      *
    300      * @param size the new sample size.
    301      * @throws IllegalArgumentException if <code>size</code> is negative.
    302      */
    303     private void setSampleSizeInternal(int size) {
    304         if (size < 0) {
    305             throw MathRuntimeException.createIllegalArgumentException(
    306                     LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size);
    307         }
    308         sampleSize = size;
    309     }
    310 
    311     /**
    312      * For this distribution, X, this method returns P(X &ge; x).
    313      *
    314      * @param x the value at which the CDF is evaluated.
    315      * @return upper tail CDF for this distribution.
    316      * @since 1.1
    317      */
    318     public double upperCumulativeProbability(int x) {
    319         double ret;
    320 
    321         final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
    322         if (x < domain[0]) {
    323             ret = 1.0;
    324         } else if (x > domain[1]) {
    325             ret = 0.0;
    326         } else {
    327             ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
    328         }
    329 
    330         return ret;
    331     }
    332 
    333     /**
    334      * For this distribution, X, this method returns P(x0 &le; X &le; x1). This
    335      * probability is computed by summing the point probabilities for the values
    336      * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
    337      *
    338      * @param x0 the inclusive, lower bound
    339      * @param x1 the inclusive, upper bound
    340      * @param dx the direction of summation. 1 indicates summing from x0 to x1.
    341      *            0 indicates summing from x1 to x0.
    342      * @param n the population size.
    343      * @param m number of successes in the population.
    344      * @param k the sample size.
    345      * @return P(x0 &le; X &le; x1).
    346      */
    347     private double innerCumulativeProbability(int x0, int x1, int dx, int n,
    348             int m, int k) {
    349         double ret = probability(n, m, k, x0);
    350         while (x0 != x1) {
    351             x0 += dx;
    352             ret += probability(n, m, k, x0);
    353         }
    354         return ret;
    355     }
    356 
    357     /**
    358      * Returns the lower bound for the support for the distribution.
    359      *
    360      * For population size <code>N</code>,
    361      * number of successes <code>m</code>, and
    362      * sample size <code>n</code>,
    363      * the lower bound of the support is
    364      * <code>max(0, n + m - N)</code>
    365      *
    366      * @return lower bound of the support
    367      * @since 2.2
    368      */
    369     public int getSupportLowerBound() {
    370         return FastMath.max(0,
    371                 getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
    372     }
    373 
    374     /**
    375      * Returns the upper bound for the support of the distribution.
    376      *
    377      * For number of successes <code>m</code> and
    378      * sample size <code>n</code>,
    379      * the upper bound of the support is
    380      * <code>min(m, n)</code>
    381      *
    382      * @return upper bound of the support
    383      * @since 2.2
    384      */
    385     public int getSupportUpperBound() {
    386         return FastMath.min(getNumberOfSuccesses(), getSampleSize());
    387     }
    388 
    389     /**
    390      * Returns the mean.
    391      *
    392      * For population size <code>N</code>,
    393      * number of successes <code>m</code>, and
    394      * sample size <code>n</code>, the mean is
    395      * <code>n * m / N</code>
    396      *
    397      * @return the mean
    398      * @since 2.2
    399      */
    400     protected double getNumericalMean() {
    401         return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize();
    402     }
    403 
    404     /**
    405      * Returns the variance.
    406      *
    407      * For population size <code>N</code>,
    408      * number of successes <code>m</code>, and
    409      * sample size <code>n</code>, the variance is
    410      * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code>
    411      *
    412      * @return the variance
    413      * @since 2.2
    414      */
    415     public double getNumericalVariance() {
    416         final double N = getPopulationSize();
    417         final double m = getNumberOfSuccesses();
    418         final double n = getSampleSize();
    419         return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) );
    420     }
    421 }
    422