1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.distribution; 19 20 import java.io.Serializable; 21 22 import org.apache.commons.math.MathRuntimeException; 23 import org.apache.commons.math.exception.util.LocalizedFormats; 24 import org.apache.commons.math.util.MathUtils; 25 import org.apache.commons.math.util.FastMath; 26 27 /** 28 * The default implementation of {@link HypergeometricDistribution}. 29 * 30 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $ 31 */ 32 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution 33 implements HypergeometricDistribution, Serializable { 34 35 /** Serializable version identifier */ 36 private static final long serialVersionUID = -436928820673516179L; 37 38 /** The number of successes in the population. */ 39 private int numberOfSuccesses; 40 41 /** The population size. */ 42 private int populationSize; 43 44 /** The sample size. */ 45 private int sampleSize; 46 47 /** 48 * Construct a new hypergeometric distribution with the given the population 49 * size, the number of successes in the population, and the sample size. 50 * 51 * @param populationSize the population size. 52 * @param numberOfSuccesses number of successes in the population. 53 * @param sampleSize the sample size. 54 */ 55 public HypergeometricDistributionImpl(int populationSize, 56 int numberOfSuccesses, int sampleSize) { 57 super(); 58 if (numberOfSuccesses > populationSize) { 59 throw MathRuntimeException 60 .createIllegalArgumentException( 61 LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE, 62 numberOfSuccesses, populationSize); 63 } 64 if (sampleSize > populationSize) { 65 throw MathRuntimeException 66 .createIllegalArgumentException( 67 LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE, 68 sampleSize, populationSize); 69 } 70 71 setPopulationSizeInternal(populationSize); 72 setSampleSizeInternal(sampleSize); 73 setNumberOfSuccessesInternal(numberOfSuccesses); 74 } 75 76 /** 77 * For this distribution, X, this method returns P(X ≤ x). 78 * 79 * @param x the value at which the PDF is evaluated. 80 * @return PDF for this distribution. 81 */ 82 @Override 83 public double cumulativeProbability(int x) { 84 double ret; 85 86 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 87 if (x < domain[0]) { 88 ret = 0.0; 89 } else if (x >= domain[1]) { 90 ret = 1.0; 91 } else { 92 ret = innerCumulativeProbability(domain[0], x, 1, populationSize, 93 numberOfSuccesses, sampleSize); 94 } 95 96 return ret; 97 } 98 99 /** 100 * Return the domain for the given hypergeometric distribution parameters. 101 * 102 * @param n the population size. 103 * @param m number of successes in the population. 104 * @param k the sample size. 105 * @return a two element array containing the lower and upper bounds of the 106 * hypergeometric distribution. 107 */ 108 private int[] getDomain(int n, int m, int k) { 109 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) }; 110 } 111 112 /** 113 * Access the domain value lower bound, based on <code>p</code>, used to 114 * bracket a PDF root. 115 * 116 * @param p the desired probability for the critical value 117 * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) < 118 * <code>p</code> 119 */ 120 @Override 121 protected int getDomainLowerBound(double p) { 122 return getLowerDomain(populationSize, numberOfSuccesses, sampleSize); 123 } 124 125 /** 126 * Access the domain value upper bound, based on <code>p</code>, used to 127 * bracket a PDF root. 128 * 129 * @param p the desired probability for the critical value 130 * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) > 131 * <code>p</code> 132 */ 133 @Override 134 protected int getDomainUpperBound(double p) { 135 return getUpperDomain(sampleSize, numberOfSuccesses); 136 } 137 138 /** 139 * Return the lowest domain value for the given hypergeometric distribution 140 * parameters. 141 * 142 * @param n the population size. 143 * @param m number of successes in the population. 144 * @param k the sample size. 145 * @return the lowest domain value of the hypergeometric distribution. 146 */ 147 private int getLowerDomain(int n, int m, int k) { 148 return FastMath.max(0, m - (n - k)); 149 } 150 151 /** 152 * Access the number of successes. 153 * 154 * @return the number of successes. 155 */ 156 public int getNumberOfSuccesses() { 157 return numberOfSuccesses; 158 } 159 160 /** 161 * Access the population size. 162 * 163 * @return the population size. 164 */ 165 public int getPopulationSize() { 166 return populationSize; 167 } 168 169 /** 170 * Access the sample size. 171 * 172 * @return the sample size. 173 */ 174 public int getSampleSize() { 175 return sampleSize; 176 } 177 178 /** 179 * Return the highest domain value for the given hypergeometric distribution 180 * parameters. 181 * 182 * @param m number of successes in the population. 183 * @param k the sample size. 184 * @return the highest domain value of the hypergeometric distribution. 185 */ 186 private int getUpperDomain(int m, int k) { 187 return FastMath.min(k, m); 188 } 189 190 /** 191 * For this distribution, X, this method returns P(X = x). 192 * 193 * @param x the value at which the PMF is evaluated. 194 * @return PMF for this distribution. 195 */ 196 public double probability(int x) { 197 double ret; 198 199 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 200 if (x < domain[0] || x > domain[1]) { 201 ret = 0.0; 202 } else { 203 double p = (double) sampleSize / (double) populationSize; 204 double q = (double) (populationSize - sampleSize) / (double) populationSize; 205 double p1 = SaddlePointExpansion.logBinomialProbability(x, 206 numberOfSuccesses, p, q); 207 double p2 = 208 SaddlePointExpansion.logBinomialProbability(sampleSize - x, 209 populationSize - numberOfSuccesses, p, q); 210 double p3 = 211 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q); 212 ret = FastMath.exp(p1 + p2 - p3); 213 } 214 215 return ret; 216 } 217 218 /** 219 * For the distribution, X, defined by the given hypergeometric distribution 220 * parameters, this method returns P(X = x). 221 * 222 * @param n the population size. 223 * @param m number of successes in the population. 224 * @param k the sample size. 225 * @param x the value at which the PMF is evaluated. 226 * @return PMF for the distribution. 227 */ 228 private double probability(int n, int m, int k, int x) { 229 return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) + 230 MathUtils.binomialCoefficientLog(n - m, k - x) - 231 MathUtils.binomialCoefficientLog(n, k)); 232 } 233 234 /** 235 * Modify the number of successes. 236 * 237 * @param num the new number of successes. 238 * @throws IllegalArgumentException if <code>num</code> is negative. 239 * @deprecated as of 2.1 (class will become immutable in 3.0) 240 */ 241 @Deprecated 242 public void setNumberOfSuccesses(int num) { 243 setNumberOfSuccessesInternal(num); 244 } 245 246 /** 247 * Modify the number of successes. 248 * 249 * @param num the new number of successes. 250 * @throws IllegalArgumentException if <code>num</code> is negative. 251 */ 252 private void setNumberOfSuccessesInternal(int num) { 253 if (num < 0) { 254 throw MathRuntimeException.createIllegalArgumentException( 255 LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num); 256 } 257 numberOfSuccesses = num; 258 } 259 260 /** 261 * Modify the population size. 262 * 263 * @param size the new population size. 264 * @throws IllegalArgumentException if <code>size</code> is not positive. 265 * @deprecated as of 2.1 (class will become immutable in 3.0) 266 */ 267 @Deprecated 268 public void setPopulationSize(int size) { 269 setPopulationSizeInternal(size); 270 } 271 272 /** 273 * Modify the population size. 274 * 275 * @param size the new population size. 276 * @throws IllegalArgumentException if <code>size</code> is not positive. 277 */ 278 private void setPopulationSizeInternal(int size) { 279 if (size <= 0) { 280 throw MathRuntimeException.createIllegalArgumentException( 281 LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size); 282 } 283 populationSize = size; 284 } 285 286 /** 287 * Modify the sample size. 288 * 289 * @param size the new sample size. 290 * @throws IllegalArgumentException if <code>size</code> is negative. 291 * @deprecated as of 2.1 (class will become immutable in 3.0) 292 */ 293 @Deprecated 294 public void setSampleSize(int size) { 295 setSampleSizeInternal(size); 296 } 297 /** 298 * Modify the sample size. 299 * 300 * @param size the new sample size. 301 * @throws IllegalArgumentException if <code>size</code> is negative. 302 */ 303 private void setSampleSizeInternal(int size) { 304 if (size < 0) { 305 throw MathRuntimeException.createIllegalArgumentException( 306 LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size); 307 } 308 sampleSize = size; 309 } 310 311 /** 312 * For this distribution, X, this method returns P(X ≥ x). 313 * 314 * @param x the value at which the CDF is evaluated. 315 * @return upper tail CDF for this distribution. 316 * @since 1.1 317 */ 318 public double upperCumulativeProbability(int x) { 319 double ret; 320 321 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 322 if (x < domain[0]) { 323 ret = 1.0; 324 } else if (x > domain[1]) { 325 ret = 0.0; 326 } else { 327 ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize); 328 } 329 330 return ret; 331 } 332 333 /** 334 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This 335 * probability is computed by summing the point probabilities for the values 336 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 337 * 338 * @param x0 the inclusive, lower bound 339 * @param x1 the inclusive, upper bound 340 * @param dx the direction of summation. 1 indicates summing from x0 to x1. 341 * 0 indicates summing from x1 to x0. 342 * @param n the population size. 343 * @param m number of successes in the population. 344 * @param k the sample size. 345 * @return P(x0 ≤ X ≤ x1). 346 */ 347 private double innerCumulativeProbability(int x0, int x1, int dx, int n, 348 int m, int k) { 349 double ret = probability(n, m, k, x0); 350 while (x0 != x1) { 351 x0 += dx; 352 ret += probability(n, m, k, x0); 353 } 354 return ret; 355 } 356 357 /** 358 * Returns the lower bound for the support for the distribution. 359 * 360 * For population size <code>N</code>, 361 * number of successes <code>m</code>, and 362 * sample size <code>n</code>, 363 * the lower bound of the support is 364 * <code>max(0, n + m - N)</code> 365 * 366 * @return lower bound of the support 367 * @since 2.2 368 */ 369 public int getSupportLowerBound() { 370 return FastMath.max(0, 371 getSampleSize() + getNumberOfSuccesses() - getPopulationSize()); 372 } 373 374 /** 375 * Returns the upper bound for the support of the distribution. 376 * 377 * For number of successes <code>m</code> and 378 * sample size <code>n</code>, 379 * the upper bound of the support is 380 * <code>min(m, n)</code> 381 * 382 * @return upper bound of the support 383 * @since 2.2 384 */ 385 public int getSupportUpperBound() { 386 return FastMath.min(getNumberOfSuccesses(), getSampleSize()); 387 } 388 389 /** 390 * Returns the mean. 391 * 392 * For population size <code>N</code>, 393 * number of successes <code>m</code>, and 394 * sample size <code>n</code>, the mean is 395 * <code>n * m / N</code> 396 * 397 * @return the mean 398 * @since 2.2 399 */ 400 protected double getNumericalMean() { 401 return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize(); 402 } 403 404 /** 405 * Returns the variance. 406 * 407 * For population size <code>N</code>, 408 * number of successes <code>m</code>, and 409 * sample size <code>n</code>, the variance is 410 * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code> 411 * 412 * @return the variance 413 * @since 2.2 414 */ 415 public double getNumericalVariance() { 416 final double N = getPopulationSize(); 417 final double m = getNumberOfSuccesses(); 418 final double n = getSampleSize(); 419 return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) ); 420 } 421 } 422