1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.distribution; 19 20 import java.io.Serializable; 21 22 import org.apache.commons.math.MathRuntimeException; 23 import org.apache.commons.math.exception.util.LocalizedFormats; 24 import org.apache.commons.math.util.FastMath; 25 26 /** 27 * Implementation for the {@link ZipfDistribution}. 28 * 29 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $ 30 */ 31 public class ZipfDistributionImpl extends AbstractIntegerDistribution 32 implements ZipfDistribution, Serializable { 33 34 /** Serializable version identifier. */ 35 private static final long serialVersionUID = -140627372283420404L; 36 37 /** Number of elements. */ 38 private int numberOfElements; 39 40 /** Exponent parameter of the distribution. */ 41 private double exponent; 42 43 /** 44 * Create a new Zipf distribution with the given number of elements and 45 * exponent. Both values must be positive; otherwise an 46 * <code>IllegalArgumentException</code> is thrown. 47 * 48 * @param numberOfElements the number of elements 49 * @param exponent the exponent 50 * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0 51 */ 52 public ZipfDistributionImpl(final int numberOfElements, final double exponent) 53 throws IllegalArgumentException { 54 setNumberOfElementsInternal(numberOfElements); 55 setExponentInternal(exponent); 56 } 57 58 /** 59 * Get the number of elements (e.g. corpus size) for the distribution. 60 * 61 * @return the number of elements 62 */ 63 public int getNumberOfElements() { 64 return numberOfElements; 65 } 66 67 /** 68 * Set the number of elements (e.g. corpus size) for the distribution. 69 * The parameter value must be positive; otherwise an 70 * <code>IllegalArgumentException</code> is thrown. 71 * 72 * @param n the number of elements 73 * @exception IllegalArgumentException if n ≤ 0 74 * @deprecated as of 2.1 (class will become immutable in 3.0) 75 */ 76 @Deprecated 77 public void setNumberOfElements(final int n) { 78 setNumberOfElementsInternal(n); 79 } 80 /** 81 * Set the number of elements (e.g. corpus size) for the distribution. 82 * The parameter value must be positive; otherwise an 83 * <code>IllegalArgumentException</code> is thrown. 84 * 85 * @param n the number of elements 86 * @exception IllegalArgumentException if n ≤ 0 87 */ 88 private void setNumberOfElementsInternal(final int n) 89 throws IllegalArgumentException { 90 if (n <= 0) { 91 throw MathRuntimeException.createIllegalArgumentException( 92 LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0); 93 } 94 this.numberOfElements = n; 95 } 96 97 /** 98 * Get the exponent characterising the distribution. 99 * 100 * @return the exponent 101 */ 102 public double getExponent() { 103 return exponent; 104 } 105 106 /** 107 * Set the exponent characterising the distribution. 108 * The parameter value must be positive; otherwise an 109 * <code>IllegalArgumentException</code> is thrown. 110 * 111 * @param s the exponent 112 * @exception IllegalArgumentException if s ≤ 0.0 113 * @deprecated as of 2.1 (class will become immutable in 3.0) 114 */ 115 @Deprecated 116 public void setExponent(final double s) { 117 setExponentInternal(s); 118 } 119 120 /** 121 * Set the exponent characterising the distribution. 122 * The parameter value must be positive; otherwise an 123 * <code>IllegalArgumentException</code> is thrown. 124 * 125 * @param s the exponent 126 * @exception IllegalArgumentException if s ≤ 0.0 127 */ 128 private void setExponentInternal(final double s) 129 throws IllegalArgumentException { 130 if (s <= 0.0) { 131 throw MathRuntimeException.createIllegalArgumentException( 132 LocalizedFormats.NOT_POSITIVE_EXPONENT, 133 s); 134 } 135 this.exponent = s; 136 } 137 138 /** 139 * The probability mass function P(X = x) for a Zipf distribution. 140 * 141 * @param x the value at which the probability density function is evaluated. 142 * @return the value of the probability mass function at x 143 */ 144 public double probability(final int x) { 145 if (x <= 0 || x > numberOfElements) { 146 return 0.0; 147 } 148 149 return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); 150 151 } 152 153 /** 154 * The probability distribution function P(X <= x) for a Zipf distribution. 155 * 156 * @param x the value at which the PDF is evaluated. 157 * @return Zipf distribution function evaluated at x 158 */ 159 @Override 160 public double cumulativeProbability(final int x) { 161 if (x <= 0) { 162 return 0.0; 163 } else if (x >= numberOfElements) { 164 return 1.0; 165 } 166 167 return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); 168 169 } 170 171 /** 172 * Access the domain value lower bound, based on <code>p</code>, used to 173 * bracket a PDF root. 174 * 175 * @param p the desired probability for the critical value 176 * @return domain value lower bound, i.e. 177 * P(X < <i>lower bound</i>) < <code>p</code> 178 */ 179 @Override 180 protected int getDomainLowerBound(final double p) { 181 return 0; 182 } 183 184 /** 185 * Access the domain value upper bound, based on <code>p</code>, used to 186 * bracket a PDF root. 187 * 188 * @param p the desired probability for the critical value 189 * @return domain value upper bound, i.e. 190 * P(X < <i>upper bound</i>) > <code>p</code> 191 */ 192 @Override 193 protected int getDomainUpperBound(final double p) { 194 return numberOfElements; 195 } 196 197 198 /** 199 * Calculates the Nth generalized harmonic number. See 200 * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic 201 * Series</a>. 202 * 203 * @param n the term in the series to calculate (must be ≥ 1) 204 * @param m the exponent; special case m == 1.0 is the harmonic series 205 * @return the nth generalized harmonic number 206 */ 207 private double generalizedHarmonic(final int n, final double m) { 208 double value = 0; 209 for (int k = n; k > 0; --k) { 210 value += 1.0 / FastMath.pow(k, m); 211 } 212 return value; 213 } 214 215 /** 216 * Returns the lower bound of the support for the distribution. 217 * 218 * The lower bound of the support is always 1 no matter the parameters. 219 * 220 * @return lower bound of the support (always 1) 221 * @since 2.2 222 */ 223 public int getSupportLowerBound() { 224 return 1; 225 } 226 227 /** 228 * Returns the upper bound of the support for the distribution. 229 * 230 * The upper bound of the support is the number of elements 231 * 232 * @return upper bound of the support 233 * @since 2.2 234 */ 235 public int getSupportUpperBound() { 236 return getNumberOfElements(); 237 } 238 239 /** 240 * Returns the mean. 241 * 242 * For number of elements N and exponent s, the mean is 243 * <code>Hs1 / Hs</code> where 244 * <ul> 245 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> 246 * <li><code>Hs = generalizedHarmonic(N, s)</code></li> 247 * </ul> 248 * 249 * @return the mean 250 * @since 2.2 251 */ 252 protected double getNumericalMean() { 253 final int N = getNumberOfElements(); 254 final double s = getExponent(); 255 256 final double Hs1 = generalizedHarmonic(N, s - 1); 257 final double Hs = generalizedHarmonic(N, s); 258 259 return Hs1 / Hs; 260 } 261 262 /** 263 * Returns the variance. 264 * 265 * For number of elements N and exponent s, the mean is 266 * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where 267 * <ul> 268 * <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li> 269 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> 270 * <li><code>Hs = generalizedHarmonic(N, s)</code></li> 271 * </ul> 272 * 273 * @return the variance 274 * @since 2.2 275 */ 276 protected double getNumericalVariance() { 277 final int N = getNumberOfElements(); 278 final double s = getExponent(); 279 280 final double Hs2 = generalizedHarmonic(N, s - 2); 281 final double Hs1 = generalizedHarmonic(N, s - 1); 282 final double Hs = generalizedHarmonic(N, s); 283 284 return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); 285 } 286 } 287