Home | History | Annotate | Download | only in distribution
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package org.apache.commons.math.distribution;
     19 
     20 import java.io.Serializable;
     21 
     22 import org.apache.commons.math.MathRuntimeException;
     23 import org.apache.commons.math.exception.util.LocalizedFormats;
     24 import org.apache.commons.math.util.FastMath;
     25 
     26 /**
     27  * Implementation for the {@link ZipfDistribution}.
     28  *
     29  * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
     30  */
     31 public class ZipfDistributionImpl extends AbstractIntegerDistribution
     32     implements ZipfDistribution, Serializable {
     33 
     34     /** Serializable version identifier. */
     35     private static final long serialVersionUID = -140627372283420404L;
     36 
     37     /** Number of elements. */
     38     private int numberOfElements;
     39 
     40     /** Exponent parameter of the distribution. */
     41     private double exponent;
     42 
     43     /**
     44      * Create a new Zipf distribution with the given number of elements and
     45      * exponent. Both values must be positive; otherwise an
     46      * <code>IllegalArgumentException</code> is thrown.
     47      *
     48      * @param numberOfElements the number of elements
     49      * @param exponent the exponent
     50      * @exception IllegalArgumentException if n &le; 0 or s &le; 0.0
     51      */
     52     public ZipfDistributionImpl(final int numberOfElements, final double exponent)
     53         throws IllegalArgumentException {
     54         setNumberOfElementsInternal(numberOfElements);
     55         setExponentInternal(exponent);
     56     }
     57 
     58     /**
     59      * Get the number of elements (e.g. corpus size) for the distribution.
     60      *
     61      * @return the number of elements
     62      */
     63     public int getNumberOfElements() {
     64         return numberOfElements;
     65     }
     66 
     67     /**
     68      * Set the number of elements (e.g. corpus size) for the distribution.
     69      * The parameter value must be positive; otherwise an
     70      * <code>IllegalArgumentException</code> is thrown.
     71      *
     72      * @param n the number of elements
     73      * @exception IllegalArgumentException if n &le; 0
     74      * @deprecated as of 2.1 (class will become immutable in 3.0)
     75      */
     76     @Deprecated
     77     public void setNumberOfElements(final int n) {
     78         setNumberOfElementsInternal(n);
     79     }
     80     /**
     81      * Set the number of elements (e.g. corpus size) for the distribution.
     82      * The parameter value must be positive; otherwise an
     83      * <code>IllegalArgumentException</code> is thrown.
     84      *
     85      * @param n the number of elements
     86      * @exception IllegalArgumentException if n &le; 0
     87      */
     88     private void setNumberOfElementsInternal(final int n)
     89         throws IllegalArgumentException {
     90         if (n <= 0) {
     91             throw MathRuntimeException.createIllegalArgumentException(
     92                     LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0);
     93         }
     94         this.numberOfElements = n;
     95     }
     96 
     97     /**
     98      * Get the exponent characterising the distribution.
     99      *
    100      * @return the exponent
    101      */
    102     public double getExponent() {
    103         return exponent;
    104     }
    105 
    106     /**
    107      * Set the exponent characterising the distribution.
    108      * The parameter value must be positive; otherwise an
    109      * <code>IllegalArgumentException</code> is thrown.
    110      *
    111      * @param s the exponent
    112      * @exception IllegalArgumentException if s &le; 0.0
    113      * @deprecated as of 2.1 (class will become immutable in 3.0)
    114      */
    115     @Deprecated
    116     public void setExponent(final double s) {
    117         setExponentInternal(s);
    118     }
    119 
    120     /**
    121      * Set the exponent characterising the distribution.
    122      * The parameter value must be positive; otherwise an
    123      * <code>IllegalArgumentException</code> is thrown.
    124      *
    125      * @param s the exponent
    126      * @exception IllegalArgumentException if s &le; 0.0
    127      */
    128     private void setExponentInternal(final double s)
    129         throws IllegalArgumentException {
    130         if (s <= 0.0) {
    131             throw MathRuntimeException.createIllegalArgumentException(
    132                     LocalizedFormats.NOT_POSITIVE_EXPONENT,
    133                     s);
    134         }
    135         this.exponent = s;
    136     }
    137 
    138     /**
    139      * The probability mass function P(X = x) for a Zipf distribution.
    140      *
    141      * @param x the value at which the probability density function is evaluated.
    142      * @return the value of the probability mass function at x
    143      */
    144     public double probability(final int x) {
    145         if (x <= 0 || x > numberOfElements) {
    146             return 0.0;
    147         }
    148 
    149         return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
    150 
    151     }
    152 
    153     /**
    154      * The probability distribution function P(X <= x) for a Zipf distribution.
    155      *
    156      * @param x the value at which the PDF is evaluated.
    157      * @return Zipf distribution function evaluated at x
    158      */
    159     @Override
    160     public double cumulativeProbability(final int x) {
    161         if (x <= 0) {
    162             return 0.0;
    163         } else if (x >= numberOfElements) {
    164             return 1.0;
    165         }
    166 
    167         return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
    168 
    169     }
    170 
    171     /**
    172      * Access the domain value lower bound, based on <code>p</code>, used to
    173      * bracket a PDF root.
    174      *
    175      * @param p the desired probability for the critical value
    176      * @return domain value lower bound, i.e.
    177      *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code>
    178      */
    179     @Override
    180     protected int getDomainLowerBound(final double p) {
    181         return 0;
    182     }
    183 
    184     /**
    185      * Access the domain value upper bound, based on <code>p</code>, used to
    186      * bracket a PDF root.
    187      *
    188      * @param p the desired probability for the critical value
    189      * @return domain value upper bound, i.e.
    190      *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
    191      */
    192     @Override
    193     protected int getDomainUpperBound(final double p) {
    194         return numberOfElements;
    195     }
    196 
    197 
    198     /**
    199      * Calculates the Nth generalized harmonic number. See
    200      * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
    201      * Series</a>.
    202      *
    203      * @param n the term in the series to calculate (must be &ge; 1)
    204      * @param m the exponent; special case m == 1.0 is the harmonic series
    205      * @return the nth generalized harmonic number
    206      */
    207     private double generalizedHarmonic(final int n, final double m) {
    208         double value = 0;
    209         for (int k = n; k > 0; --k) {
    210             value += 1.0 / FastMath.pow(k, m);
    211         }
    212         return value;
    213     }
    214 
    215     /**
    216      * Returns the lower bound of the support for the distribution.
    217      *
    218      * The lower bound of the support is always 1 no matter the parameters.
    219      *
    220      * @return lower bound of the support (always 1)
    221      * @since 2.2
    222      */
    223     public int getSupportLowerBound() {
    224         return 1;
    225     }
    226 
    227     /**
    228      * Returns the upper bound of the support for the distribution.
    229      *
    230      * The upper bound of the support is the number of elements
    231      *
    232      * @return upper bound of the support
    233      * @since 2.2
    234      */
    235     public int getSupportUpperBound() {
    236         return getNumberOfElements();
    237     }
    238 
    239     /**
    240      * Returns the mean.
    241      *
    242      * For number of elements N and exponent s, the mean is
    243      * <code>Hs1 / Hs</code> where
    244      * <ul>
    245      *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
    246      *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
    247      * </ul>
    248      *
    249      * @return the mean
    250      * @since 2.2
    251      */
    252     protected double getNumericalMean() {
    253         final int N = getNumberOfElements();
    254         final double s = getExponent();
    255 
    256         final double Hs1 = generalizedHarmonic(N, s - 1);
    257         final double Hs = generalizedHarmonic(N, s);
    258 
    259         return Hs1 / Hs;
    260     }
    261 
    262     /**
    263      * Returns the variance.
    264      *
    265      * For number of elements N and exponent s, the mean is
    266      * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
    267      * <ul>
    268      *  <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
    269      *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
    270      *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
    271      * </ul>
    272      *
    273      * @return the variance
    274      * @since 2.2
    275      */
    276     protected double getNumericalVariance() {
    277         final int N = getNumberOfElements();
    278         final double s = getExponent();
    279 
    280         final double Hs2 = generalizedHarmonic(N, s - 2);
    281         final double Hs1 = generalizedHarmonic(N, s - 1);
    282         final double Hs = generalizedHarmonic(N, s);
    283 
    284         return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
    285     }
    286 }
    287