Home | History | Annotate | Download | only in descriptive
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package org.apache.commons.math.stat.descriptive;
     19 
     20 import java.io.Serializable;
     21 import java.util.Collection;
     22 import java.util.Iterator;
     23 
     24 /**
     25  * <p>
     26  * An aggregator for {@code SummaryStatistics} from several data sets or
     27  * data set partitions.  In its simplest usage mode, the client creates an
     28  * instance via the zero-argument constructor, then uses
     29  * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
     30  * for each individual data set / partition.  The per-set statistics objects
     31  * are used as normal, and at any time the aggregate statistics for all the
     32  * contributors can be obtained from this object.
     33  * </p><p>
     34  * Clients with specialized requirements can use alternative constructors to
     35  * control the statistics implementations and initial values used by the
     36  * contributing and the internal aggregate {@code SummaryStatistics} objects.
     37  * </p><p>
     38  * A static {@link #aggregate(Collection)} method is also included that computes
     39  * aggregate statistics directly from a Collection of SummaryStatistics instances.
     40  * </p><p>
     41  * When {@link #createContributingStatistics()} is used to create SummaryStatistics
     42  * instances to be aggregated concurrently, the created instances'
     43  * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
     44  * instance maintained by this class.  In multithreaded environments, if the functionality
     45  * provided by {@link #aggregate(Collection)} is adequate, that method should be used
     46  * to avoid unnecessary computation and synchronization delays.</p>
     47  *
     48  * @since 2.0
     49  * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $
     50  *
     51  */
     52 public class AggregateSummaryStatistics implements StatisticalSummary,
     53         Serializable {
     54 
     55 
     56     /** Serializable version identifier */
     57     private static final long serialVersionUID = -8207112444016386906L;
     58 
     59     /**
     60      * A SummaryStatistics serving as a prototype for creating SummaryStatistics
     61      * contributing to this aggregate
     62      */
     63     private final SummaryStatistics statisticsPrototype;
     64 
     65     /**
     66      * The SummaryStatistics in which aggregate statistics are accumulated.
     67      */
     68     private final SummaryStatistics statistics;
     69 
     70     /**
     71      * Initializes a new AggregateSummaryStatistics with default statistics
     72      * implementations.
     73      *
     74      */
     75     public AggregateSummaryStatistics() {
     76         this(new SummaryStatistics());
     77     }
     78 
     79     /**
     80      * Initializes a new AggregateSummaryStatistics with the specified statistics
     81      * object as a prototype for contributing statistics and for the internal
     82      * aggregate statistics.  This provides for customized statistics implementations
     83      * to be used by contributing and aggregate statistics.
     84      *
     85      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
     86      *      prototype both for the internal aggregate statistics and for
     87      *      contributing statistics obtained via the
     88      *      {@code createContributingStatistics()} method.  Being a prototype
     89      *      means that other objects are initialized by copying this object's state.
     90      *      If {@code null}, a new, default statistics object is used.  Any statistic
     91      *      values in the prototype are propagated to contributing statistics
     92      *      objects and (once) into these aggregate statistics.
     93      * @see #createContributingStatistics()
     94      */
     95     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
     96         this(prototypeStatistics,
     97              prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
     98     }
     99 
    100     /**
    101      * Initializes a new AggregateSummaryStatistics with the specified statistics
    102      * object as a prototype for contributing statistics and for the internal
    103      * aggregate statistics.  This provides for different statistics implementations
    104      * to be used by contributing and aggregate statistics and for an initial
    105      * state to be supplied for the aggregate statistics.
    106      *
    107      * @param prototypeStatistics a {@code SummaryStatistics} serving as a
    108      *      prototype both for the internal aggregate statistics and for
    109      *      contributing statistics obtained via the
    110      *      {@code createContributingStatistics()} method.  Being a prototype
    111      *      means that other objects are initialized by copying this object's state.
    112      *      If {@code null}, a new, default statistics object is used.  Any statistic
    113      *      values in the prototype are propagated to contributing statistics
    114      *      objects, but not into these aggregate statistics.
    115      * @param initialStatistics a {@code SummaryStatistics} to serve as the
    116      *      internal aggregate statistics object.  If {@code null}, a new, default
    117      *      statistics object is used.
    118      * @see #createContributingStatistics()
    119      */
    120     public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
    121                                       SummaryStatistics initialStatistics) {
    122         this.statisticsPrototype =
    123             (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
    124         this.statistics =
    125             (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
    126     }
    127 
    128     /**
    129      * {@inheritDoc}.  This version returns the maximum over all the aggregated
    130      * data.
    131      *
    132      * @see StatisticalSummary#getMax()
    133      */
    134     public double getMax() {
    135         synchronized (statistics) {
    136             return statistics.getMax();
    137         }
    138     }
    139 
    140     /**
    141      * {@inheritDoc}.  This version returns the mean of all the aggregated data.
    142      *
    143      * @see StatisticalSummary#getMean()
    144      */
    145     public double getMean() {
    146         synchronized (statistics) {
    147             return statistics.getMean();
    148         }
    149     }
    150 
    151     /**
    152      * {@inheritDoc}.  This version returns the minimum over all the aggregated
    153      * data.
    154      *
    155      * @see StatisticalSummary#getMin()
    156      */
    157     public double getMin() {
    158         synchronized (statistics) {
    159             return statistics.getMin();
    160         }
    161     }
    162 
    163     /**
    164      * {@inheritDoc}.  This version returns a count of all the aggregated data.
    165      *
    166      * @see StatisticalSummary#getN()
    167      */
    168     public long getN() {
    169         synchronized (statistics) {
    170             return statistics.getN();
    171         }
    172     }
    173 
    174     /**
    175      * {@inheritDoc}.  This version returns the standard deviation of all the
    176      * aggregated data.
    177      *
    178      * @see StatisticalSummary#getStandardDeviation()
    179      */
    180     public double getStandardDeviation() {
    181         synchronized (statistics) {
    182             return statistics.getStandardDeviation();
    183         }
    184     }
    185 
    186     /**
    187      * {@inheritDoc}.  This version returns a sum of all the aggregated data.
    188      *
    189      * @see StatisticalSummary#getSum()
    190      */
    191     public double getSum() {
    192         synchronized (statistics) {
    193             return statistics.getSum();
    194         }
    195     }
    196 
    197     /**
    198      * {@inheritDoc}.  This version returns the variance of all the aggregated
    199      * data.
    200      *
    201      * @see StatisticalSummary#getVariance()
    202      */
    203     public double getVariance() {
    204         synchronized (statistics) {
    205             return statistics.getVariance();
    206         }
    207     }
    208 
    209     /**
    210      * Returns the sum of the logs of all the aggregated data.
    211      *
    212      * @return the sum of logs
    213      * @see SummaryStatistics#getSumOfLogs()
    214      */
    215     public double getSumOfLogs() {
    216         synchronized (statistics) {
    217             return statistics.getSumOfLogs();
    218         }
    219     }
    220 
    221     /**
    222      * Returns the geometric mean of all the aggregated data.
    223      *
    224      * @return the geometric mean
    225      * @see SummaryStatistics#getGeometricMean()
    226      */
    227     public double getGeometricMean() {
    228         synchronized (statistics) {
    229             return statistics.getGeometricMean();
    230         }
    231     }
    232 
    233     /**
    234      * Returns the sum of the squares of all the aggregated data.
    235      *
    236      * @return The sum of squares
    237      * @see SummaryStatistics#getSumsq()
    238      */
    239     public double getSumsq() {
    240         synchronized (statistics) {
    241             return statistics.getSumsq();
    242         }
    243     }
    244 
    245     /**
    246      * Returns a statistic related to the Second Central Moment.  Specifically,
    247      * what is returned is the sum of squared deviations from the sample mean
    248      * among the all of the aggregated data.
    249      *
    250      * @return second central moment statistic
    251      * @see SummaryStatistics#getSecondMoment()
    252      */
    253     public double getSecondMoment() {
    254         synchronized (statistics) {
    255             return statistics.getSecondMoment();
    256         }
    257     }
    258 
    259     /**
    260      * Return a {@link StatisticalSummaryValues} instance reporting current
    261      * aggregate statistics.
    262      *
    263      * @return Current values of aggregate statistics
    264      */
    265     public StatisticalSummary getSummary() {
    266         synchronized (statistics) {
    267             return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
    268                     getMax(), getMin(), getSum());
    269         }
    270     }
    271 
    272     /**
    273      * Creates and returns a {@code SummaryStatistics} whose data will be
    274      * aggregated with those of this {@code AggregateSummaryStatistics}.
    275      *
    276      * @return a {@code SummaryStatistics} whose data will be aggregated with
    277      *      those of this {@code AggregateSummaryStatistics}.  The initial state
    278      *      is a copy of the configured prototype statistics.
    279      */
    280     public SummaryStatistics createContributingStatistics() {
    281         SummaryStatistics contributingStatistics
    282                 = new AggregatingSummaryStatistics(statistics);
    283 
    284         SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
    285 
    286         return contributingStatistics;
    287     }
    288 
    289     /**
    290      * Computes aggregate summary statistics. This method can be used to combine statistics
    291      * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
    292      * should contain the same values that would have been obtained by computing a single
    293      * StatisticalSummary over the combined dataset.
    294      * <p>
    295      * Returns null if the collection is empty or null.
    296      * </p>
    297      *
    298      * @param statistics collection of SummaryStatistics to aggregate
    299      * @return summary statistics for the combined dataset
    300      */
    301     public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
    302         if (statistics == null) {
    303             return null;
    304         }
    305         Iterator<SummaryStatistics> iterator = statistics.iterator();
    306         if (!iterator.hasNext()) {
    307             return null;
    308         }
    309         SummaryStatistics current = iterator.next();
    310         long n = current.getN();
    311         double min = current.getMin();
    312         double sum = current.getSum();
    313         double max = current.getMax();
    314         double m2 = current.getSecondMoment();
    315         double mean = current.getMean();
    316         while (iterator.hasNext()) {
    317             current = iterator.next();
    318             if (current.getMin() < min || Double.isNaN(min)) {
    319                 min = current.getMin();
    320             }
    321             if (current.getMax() > max || Double.isNaN(max)) {
    322                 max = current.getMax();
    323             }
    324             sum += current.getSum();
    325             final double oldN = n;
    326             final double curN = current.getN();
    327             n += curN;
    328             final double meanDiff = current.getMean() - mean;
    329             mean = sum / n;
    330             m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
    331         }
    332         final double variance;
    333         if (n == 0) {
    334             variance = Double.NaN;
    335         } else if (n == 1) {
    336             variance = 0d;
    337         } else {
    338             variance = m2 / (n - 1);
    339         }
    340         return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
    341     }
    342 
    343     /**
    344      * A SummaryStatistics that also forwards all values added to it to a second
    345      * {@code SummaryStatistics} for aggregation.
    346      *
    347      * @since 2.0
    348      */
    349     private static class AggregatingSummaryStatistics extends SummaryStatistics {
    350 
    351         /**
    352          * The serialization version of this class
    353          */
    354         private static final long serialVersionUID = 1L;
    355 
    356         /**
    357          * An additional SummaryStatistics into which values added to these
    358          * statistics (and possibly others) are aggregated
    359          */
    360         private final SummaryStatistics aggregateStatistics;
    361 
    362         /**
    363          * Initializes a new AggregatingSummaryStatistics with the specified
    364          * aggregate statistics object
    365          *
    366          * @param aggregateStatistics a {@code SummaryStatistics} into which
    367          *      values added to this statistics object should be aggregated
    368          */
    369         public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
    370             this.aggregateStatistics = aggregateStatistics;
    371         }
    372 
    373         /**
    374          * {@inheritDoc}.  This version adds the provided value to the configured
    375          * aggregate after adding it to these statistics.
    376          *
    377          * @see SummaryStatistics#addValue(double)
    378          */
    379         @Override
    380         public void addValue(double value) {
    381             super.addValue(value);
    382             synchronized (aggregateStatistics) {
    383                 aggregateStatistics.addValue(value);
    384             }
    385         }
    386 
    387         /**
    388          * Returns true iff <code>object</code> is a
    389          * <code>SummaryStatistics</code> instance and all statistics have the
    390          * same values as this.
    391          * @param object the object to test equality against.
    392          * @return true if object equals this
    393          */
    394         @Override
    395         public boolean equals(Object object) {
    396             if (object == this) {
    397                 return true;
    398             }
    399             if (object instanceof AggregatingSummaryStatistics == false) {
    400                 return false;
    401             }
    402             AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
    403             return super.equals(stat) &&
    404                    aggregateStatistics.equals(stat.aggregateStatistics);
    405         }
    406 
    407         /**
    408          * Returns hash code based on values of statistics
    409          * @return hash code
    410          */
    411         @Override
    412         public int hashCode() {
    413             return 123 + super.hashCode() + aggregateStatistics.hashCode();
    414         }
    415     }
    416 }
    417