Home | History | Annotate | Download | only in stat
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 package org.apache.commons.math.stat;
     18 
     19 import java.io.Serializable;
     20 import java.text.NumberFormat;
     21 import java.util.Iterator;
     22 import java.util.Comparator;
     23 import java.util.TreeMap;
     24 
     25 import org.apache.commons.math.MathRuntimeException;
     26 import org.apache.commons.math.exception.util.LocalizedFormats;
     27 
     28 /**
     29  * Maintains a frequency distribution.
     30  * <p>
     31  * Accepts int, long, char or Comparable values.  New values added must be
     32  * comparable to those that have been added, otherwise the add method will
     33  * throw an IllegalArgumentException.</p>
     34  * <p>
     35  * Integer values (int, long, Integer, Long) are not distinguished by type --
     36  * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
     37  * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
     38  * <p>
     39  * char values are converted by <code>addValue</code> to Character instances.
     40  * As such, these values are not comparable to integral values, so attempts
     41  * to combine integral types with chars in a frequency distribution will fail.
     42  * </p>
     43  * <p>
     44  * The values are ordered using the default (natural order), unless a
     45  * <code>Comparator</code> is supplied in the constructor.</p>
     46  *
     47  * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
     48  */
     49 public class Frequency implements Serializable {
     50 
     51     /** Serializable version identifier */
     52     private static final long serialVersionUID = -3845586908418844111L;
     53 
     54     /** underlying collection */
     55     private final TreeMap<Comparable<?>, Long> freqTable;
     56 
     57     /**
     58      * Default constructor.
     59      */
     60     public Frequency() {
     61         freqTable = new TreeMap<Comparable<?>, Long>();
     62     }
     63 
     64     /**
     65      * Constructor allowing values Comparator to be specified.
     66      *
     67      * @param comparator Comparator used to order values
     68      */
     69     @SuppressWarnings("unchecked") // TODO is the cast OK?
     70     public Frequency(Comparator<?> comparator) {
     71         freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
     72     }
     73 
     74     /**
     75      * Return a string representation of this frequency
     76      * distribution.
     77      *
     78      * @return a string representation.
     79      */
     80     @Override
     81     public String toString() {
     82         NumberFormat nf = NumberFormat.getPercentInstance();
     83         StringBuilder outBuffer = new StringBuilder();
     84         outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
     85         Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
     86         while (iter.hasNext()) {
     87             Comparable<?> value = iter.next();
     88             outBuffer.append(value);
     89             outBuffer.append('\t');
     90             outBuffer.append(getCount(value));
     91             outBuffer.append('\t');
     92             outBuffer.append(nf.format(getPct(value)));
     93             outBuffer.append('\t');
     94             outBuffer.append(nf.format(getCumPct(value)));
     95             outBuffer.append('\n');
     96         }
     97         return outBuffer.toString();
     98     }
     99 
    100     /**
    101      * Adds 1 to the frequency count for v.
    102      * <p>
    103      * If other objects have already been added to this Frequency, v must
    104      * be comparable to those that have already been added.
    105      * </p>
    106      *
    107      * @param v the value to add.
    108      * @throws IllegalArgumentException if <code>v</code> is not Comparable,
    109      *         or is not comparable with previous entries
    110      * @deprecated use {@link #addValue(Comparable)} instead
    111      */
    112     @Deprecated
    113     public void addValue(Object v) {
    114         if (v instanceof Comparable<?>){
    115             addValue((Comparable<?>) v);
    116         } else {
    117             throw MathRuntimeException.createIllegalArgumentException(
    118                   LocalizedFormats.CLASS_DOESNT_IMPLEMENT_COMPARABLE,
    119                   v.getClass().getName());
    120         }
    121     }
    122 
    123     /**
    124      * Adds 1 to the frequency count for v.
    125      * <p>
    126      * If other objects have already been added to this Frequency, v must
    127      * be comparable to those that have already been added.
    128      * </p>
    129      *
    130      * @param v the value to add.
    131      * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
    132      */
    133     public void addValue(Comparable<?> v){
    134         Comparable<?> obj = v;
    135         if (v instanceof Integer) {
    136            obj = Long.valueOf(((Integer) v).longValue());
    137         }
    138         try {
    139             Long count = freqTable.get(obj);
    140             if (count == null) {
    141                 freqTable.put(obj, Long.valueOf(1));
    142             } else {
    143                 freqTable.put(obj, Long.valueOf(count.longValue() + 1));
    144             }
    145         } catch (ClassCastException ex) {
    146             //TreeMap will throw ClassCastException if v is not comparable
    147             throw MathRuntimeException.createIllegalArgumentException(
    148                   LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
    149                   v.getClass().getName());
    150         }
    151     }
    152 
    153     /**
    154      * Adds 1 to the frequency count for v.
    155      *
    156      * @param v the value to add.
    157      */
    158     public void addValue(int v) {
    159         addValue(Long.valueOf(v));
    160     }
    161 
    162     /**
    163      * Adds 1 to the frequency count for v.
    164      *
    165      * @param v the value to add.
    166      * @deprecated to be removed in math 3.0
    167      */
    168     @Deprecated
    169     public void addValue(Integer v) {
    170         addValue(Long.valueOf(v.longValue()));
    171     }
    172 
    173     /**
    174      * Adds 1 to the frequency count for v.
    175      *
    176      * @param v the value to add.
    177      */
    178     public void addValue(long v) {
    179         addValue(Long.valueOf(v));
    180     }
    181 
    182     /**
    183      * Adds 1 to the frequency count for v.
    184      *
    185      * @param v the value to add.
    186      */
    187     public void addValue(char v) {
    188         addValue(Character.valueOf(v));
    189     }
    190 
    191     /** Clears the frequency table */
    192     public void clear() {
    193         freqTable.clear();
    194     }
    195 
    196     /**
    197      * Returns an Iterator over the set of values that have been added.
    198      * <p>
    199      * If added values are integral (i.e., integers, longs, Integers, or Longs),
    200      * they are converted to Longs when they are added, so the objects returned
    201      * by the Iterator will in this case be Longs.</p>
    202      *
    203      * @return values Iterator
    204      */
    205     public Iterator<Comparable<?>> valuesIterator() {
    206         return freqTable.keySet().iterator();
    207     }
    208 
    209     //-------------------------------------------------------------------------
    210 
    211     /**
    212      * Returns the sum of all frequencies.
    213      *
    214      * @return the total frequency count.
    215      */
    216     public long getSumFreq() {
    217         long result = 0;
    218         Iterator<Long> iterator = freqTable.values().iterator();
    219         while (iterator.hasNext())  {
    220             result += iterator.next().longValue();
    221         }
    222         return result;
    223     }
    224 
    225     /**
    226      * Returns the number of values = v.
    227      * Returns 0 if the value is not comparable.
    228      *
    229      * @param v the value to lookup.
    230      * @return the frequency of v.
    231      * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0
    232      */
    233     @Deprecated
    234     public long getCount(Object v) {
    235         return getCount((Comparable<?>) v);
    236     }
    237 
    238     /**
    239      * Returns the number of values = v.
    240      * Returns 0 if the value is not comparable.
    241      *
    242      * @param v the value to lookup.
    243      * @return the frequency of v.
    244      */
    245     public long getCount(Comparable<?> v) {
    246         if (v instanceof Integer) {
    247             return getCount(((Integer) v).longValue());
    248         }
    249         long result = 0;
    250         try {
    251             Long count =  freqTable.get(v);
    252             if (count != null) {
    253                 result = count.longValue();
    254             }
    255         } catch (ClassCastException ex) {
    256             // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
    257         }
    258         return result;
    259     }
    260 
    261     /**
    262      * Returns the number of values = v.
    263      *
    264      * @param v the value to lookup.
    265      * @return the frequency of v.
    266      */
    267     public long getCount(int v) {
    268         return getCount(Long.valueOf(v));
    269     }
    270 
    271     /**
    272      * Returns the number of values = v.
    273      *
    274      * @param v the value to lookup.
    275      * @return the frequency of v.
    276      */
    277     public long getCount(long v) {
    278         return getCount(Long.valueOf(v));
    279     }
    280 
    281     /**
    282      * Returns the number of values = v.
    283      *
    284      * @param v the value to lookup.
    285      * @return the frequency of v.
    286      */
    287     public long getCount(char v) {
    288         return getCount(Character.valueOf(v));
    289     }
    290 
    291     /**
    292      * Returns the number of values in the frequency table.
    293      *
    294      * @return the number of unique values that have been added to the frequency table.
    295      * @see #valuesIterator()
    296      */
    297     public int getUniqueCount(){
    298         return freqTable.keySet().size();
    299     }
    300 
    301     //-------------------------------------------------------------
    302 
    303     /**
    304       * Returns the percentage of values that are equal to v
    305      * (as a proportion between 0 and 1).
    306      * <p>
    307      * Returns <code>Double.NaN</code> if no values have been added.</p>
    308      *
    309      * @param v the value to lookup
    310      * @return the proportion of values equal to v
    311      * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0
    312      */
    313     @Deprecated
    314     public double getPct(Object v) {
    315         return getPct((Comparable<?>) v);
    316     }
    317 
    318     /**
    319      * Returns the percentage of values that are equal to v
    320      * (as a proportion between 0 and 1).
    321      * <p>
    322      * Returns <code>Double.NaN</code> if no values have been added.</p>
    323      *
    324      * @param v the value to lookup
    325      * @return the proportion of values equal to v
    326      */
    327     public double getPct(Comparable<?> v) {
    328         final long sumFreq = getSumFreq();
    329         if (sumFreq == 0) {
    330             return Double.NaN;
    331         }
    332         return (double) getCount(v) / (double) sumFreq;
    333     }
    334 
    335     /**
    336      * Returns the percentage of values that are equal to v
    337      * (as a proportion between 0 and 1).
    338      *
    339      * @param v the value to lookup
    340      * @return the proportion of values equal to v
    341      */
    342     public double getPct(int v) {
    343         return getPct(Long.valueOf(v));
    344     }
    345 
    346     /**
    347      * Returns the percentage of values that are equal to v
    348      * (as a proportion between 0 and 1).
    349      *
    350      * @param v the value to lookup
    351      * @return the proportion of values equal to v
    352      */
    353     public double getPct(long v) {
    354         return getPct(Long.valueOf(v));
    355     }
    356 
    357     /**
    358      * Returns the percentage of values that are equal to v
    359      * (as a proportion between 0 and 1).
    360      *
    361      * @param v the value to lookup
    362      * @return the proportion of values equal to v
    363      */
    364     public double getPct(char v) {
    365         return getPct(Character.valueOf(v));
    366     }
    367 
    368     //-----------------------------------------------------------------------------------------
    369 
    370     /**
    371      * Returns the cumulative frequency of values less than or equal to v.
    372      * <p>
    373      * Returns 0 if v is not comparable to the values set.</p>
    374      *
    375      * @param v the value to lookup.
    376      * @return the proportion of values equal to v
    377      * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0
    378      */
    379     @Deprecated
    380     public long getCumFreq(Object v) {
    381         return getCumFreq((Comparable<?>) v);
    382     }
    383 
    384     /**
    385      * Returns the cumulative frequency of values less than or equal to v.
    386      * <p>
    387      * Returns 0 if v is not comparable to the values set.</p>
    388      *
    389      * @param v the value to lookup.
    390      * @return the proportion of values equal to v
    391      */
    392     public long getCumFreq(Comparable<?> v) {
    393         if (getSumFreq() == 0) {
    394             return 0;
    395         }
    396         if (v instanceof Integer) {
    397             return getCumFreq(((Integer) v).longValue());
    398         }
    399         @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
    400         Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
    401         if (c == null) {
    402             c = new NaturalComparator();
    403         }
    404         long result = 0;
    405 
    406         try {
    407             Long value = freqTable.get(v);
    408             if (value != null) {
    409                 result = value.longValue();
    410             }
    411         } catch (ClassCastException ex) {
    412             return result;   // v is not comparable
    413         }
    414 
    415         if (c.compare(v, freqTable.firstKey()) < 0) {
    416             return 0;  // v is comparable, but less than first value
    417         }
    418 
    419         if (c.compare(v, freqTable.lastKey()) >= 0) {
    420             return getSumFreq();    // v is comparable, but greater than the last value
    421         }
    422 
    423         Iterator<Comparable<?>> values = valuesIterator();
    424         while (values.hasNext()) {
    425             Comparable<?> nextValue = values.next();
    426             if (c.compare(v, nextValue) > 0) {
    427                 result += getCount(nextValue);
    428             } else {
    429                 return result;
    430             }
    431         }
    432         return result;
    433     }
    434 
    435      /**
    436      * Returns the cumulative frequency of values less than or equal to v.
    437      * <p>
    438      * Returns 0 if v is not comparable to the values set.</p>
    439      *
    440      * @param v the value to lookup
    441      * @return the proportion of values equal to v
    442      */
    443     public long getCumFreq(int v) {
    444         return getCumFreq(Long.valueOf(v));
    445     }
    446 
    447      /**
    448      * Returns the cumulative frequency of values less than or equal to v.
    449      * <p>
    450      * Returns 0 if v is not comparable to the values set.</p>
    451      *
    452      * @param v the value to lookup
    453      * @return the proportion of values equal to v
    454      */
    455     public long getCumFreq(long v) {
    456         return getCumFreq(Long.valueOf(v));
    457     }
    458 
    459     /**
    460      * Returns the cumulative frequency of values less than or equal to v.
    461      * <p>
    462      * Returns 0 if v is not comparable to the values set.</p>
    463      *
    464      * @param v the value to lookup
    465      * @return the proportion of values equal to v
    466      */
    467     public long getCumFreq(char v) {
    468         return getCumFreq(Character.valueOf(v));
    469     }
    470 
    471     //----------------------------------------------------------------------------------------------
    472 
    473     /**
    474      * Returns the cumulative percentage of values less than or equal to v
    475      * (as a proportion between 0 and 1).
    476      * <p>
    477      * Returns <code>Double.NaN</code> if no values have been added.
    478      * Returns 0 if at least one value has been added, but v is not comparable
    479      * to the values set.</p>
    480      *
    481      * @param v the value to lookup
    482      * @return the proportion of values less than or equal to v
    483      * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0
    484      */
    485     @Deprecated
    486     public double getCumPct(Object v) {
    487         return getCumPct((Comparable<?>) v);
    488 
    489     }
    490 
    491     /**
    492      * Returns the cumulative percentage of values less than or equal to v
    493      * (as a proportion between 0 and 1).
    494      * <p>
    495      * Returns <code>Double.NaN</code> if no values have been added.
    496      * Returns 0 if at least one value has been added, but v is not comparable
    497      * to the values set.</p>
    498      *
    499      * @param v the value to lookup
    500      * @return the proportion of values less than or equal to v
    501      */
    502     public double getCumPct(Comparable<?> v) {
    503         final long sumFreq = getSumFreq();
    504         if (sumFreq == 0) {
    505             return Double.NaN;
    506         }
    507         return (double) getCumFreq(v) / (double) sumFreq;
    508     }
    509 
    510     /**
    511      * Returns the cumulative percentage of values less than or equal to v
    512      * (as a proportion between 0 and 1).
    513      * <p>
    514      * Returns 0 if v is not comparable to the values set.</p>
    515      *
    516      * @param v the value to lookup
    517      * @return the proportion of values less than or equal to v
    518      */
    519     public double getCumPct(int v) {
    520         return getCumPct(Long.valueOf(v));
    521     }
    522 
    523     /**
    524      * Returns the cumulative percentage of values less than or equal to v
    525      * (as a proportion between 0 and 1).
    526      * <p>
    527      * Returns 0 if v is not comparable to the values set.</p>
    528      *
    529      * @param v the value to lookup
    530      * @return the proportion of values less than or equal to v
    531      */
    532     public double getCumPct(long v) {
    533         return getCumPct(Long.valueOf(v));
    534     }
    535 
    536     /**
    537      * Returns the cumulative percentage of values less than or equal to v
    538      * (as a proportion between 0 and 1).
    539      * <p>
    540      * Returns 0 if v is not comparable to the values set.</p>
    541      *
    542      * @param v the value to lookup
    543      * @return the proportion of values less than or equal to v
    544      */
    545     public double getCumPct(char v) {
    546         return getCumPct(Character.valueOf(v));
    547     }
    548 
    549     /**
    550      * A Comparator that compares comparable objects using the
    551      * natural order.  Copied from Commons Collections ComparableComparator.
    552      */
    553     private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
    554 
    555         /** Serializable version identifier */
    556         private static final long serialVersionUID = -3852193713161395148L;
    557 
    558         /**
    559          * Compare the two {@link Comparable Comparable} arguments.
    560          * This method is equivalent to:
    561          * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
    562          *
    563          * @param  o1 the first object
    564          * @param  o2 the second object
    565          * @return  result of comparison
    566          * @throws NullPointerException when <i>o1</i> is <code>null</code>,
    567          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
    568          * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
    569          *         or when <code>((Comparable)o1).compareTo(o2)</code> does
    570          */
    571         @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
    572         public int compare(Comparable<T> o1, Comparable<T> o2) {
    573             return o1.compareTo((T) o2);
    574         }
    575     }
    576 
    577     /** {@inheritDoc} */
    578     @Override
    579     public int hashCode() {
    580         final int prime = 31;
    581         int result = 1;
    582         result = prime * result +
    583                  ((freqTable == null) ? 0 : freqTable.hashCode());
    584         return result;
    585     }
    586 
    587     /** {@inheritDoc} */
    588     @Override
    589     public boolean equals(Object obj) {
    590         if (this == obj)
    591             return true;
    592         if (!(obj instanceof Frequency))
    593             return false;
    594         Frequency other = (Frequency) obj;
    595         if (freqTable == null) {
    596             if (other.freqTable != null)
    597                 return false;
    598         } else if (!freqTable.equals(other.freqTable))
    599             return false;
    600         return true;
    601     }
    602 
    603 }
    604