Home | History | Annotate | Download | only in ranking
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package org.apache.commons.math.stat.ranking;
     19 
     20 import java.util.ArrayList;
     21 import java.util.Arrays;
     22 import java.util.Iterator;
     23 import java.util.List;
     24 
     25 import org.apache.commons.math.exception.MathInternalError;
     26 import org.apache.commons.math.random.RandomData;
     27 import org.apache.commons.math.random.RandomDataImpl;
     28 import org.apache.commons.math.random.RandomGenerator;
     29 import org.apache.commons.math.util.FastMath;
     30 
     31 
     32 /**
     33  * <p> Ranking based on the natural ordering on doubles.</p>
     34  * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
     35  * are handled using the selected {@link TiesStrategy}.
     36  * Configuration settings are supplied in optional constructor arguments.
     37  * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
     38  * respectively. When using {@link TiesStrategy#RANDOM}, a
     39  * {@link RandomGenerator} may be supplied as a constructor argument.</p>
     40  * <p>Examples:
     41  * <table border="1" cellpadding="3">
     42  * <tr><th colspan="3">
     43  * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
     44  * </th></tr>
     45  * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
     46  * <th><code>rank(data)</code></th>
     47  * <tr>
     48  * <td>default (NaNs maximal)</td>
     49  * <td>default (ties averaged)</td>
     50  * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
     51  * <tr>
     52  * <td>default (NaNs maximal)</td>
     53  * <td>MINIMUM</td>
     54  * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
     55  * <tr>
     56  * <td>MINIMAL</td>
     57  * <td>default (ties averaged)</td>
     58  * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
     59  * <tr>
     60  * <td>REMOVED</td>
     61  * <td>SEQUENTIAL</td>
     62  * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
     63  * <tr>
     64  * <td>MINIMAL</td>
     65  * <td>MAXIMUM</td>
     66  * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
     67  *
     68  * @since 2.0
     69  * @version $Revision: 1061496 $ $Date: 2011-01-20 21:32:16 +0100 (jeu. 20 janv. 2011) $
     70  */
     71 public class NaturalRanking implements RankingAlgorithm {
     72 
     73     /** default NaN strategy */
     74     public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
     75 
     76     /** default ties strategy */
     77     public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
     78 
     79     /** NaN strategy - defaults to NaNs maximal */
     80     private final NaNStrategy nanStrategy;
     81 
     82     /** Ties strategy - defaults to ties averaged */
     83     private final TiesStrategy tiesStrategy;
     84 
     85     /** Source of random data - used only when ties strategy is RANDOM */
     86     private final RandomData randomData;
     87 
     88     /**
     89      * Create a NaturalRanking with default strategies for handling ties and NaNs.
     90      */
     91     public NaturalRanking() {
     92         super();
     93         tiesStrategy = DEFAULT_TIES_STRATEGY;
     94         nanStrategy = DEFAULT_NAN_STRATEGY;
     95         randomData = null;
     96     }
     97 
     98     /**
     99      * Create a NaturalRanking with the given TiesStrategy.
    100      *
    101      * @param tiesStrategy the TiesStrategy to use
    102      */
    103     public NaturalRanking(TiesStrategy tiesStrategy) {
    104         super();
    105         this.tiesStrategy = tiesStrategy;
    106         nanStrategy = DEFAULT_NAN_STRATEGY;
    107         randomData = new RandomDataImpl();
    108     }
    109 
    110     /**
    111      * Create a NaturalRanking with the given NaNStrategy.
    112      *
    113      * @param nanStrategy the NaNStrategy to use
    114      */
    115     public NaturalRanking(NaNStrategy nanStrategy) {
    116         super();
    117         this.nanStrategy = nanStrategy;
    118         tiesStrategy = DEFAULT_TIES_STRATEGY;
    119         randomData = null;
    120     }
    121 
    122     /**
    123      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
    124      *
    125      * @param nanStrategy NaNStrategy to use
    126      * @param tiesStrategy TiesStrategy to use
    127      */
    128     public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
    129         super();
    130         this.nanStrategy = nanStrategy;
    131         this.tiesStrategy = tiesStrategy;
    132         randomData = new RandomDataImpl();
    133     }
    134 
    135     /**
    136      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
    137      * RandomGenerator as the source of random data.
    138      *
    139      * @param randomGenerator source of random data
    140      */
    141     public NaturalRanking(RandomGenerator randomGenerator) {
    142         super();
    143         this.tiesStrategy = TiesStrategy.RANDOM;
    144         nanStrategy = DEFAULT_NAN_STRATEGY;
    145         randomData = new RandomDataImpl(randomGenerator);
    146     }
    147 
    148 
    149     /**
    150      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
    151      * and the given source of random data.
    152      *
    153      * @param nanStrategy NaNStrategy to use
    154      * @param randomGenerator source of random data
    155      */
    156     public NaturalRanking(NaNStrategy nanStrategy,
    157             RandomGenerator randomGenerator) {
    158         super();
    159         this.nanStrategy = nanStrategy;
    160         this.tiesStrategy = TiesStrategy.RANDOM;
    161         randomData = new RandomDataImpl(randomGenerator);
    162     }
    163 
    164     /**
    165      * Return the NaNStrategy
    166      *
    167      * @return returns the NaNStrategy
    168      */
    169     public NaNStrategy getNanStrategy() {
    170         return nanStrategy;
    171     }
    172 
    173     /**
    174      * Return the TiesStrategy
    175      *
    176      * @return the TiesStrategy
    177      */
    178     public TiesStrategy getTiesStrategy() {
    179         return tiesStrategy;
    180     }
    181 
    182     /**
    183      * Rank <code>data</code> using the natural ordering on Doubles, with
    184      * NaN values handled according to <code>nanStrategy</code> and ties
    185      * resolved using <code>tiesStrategy.</code>
    186      *
    187      * @param data array to be ranked
    188      * @return array of ranks
    189      */
    190     public double[] rank(double[] data) {
    191 
    192         // Array recording initial positions of data to be ranked
    193         IntDoublePair[] ranks = new IntDoublePair[data.length];
    194         for (int i = 0; i < data.length; i++) {
    195             ranks[i] = new IntDoublePair(data[i], i);
    196         }
    197 
    198         // Recode, remove or record positions of NaNs
    199         List<Integer> nanPositions = null;
    200         switch (nanStrategy) {
    201             case MAXIMAL: // Replace NaNs with +INFs
    202                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
    203                 break;
    204             case MINIMAL: // Replace NaNs with -INFs
    205                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
    206                 break;
    207             case REMOVED: // Drop NaNs from data
    208                 ranks = removeNaNs(ranks);
    209                 break;
    210             case FIXED:   // Record positions of NaNs
    211                 nanPositions = getNanPositions(ranks);
    212                 break;
    213             default: // this should not happen unless NaNStrategy enum is changed
    214                 throw new MathInternalError();
    215         }
    216 
    217         // Sort the IntDoublePairs
    218         Arrays.sort(ranks);
    219 
    220         // Walk the sorted array, filling output array using sorted positions,
    221         // resolving ties as we go
    222         double[] out = new double[ranks.length];
    223         int pos = 1;  // position in sorted array
    224         out[ranks[0].getPosition()] = pos;
    225         List<Integer> tiesTrace = new ArrayList<Integer>();
    226         tiesTrace.add(ranks[0].getPosition());
    227         for (int i = 1; i < ranks.length; i++) {
    228             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
    229                 // tie sequence has ended (or had length 1)
    230                 pos = i + 1;
    231                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
    232                     resolveTie(out, tiesTrace);
    233                 }
    234                 tiesTrace = new ArrayList<Integer>();
    235                 tiesTrace.add(ranks[i].getPosition());
    236             } else {
    237                 // tie sequence continues
    238                 tiesTrace.add(ranks[i].getPosition());
    239             }
    240             out[ranks[i].getPosition()] = pos;
    241         }
    242         if (tiesTrace.size() > 1) {  // handle tie sequence at end
    243             resolveTie(out, tiesTrace);
    244         }
    245         if (nanStrategy == NaNStrategy.FIXED) {
    246             restoreNaNs(out, nanPositions);
    247         }
    248         return out;
    249     }
    250 
    251     /**
    252      * Returns an array that is a copy of the input array with IntDoublePairs
    253      * having NaN values removed.
    254      *
    255      * @param ranks input array
    256      * @return array with NaN-valued entries removed
    257      */
    258     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
    259         if (!containsNaNs(ranks)) {
    260             return ranks;
    261         }
    262         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
    263         int j = 0;
    264         for (int i = 0; i < ranks.length; i++) {
    265             if (Double.isNaN(ranks[i].getValue())) {
    266                 // drop, but adjust original ranks of later elements
    267                 for (int k = i + 1; k < ranks.length; k++) {
    268                     ranks[k] = new IntDoublePair(
    269                             ranks[k].getValue(), ranks[k].getPosition() - 1);
    270                 }
    271             } else {
    272                 outRanks[j] = new IntDoublePair(
    273                         ranks[i].getValue(), ranks[i].getPosition());
    274                 j++;
    275             }
    276         }
    277         IntDoublePair[] returnRanks = new IntDoublePair[j];
    278         System.arraycopy(outRanks, 0, returnRanks, 0, j);
    279         return returnRanks;
    280     }
    281 
    282     /**
    283      * Recodes NaN values to the given value.
    284      *
    285      * @param ranks array to recode
    286      * @param value the value to replace NaNs with
    287      */
    288     private void recodeNaNs(IntDoublePair[] ranks, double value) {
    289         for (int i = 0; i < ranks.length; i++) {
    290             if (Double.isNaN(ranks[i].getValue())) {
    291                 ranks[i] = new IntDoublePair(
    292                         value, ranks[i].getPosition());
    293             }
    294         }
    295     }
    296 
    297     /**
    298      * Checks for presence of NaNs in <code>ranks.</code>
    299      *
    300      * @param ranks array to be searched for NaNs
    301      * @return true iff ranks contains one or more NaNs
    302      */
    303     private boolean containsNaNs(IntDoublePair[] ranks) {
    304         for (int i = 0; i < ranks.length; i++) {
    305             if (Double.isNaN(ranks[i].getValue())) {
    306                 return true;
    307             }
    308         }
    309         return false;
    310     }
    311 
    312     /**
    313      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
    314      * The input <code>ranks</code> array is expected to take the same value
    315      * for all indices in <code>tiesTrace</code>.  The common value is recoded
    316      * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
    317      * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
    318      * The same array and trace with tiesStrategy AVERAGE will come out
    319      * <5,8,3,6,3,7,1,3>.
    320      *
    321      * @param ranks array of ranks
    322      * @param tiesTrace list of indices where <code>ranks</code> is constant
    323      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
    324      * </code>
    325      */
    326     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
    327 
    328         // constant value of ranks over tiesTrace
    329         final double c = ranks[tiesTrace.get(0)];
    330 
    331         // length of sequence of tied ranks
    332         final int length = tiesTrace.size();
    333 
    334         switch (tiesStrategy) {
    335             case  AVERAGE:  // Replace ranks with average
    336                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
    337                 break;
    338             case MAXIMUM:   // Replace ranks with maximum values
    339                 fill(ranks, tiesTrace, c + length - 1);
    340                 break;
    341             case MINIMUM:   // Replace ties with minimum
    342                 fill(ranks, tiesTrace, c);
    343                 break;
    344             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
    345                 Iterator<Integer> iterator = tiesTrace.iterator();
    346                 long f = FastMath.round(c);
    347                 while (iterator.hasNext()) {
    348                     ranks[iterator.next()] =
    349                         randomData.nextLong(f, f + length - 1);
    350                 }
    351                 break;
    352             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
    353                 // walk and fill
    354                 iterator = tiesTrace.iterator();
    355                 f = FastMath.round(c);
    356                 int i = 0;
    357                 while (iterator.hasNext()) {
    358                     ranks[iterator.next()] = f + i++;
    359                 }
    360                 break;
    361             default: // this should not happen unless TiesStrategy enum is changed
    362                 throw new MathInternalError();
    363         }
    364     }
    365 
    366     /**
    367      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
    368      *
    369      * @param data array to modify
    370      * @param tiesTrace list of index values to set
    371      * @param value value to set
    372      */
    373     private void fill(double[] data, List<Integer> tiesTrace, double value) {
    374         Iterator<Integer> iterator = tiesTrace.iterator();
    375         while (iterator.hasNext()) {
    376             data[iterator.next()] = value;
    377         }
    378     }
    379 
    380     /**
    381      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
    382      *
    383      * @param ranks array to modify
    384      * @param nanPositions list of index values to set to <code>Double.NaN</code>
    385      */
    386     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
    387         if (nanPositions.size() == 0) {
    388             return;
    389         }
    390         Iterator<Integer> iterator = nanPositions.iterator();
    391         while (iterator.hasNext()) {
    392             ranks[iterator.next().intValue()] = Double.NaN;
    393         }
    394 
    395     }
    396 
    397     /**
    398      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
    399      *
    400      * @param ranks array to search for <code>NaNs</code>
    401      * @return list of indexes i such that <code>ranks[i] = NaN</code>
    402      */
    403     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
    404         ArrayList<Integer> out = new ArrayList<Integer>();
    405         for (int i = 0; i < ranks.length; i++) {
    406             if (Double.isNaN(ranks[i].getValue())) {
    407                 out.add(Integer.valueOf(i));
    408             }
    409         }
    410         return out;
    411     }
    412 
    413     /**
    414      * Represents the position of a double value in an ordering.
    415      * Comparable interface is implemented so Arrays.sort can be used
    416      * to sort an array of IntDoublePairs by value.  Note that the
    417      * implicitly defined natural ordering is NOT consistent with equals.
    418      */
    419     private static class IntDoublePair implements Comparable<IntDoublePair>  {
    420 
    421         /** Value of the pair */
    422         private final double value;
    423 
    424         /** Original position of the pair */
    425         private final int position;
    426 
    427         /**
    428          * Construct an IntDoublePair with the given value and position.
    429          * @param value the value of the pair
    430          * @param position the original position
    431          */
    432         public IntDoublePair(double value, int position) {
    433             this.value = value;
    434             this.position = position;
    435         }
    436 
    437         /**
    438          * Compare this IntDoublePair to another pair.
    439          * Only the <strong>values</strong> are compared.
    440          *
    441          * @param other the other pair to compare this to
    442          * @return result of <code>Double.compare(value, other.value)</code>
    443          */
    444         public int compareTo(IntDoublePair other) {
    445             return Double.compare(value, other.value);
    446         }
    447 
    448         /**
    449          * Returns the value of the pair.
    450          * @return value
    451          */
    452         public double getValue() {
    453             return value;
    454         }
    455 
    456         /**
    457          * Returns the original position of the pair.
    458          * @return position
    459          */
    460         public int getPosition() {
    461             return position;
    462         }
    463     }
    464 }
    465