1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.stat.ranking; 19 20 import java.util.ArrayList; 21 import java.util.Arrays; 22 import java.util.Iterator; 23 import java.util.List; 24 25 import org.apache.commons.math.exception.MathInternalError; 26 import org.apache.commons.math.random.RandomData; 27 import org.apache.commons.math.random.RandomDataImpl; 28 import org.apache.commons.math.random.RandomGenerator; 29 import org.apache.commons.math.util.FastMath; 30 31 32 /** 33 * <p> Ranking based on the natural ordering on doubles.</p> 34 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties 35 * are handled using the selected {@link TiesStrategy}. 36 * Configuration settings are supplied in optional constructor arguments. 37 * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE}, 38 * respectively. When using {@link TiesStrategy#RANDOM}, a 39 * {@link RandomGenerator} may be supplied as a constructor argument.</p> 40 * <p>Examples: 41 * <table border="1" cellpadding="3"> 42 * <tr><th colspan="3"> 43 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) 44 * </th></tr> 45 * <tr><th>NaNStrategy</th><th>TiesStrategy</th> 46 * <th><code>rank(data)</code></th> 47 * <tr> 48 * <td>default (NaNs maximal)</td> 49 * <td>default (ties averaged)</td> 50 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> 51 * <tr> 52 * <td>default (NaNs maximal)</td> 53 * <td>MINIMUM</td> 54 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> 55 * <tr> 56 * <td>MINIMAL</td> 57 * <td>default (ties averaged)</td> 58 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> 59 * <tr> 60 * <td>REMOVED</td> 61 * <td>SEQUENTIAL</td> 62 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> 63 * <tr> 64 * <td>MINIMAL</td> 65 * <td>MAXIMUM</td> 66 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p> 67 * 68 * @since 2.0 69 * @version $Revision: 1061496 $ $Date: 2011-01-20 21:32:16 +0100 (jeu. 20 janv. 2011) $ 70 */ 71 public class NaturalRanking implements RankingAlgorithm { 72 73 /** default NaN strategy */ 74 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL; 75 76 /** default ties strategy */ 77 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; 78 79 /** NaN strategy - defaults to NaNs maximal */ 80 private final NaNStrategy nanStrategy; 81 82 /** Ties strategy - defaults to ties averaged */ 83 private final TiesStrategy tiesStrategy; 84 85 /** Source of random data - used only when ties strategy is RANDOM */ 86 private final RandomData randomData; 87 88 /** 89 * Create a NaturalRanking with default strategies for handling ties and NaNs. 90 */ 91 public NaturalRanking() { 92 super(); 93 tiesStrategy = DEFAULT_TIES_STRATEGY; 94 nanStrategy = DEFAULT_NAN_STRATEGY; 95 randomData = null; 96 } 97 98 /** 99 * Create a NaturalRanking with the given TiesStrategy. 100 * 101 * @param tiesStrategy the TiesStrategy to use 102 */ 103 public NaturalRanking(TiesStrategy tiesStrategy) { 104 super(); 105 this.tiesStrategy = tiesStrategy; 106 nanStrategy = DEFAULT_NAN_STRATEGY; 107 randomData = new RandomDataImpl(); 108 } 109 110 /** 111 * Create a NaturalRanking with the given NaNStrategy. 112 * 113 * @param nanStrategy the NaNStrategy to use 114 */ 115 public NaturalRanking(NaNStrategy nanStrategy) { 116 super(); 117 this.nanStrategy = nanStrategy; 118 tiesStrategy = DEFAULT_TIES_STRATEGY; 119 randomData = null; 120 } 121 122 /** 123 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. 124 * 125 * @param nanStrategy NaNStrategy to use 126 * @param tiesStrategy TiesStrategy to use 127 */ 128 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { 129 super(); 130 this.nanStrategy = nanStrategy; 131 this.tiesStrategy = tiesStrategy; 132 randomData = new RandomDataImpl(); 133 } 134 135 /** 136 * Create a NaturalRanking with TiesStrategy.RANDOM and the given 137 * RandomGenerator as the source of random data. 138 * 139 * @param randomGenerator source of random data 140 */ 141 public NaturalRanking(RandomGenerator randomGenerator) { 142 super(); 143 this.tiesStrategy = TiesStrategy.RANDOM; 144 nanStrategy = DEFAULT_NAN_STRATEGY; 145 randomData = new RandomDataImpl(randomGenerator); 146 } 147 148 149 /** 150 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM 151 * and the given source of random data. 152 * 153 * @param nanStrategy NaNStrategy to use 154 * @param randomGenerator source of random data 155 */ 156 public NaturalRanking(NaNStrategy nanStrategy, 157 RandomGenerator randomGenerator) { 158 super(); 159 this.nanStrategy = nanStrategy; 160 this.tiesStrategy = TiesStrategy.RANDOM; 161 randomData = new RandomDataImpl(randomGenerator); 162 } 163 164 /** 165 * Return the NaNStrategy 166 * 167 * @return returns the NaNStrategy 168 */ 169 public NaNStrategy getNanStrategy() { 170 return nanStrategy; 171 } 172 173 /** 174 * Return the TiesStrategy 175 * 176 * @return the TiesStrategy 177 */ 178 public TiesStrategy getTiesStrategy() { 179 return tiesStrategy; 180 } 181 182 /** 183 * Rank <code>data</code> using the natural ordering on Doubles, with 184 * NaN values handled according to <code>nanStrategy</code> and ties 185 * resolved using <code>tiesStrategy.</code> 186 * 187 * @param data array to be ranked 188 * @return array of ranks 189 */ 190 public double[] rank(double[] data) { 191 192 // Array recording initial positions of data to be ranked 193 IntDoublePair[] ranks = new IntDoublePair[data.length]; 194 for (int i = 0; i < data.length; i++) { 195 ranks[i] = new IntDoublePair(data[i], i); 196 } 197 198 // Recode, remove or record positions of NaNs 199 List<Integer> nanPositions = null; 200 switch (nanStrategy) { 201 case MAXIMAL: // Replace NaNs with +INFs 202 recodeNaNs(ranks, Double.POSITIVE_INFINITY); 203 break; 204 case MINIMAL: // Replace NaNs with -INFs 205 recodeNaNs(ranks, Double.NEGATIVE_INFINITY); 206 break; 207 case REMOVED: // Drop NaNs from data 208 ranks = removeNaNs(ranks); 209 break; 210 case FIXED: // Record positions of NaNs 211 nanPositions = getNanPositions(ranks); 212 break; 213 default: // this should not happen unless NaNStrategy enum is changed 214 throw new MathInternalError(); 215 } 216 217 // Sort the IntDoublePairs 218 Arrays.sort(ranks); 219 220 // Walk the sorted array, filling output array using sorted positions, 221 // resolving ties as we go 222 double[] out = new double[ranks.length]; 223 int pos = 1; // position in sorted array 224 out[ranks[0].getPosition()] = pos; 225 List<Integer> tiesTrace = new ArrayList<Integer>(); 226 tiesTrace.add(ranks[0].getPosition()); 227 for (int i = 1; i < ranks.length; i++) { 228 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { 229 // tie sequence has ended (or had length 1) 230 pos = i + 1; 231 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve 232 resolveTie(out, tiesTrace); 233 } 234 tiesTrace = new ArrayList<Integer>(); 235 tiesTrace.add(ranks[i].getPosition()); 236 } else { 237 // tie sequence continues 238 tiesTrace.add(ranks[i].getPosition()); 239 } 240 out[ranks[i].getPosition()] = pos; 241 } 242 if (tiesTrace.size() > 1) { // handle tie sequence at end 243 resolveTie(out, tiesTrace); 244 } 245 if (nanStrategy == NaNStrategy.FIXED) { 246 restoreNaNs(out, nanPositions); 247 } 248 return out; 249 } 250 251 /** 252 * Returns an array that is a copy of the input array with IntDoublePairs 253 * having NaN values removed. 254 * 255 * @param ranks input array 256 * @return array with NaN-valued entries removed 257 */ 258 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { 259 if (!containsNaNs(ranks)) { 260 return ranks; 261 } 262 IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; 263 int j = 0; 264 for (int i = 0; i < ranks.length; i++) { 265 if (Double.isNaN(ranks[i].getValue())) { 266 // drop, but adjust original ranks of later elements 267 for (int k = i + 1; k < ranks.length; k++) { 268 ranks[k] = new IntDoublePair( 269 ranks[k].getValue(), ranks[k].getPosition() - 1); 270 } 271 } else { 272 outRanks[j] = new IntDoublePair( 273 ranks[i].getValue(), ranks[i].getPosition()); 274 j++; 275 } 276 } 277 IntDoublePair[] returnRanks = new IntDoublePair[j]; 278 System.arraycopy(outRanks, 0, returnRanks, 0, j); 279 return returnRanks; 280 } 281 282 /** 283 * Recodes NaN values to the given value. 284 * 285 * @param ranks array to recode 286 * @param value the value to replace NaNs with 287 */ 288 private void recodeNaNs(IntDoublePair[] ranks, double value) { 289 for (int i = 0; i < ranks.length; i++) { 290 if (Double.isNaN(ranks[i].getValue())) { 291 ranks[i] = new IntDoublePair( 292 value, ranks[i].getPosition()); 293 } 294 } 295 } 296 297 /** 298 * Checks for presence of NaNs in <code>ranks.</code> 299 * 300 * @param ranks array to be searched for NaNs 301 * @return true iff ranks contains one or more NaNs 302 */ 303 private boolean containsNaNs(IntDoublePair[] ranks) { 304 for (int i = 0; i < ranks.length; i++) { 305 if (Double.isNaN(ranks[i].getValue())) { 306 return true; 307 } 308 } 309 return false; 310 } 311 312 /** 313 * Resolve a sequence of ties, using the configured {@link TiesStrategy}. 314 * The input <code>ranks</code> array is expected to take the same value 315 * for all indices in <code>tiesTrace</code>. The common value is recoded 316 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, 317 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. 318 * The same array and trace with tiesStrategy AVERAGE will come out 319 * <5,8,3,6,3,7,1,3>. 320 * 321 * @param ranks array of ranks 322 * @param tiesTrace list of indices where <code>ranks</code> is constant 323 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 324 * </code> 325 */ 326 private void resolveTie(double[] ranks, List<Integer> tiesTrace) { 327 328 // constant value of ranks over tiesTrace 329 final double c = ranks[tiesTrace.get(0)]; 330 331 // length of sequence of tied ranks 332 final int length = tiesTrace.size(); 333 334 switch (tiesStrategy) { 335 case AVERAGE: // Replace ranks with average 336 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); 337 break; 338 case MAXIMUM: // Replace ranks with maximum values 339 fill(ranks, tiesTrace, c + length - 1); 340 break; 341 case MINIMUM: // Replace ties with minimum 342 fill(ranks, tiesTrace, c); 343 break; 344 case RANDOM: // Fill with random integral values in [c, c + length - 1] 345 Iterator<Integer> iterator = tiesTrace.iterator(); 346 long f = FastMath.round(c); 347 while (iterator.hasNext()) { 348 ranks[iterator.next()] = 349 randomData.nextLong(f, f + length - 1); 350 } 351 break; 352 case SEQUENTIAL: // Fill sequentially from c to c + length - 1 353 // walk and fill 354 iterator = tiesTrace.iterator(); 355 f = FastMath.round(c); 356 int i = 0; 357 while (iterator.hasNext()) { 358 ranks[iterator.next()] = f + i++; 359 } 360 break; 361 default: // this should not happen unless TiesStrategy enum is changed 362 throw new MathInternalError(); 363 } 364 } 365 366 /** 367 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> 368 * 369 * @param data array to modify 370 * @param tiesTrace list of index values to set 371 * @param value value to set 372 */ 373 private void fill(double[] data, List<Integer> tiesTrace, double value) { 374 Iterator<Integer> iterator = tiesTrace.iterator(); 375 while (iterator.hasNext()) { 376 data[iterator.next()] = value; 377 } 378 } 379 380 /** 381 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> 382 * 383 * @param ranks array to modify 384 * @param nanPositions list of index values to set to <code>Double.NaN</code> 385 */ 386 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { 387 if (nanPositions.size() == 0) { 388 return; 389 } 390 Iterator<Integer> iterator = nanPositions.iterator(); 391 while (iterator.hasNext()) { 392 ranks[iterator.next().intValue()] = Double.NaN; 393 } 394 395 } 396 397 /** 398 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> 399 * 400 * @param ranks array to search for <code>NaNs</code> 401 * @return list of indexes i such that <code>ranks[i] = NaN</code> 402 */ 403 private List<Integer> getNanPositions(IntDoublePair[] ranks) { 404 ArrayList<Integer> out = new ArrayList<Integer>(); 405 for (int i = 0; i < ranks.length; i++) { 406 if (Double.isNaN(ranks[i].getValue())) { 407 out.add(Integer.valueOf(i)); 408 } 409 } 410 return out; 411 } 412 413 /** 414 * Represents the position of a double value in an ordering. 415 * Comparable interface is implemented so Arrays.sort can be used 416 * to sort an array of IntDoublePairs by value. Note that the 417 * implicitly defined natural ordering is NOT consistent with equals. 418 */ 419 private static class IntDoublePair implements Comparable<IntDoublePair> { 420 421 /** Value of the pair */ 422 private final double value; 423 424 /** Original position of the pair */ 425 private final int position; 426 427 /** 428 * Construct an IntDoublePair with the given value and position. 429 * @param value the value of the pair 430 * @param position the original position 431 */ 432 public IntDoublePair(double value, int position) { 433 this.value = value; 434 this.position = position; 435 } 436 437 /** 438 * Compare this IntDoublePair to another pair. 439 * Only the <strong>values</strong> are compared. 440 * 441 * @param other the other pair to compare this to 442 * @return result of <code>Double.compare(value, other.value)</code> 443 */ 444 public int compareTo(IntDoublePair other) { 445 return Double.compare(value, other.value); 446 } 447 448 /** 449 * Returns the value of the pair. 450 * @return value 451 */ 452 public double getValue() { 453 return value; 454 } 455 456 /** 457 * Returns the original position of the pair. 458 * @return position 459 */ 460 public int getPosition() { 461 return position; 462 } 463 } 464 } 465