Home | History | Annotate | Download | only in correlation
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 package org.apache.commons.math.stat.correlation;
     18 
     19 import org.apache.commons.math.MathRuntimeException;
     20 import org.apache.commons.math.exception.util.LocalizedFormats;
     21 import org.apache.commons.math.linear.RealMatrix;
     22 import org.apache.commons.math.linear.BlockRealMatrix;
     23 import org.apache.commons.math.stat.descriptive.moment.Mean;
     24 import org.apache.commons.math.stat.descriptive.moment.Variance;
     25 
     26 /**
     27  * Computes covariances for pairs of arrays or columns of a matrix.
     28  *
     29  * <p>The constructors that take <code>RealMatrix</code> or
     30  * <code>double[][]</code> arguments generate covariance matrices.  The
     31  * columns of the input matrices are assumed to represent variable values.</p>
     32  *
     33  * <p>The constructor argument <code>biasCorrected</code> determines whether or
     34  * not computed covariances are bias-corrected.</p>
     35  *
     36  * <p>Unbiased covariances are given by the formula</p>
     37  * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
     38  * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
     39  * is the mean of the <code>Y</code> values.
     40  *
     41  * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
     42  *
     43  * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 aot 2010) $
     44  * @since 2.0
     45  */
     46 public class Covariance {
     47 
     48     /** covariance matrix */
     49     private final RealMatrix covarianceMatrix;
     50 
     51     /**
     52      * Create an empty covariance matrix.
     53      */
     54     /** Number of observations (length of covariate vectors) */
     55     private final int n;
     56 
     57     /**
     58      * Create a Covariance with no data
     59      */
     60     public Covariance() {
     61         super();
     62         covarianceMatrix = null;
     63         n = 0;
     64     }
     65 
     66     /**
     67      * Create a Covariance matrix from a rectangular array
     68      * whose columns represent covariates.
     69      *
     70      * <p>The <code>biasCorrected</code> parameter determines whether or not
     71      * covariance estimates are bias-corrected.</p>
     72      *
     73      * <p>The input array must be rectangular with at least two columns
     74      * and two rows.</p>
     75      *
     76      * @param data rectangular array with columns representing covariates
     77      * @param biasCorrected true means covariances are bias-corrected
     78      * @throws IllegalArgumentException if the input data array is not
     79      * rectangular with at least two rows and two columns.
     80      */
     81     public Covariance(double[][] data, boolean biasCorrected) {
     82         this(new BlockRealMatrix(data), biasCorrected);
     83     }
     84 
     85     /**
     86      * Create a Covariance matrix from a rectangular array
     87      * whose columns represent covariates.
     88      *
     89      * <p>The input array must be rectangular with at least two columns
     90      * and two rows</p>
     91      *
     92      * @param data rectangular array with columns representing covariates
     93      * @throws IllegalArgumentException if the input data array is not
     94      * rectangular with at least two rows and two columns.
     95      */
     96     public Covariance(double[][] data) {
     97         this(data, true);
     98     }
     99 
    100     /**
    101      * Create a covariance matrix from a matrix whose columns
    102      * represent covariates.
    103      *
    104      * <p>The <code>biasCorrected</code> parameter determines whether or not
    105      * covariance estimates are bias-corrected.</p>
    106      *
    107      * <p>The matrix must have at least two columns and two rows</p>
    108      *
    109      * @param matrix matrix with columns representing covariates
    110      * @param biasCorrected true means covariances are bias-corrected
    111      * @throws IllegalArgumentException if the input matrix does not have
    112      * at least two rows and two columns
    113      */
    114     public Covariance(RealMatrix matrix, boolean biasCorrected) {
    115        checkSufficientData(matrix);
    116        n = matrix.getRowDimension();
    117        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
    118     }
    119 
    120     /**
    121      * Create a covariance matrix from a matrix whose columns
    122      * represent covariates.
    123      *
    124      * <p>The matrix must have at least two columns and two rows</p>
    125      *
    126      * @param matrix matrix with columns representing covariates
    127      * @throws IllegalArgumentException if the input matrix does not have
    128      * at least two rows and two columns
    129      */
    130     public Covariance(RealMatrix matrix) {
    131         this(matrix, true);
    132     }
    133 
    134     /**
    135      * Returns the covariance matrix
    136      *
    137      * @return covariance matrix
    138      */
    139     public RealMatrix getCovarianceMatrix() {
    140         return covarianceMatrix;
    141     }
    142 
    143     /**
    144      * Returns the number of observations (length of covariate vectors)
    145      *
    146      * @return number of observations
    147      */
    148 
    149     public int getN() {
    150         return n;
    151     }
    152 
    153     /**
    154      * Compute a covariance matrix from a matrix whose columns represent
    155      * covariates.
    156      * @param matrix input matrix (must have at least two columns and two rows)
    157      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
    158      * @return covariance matrix
    159      */
    160     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
    161         int dimension = matrix.getColumnDimension();
    162         Variance variance = new Variance(biasCorrected);
    163         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    164         for (int i = 0; i < dimension; i++) {
    165             for (int j = 0; j < i; j++) {
    166               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
    167               outMatrix.setEntry(i, j, cov);
    168               outMatrix.setEntry(j, i, cov);
    169             }
    170             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
    171         }
    172         return outMatrix;
    173     }
    174 
    175     /**
    176      * Create a covariance matrix from a matrix whose columns represent
    177      * covariates. Covariances are computed using the bias-corrected formula.
    178      * @param matrix input matrix (must have at least two columns and two rows)
    179      * @return covariance matrix
    180      * @see #Covariance
    181      */
    182     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
    183         return computeCovarianceMatrix(matrix, true);
    184     }
    185 
    186     /**
    187      * Compute a covariance matrix from a rectangular array whose columns represent
    188      * covariates.
    189      * @param data input array (must have at least two columns and two rows)
    190      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
    191      * @return covariance matrix
    192      */
    193     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
    194         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
    195     }
    196 
    197     /**
    198      * Create a covariance matrix from a rectangual array whose columns represent
    199      * covariates. Covariances are computed using the bias-corrected formula.
    200      * @param data input array (must have at least two columns and two rows)
    201      * @return covariance matrix
    202      * @see #Covariance
    203      */
    204     protected RealMatrix computeCovarianceMatrix(double[][] data) {
    205         return computeCovarianceMatrix(data, true);
    206     }
    207 
    208     /**
    209      * Computes the covariance between the two arrays.
    210      *
    211      * <p>Array lengths must match and the common length must be at least 2.</p>
    212      *
    213      * @param xArray first data array
    214      * @param yArray second data array
    215      * @param biasCorrected if true, returned value will be bias-corrected
    216      * @return returns the covariance for the two arrays
    217      * @throws  IllegalArgumentException if the arrays lengths do not match or
    218      * there is insufficient data
    219      */
    220     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
    221         throws IllegalArgumentException {
    222         Mean mean = new Mean();
    223         double result = 0d;
    224         int length = xArray.length;
    225         if (length != yArray.length) {
    226             throw MathRuntimeException.createIllegalArgumentException(
    227                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
    228         } else if (length < 2) {
    229             throw MathRuntimeException.createIllegalArgumentException(
    230                   LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
    231         } else {
    232             double xMean = mean.evaluate(xArray);
    233             double yMean = mean.evaluate(yArray);
    234             for (int i = 0; i < length; i++) {
    235                 double xDev = xArray[i] - xMean;
    236                 double yDev = yArray[i] - yMean;
    237                 result += (xDev * yDev - result) / (i + 1);
    238             }
    239         }
    240         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
    241     }
    242 
    243     /**
    244      * Computes the covariance between the two arrays, using the bias-corrected
    245      * formula.
    246      *
    247      * <p>Array lengths must match and the common length must be at least 2.</p>
    248      *
    249      * @param xArray first data array
    250      * @param yArray second data array
    251      * @return returns the covariance for the two arrays
    252      * @throws  IllegalArgumentException if the arrays lengths do not match or
    253      * there is insufficient data
    254      */
    255     public double covariance(final double[] xArray, final double[] yArray)
    256         throws IllegalArgumentException {
    257         return covariance(xArray, yArray, true);
    258     }
    259 
    260     /**
    261      * Throws IllegalArgumentException of the matrix does not have at least
    262      * two columns and two rows
    263      * @param matrix matrix to check
    264      */
    265     private void checkSufficientData(final RealMatrix matrix) {
    266         int nRows = matrix.getRowDimension();
    267         int nCols = matrix.getColumnDimension();
    268         if (nRows < 2 || nCols < 2) {
    269             throw MathRuntimeException.createIllegalArgumentException(
    270                     LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
    271                     nRows, nCols);
    272         }
    273     }
    274 }
    275