#!/usr/local/bin/perl
#  ********************************************************************
#  * COPYRIGHT:
#  * Copyright (c) 2002, International Business Machines Corporation and
#  * others. All Rights Reserved.
#  ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;

# Create a new Dataset with the given data.
#
# Usage:   Dataset->new(@samples)
# Returns: a blessed hash holding
#   _data  - a private copy of the samples
#   _scale - scaling factor applied by getMean()/getError(), initially 1.0
#   _mean  - unscaled mean of the samples (0.0 when there are none)
#   _error - unscaled error term (0.0 with fewer than two samples)
sub new {
    # Copy the samples out of @_: its elements alias the caller's
    # variables, so keeping a reference to @_ itself would let later
    # changes in the caller leak into this Dataset.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            #
            # 0.005 is the upper-tail probability, i.e. the critical
            # value for a two-sided 99% interval with n-1 degrees of
            # freedom.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (not checked here; the caller is responsible).
# Replaces any previously set or accumulated scale.
sub setScale {
    my ($self, $scale) = @_;
    $self->{_scale} = $scale;
}

# Multiply the current scaling factor by a value.
sub scaleBy {
    # Named $factor rather than $a so the sort() global is not shadowed.
    my ($self, $factor) = @_;
    $self->{_scale} *= $factor;
}

# Return the mean, with the scaling factor applied.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, with the scaling
# factor applied.  The dataset is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub divide {
    my ($self, $rhs) = @_;

    # Bracket the quotient by its extremes: lowest numerator over
    # highest denominator, and highest numerator over lowest denominator.
    # NOTE(review): this bracketing presumes both mean+/-error intervals
    # are strictly positive -- confirm against callers.  A zero
    # denominator makes Perl die with "Illegal division by zero".
    my $minratio = ($self->{_mean} - $self->{_error}) /
                   ($rhs->{_mean} + $rhs->{_error});
    my $maxratio = ($self->{_mean} + $self->{_error}) /
                   ($rhs->{_mean} - $rhs->{_error});

    my $result = Dataset->new();
    # Centre the result on the midpoint of the bracket.
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Private helper: the factor that converts $rhs's unscaled values into
# $self's units, so that both operands can share $self's scale.
sub _scaleRatio {
    my ($self, $rhs) = @_;
    my ($mine, $theirs) = ($self->{_scale}, $rhs->{_scale});

    # Equal scales (the common case) need no conversion.  A zero scale
    # on the left cannot be divided by, and zeroes the scaled result
    # whatever the factor is.
    return 1.0 if $mine == $theirs || $mine == 0;
    return $theirs / $mine;
}

# Subtracts two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
# The result keeps the scale of the left-hand Dataset; the right-hand
# Dataset is converted into that scale first, so operands with
# different scales are combined correctly.
sub subtract {
    my ($self, $rhs) = @_;
    my $ratio = _scaleRatio($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} - $rhs->{_mean} * $ratio;
    # Errors accumulate for a difference just as they do for a sum.
    $result->{_error} = $self->{_error} + $rhs->{_error} * abs($ratio);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Adds two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
# The result keeps the scale of the left-hand Dataset; the right-hand
# Dataset is converted into that scale first, so operands with
# different scales are combined correctly.
sub add {
    my ($self, $rhs) = @_;
    my $ratio = _scaleRatio($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} + $rhs->{_mean} * $ratio;
    $result->{_error} = $self->{_error} + $rhs->{_error} * abs($ratio);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Divides a dataset by a scalar.
# The new Dataset has no data points.
# A zero scalar makes Perl die with "Illegal division by zero".
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    # The error is a magnitude: a negative scalar must not flip its sign.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The new Dataset has no data points.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    # The error is a magnitude: a negative scalar must not flip its sign.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;