#/**
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
# * others. All Rights Reserved.                                                *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use Carp ();
use strict;
use warnings;

# A Dataset summarizes a list of numeric samples as mean +/- error,
# optionally multiplied by a scaling factor.  Internally it keeps:
#   _data   array ref holding a private copy of the samples
#   _scale  multiplier applied by getMean() and getError()
#   _mean   unscaled sample mean
#   _error  unscaled 99% error

# Create a new Dataset with the given data.
#
# Arguments: zero or more numeric samples.
# Returns:   a new Dataset.  The mean is 0.0 when no samples are given,
#            and the error is 0.0 when fewer than two samples are given.
sub new {
    my $class = shift;

    my $self = bless {
        # Copy the samples.  The elements of @_ alias the caller's
        # variables, so storing \@_ would tie this object's data to
        # whatever the caller does with its own array afterwards.
        _data  => [@_],
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @{ $self->{_data} };

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@{ $self->{_data} });
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            #
            # 0.005 is the upper-tail probability, i.e. a two-sided 99%
            # critical value with n-1 degrees of freedom.
            #
            # NOTE(review): the error is t * standard deviation, not
            # t * standard deviation / sqrt(n) (the standard error of the
            # mean) -- confirm this is the intended interval.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is the caller's responsibility; it is not
# checked here).
# Returns the new scaling factor.
sub setScale {
    my ($self, $scale) = @_;
    $self->{_scale} = $scale;
    return $self->{_scale};
}

# Multiply the scaling factor by a value.
# Returns the new scaling factor.
sub scaleBy {
    # Named $factor rather than $a so that the special sort variable $a
    # is not shadowed.
    my ($self, $factor) = @_;
    $self->{_scale} *= $factor;
    return $self->{_scale};
}

# Return the mean, multiplied by the scaling factor.
sub getMean {
    my $self = shift;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# scaling factor.  The dataset is described as getMean() +/- getError().
sub getError {
    my $self = shift;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Arguments: $rhs -- the Dataset to divide by.
# Returns:   a new Dataset whose interval spans
#            (mean - error) / (rhs mean + rhs error)  up to
#            (mean + error) / (rhs mean - rhs error),
#            and whose scale is this scale divided by the rhs scale.
# Dies:      when either bound of the rhs interval, or the rhs scale,
#            is zero, because the ratio is then undefined.
sub divide {
    my ($self, $rhs) = @_;

    my $rhs_low  = $rhs->{_mean} - $rhs->{_error};
    my $rhs_high = $rhs->{_mean} + $rhs->{_error};

    # Perl would die with a bare "Illegal division by zero" below;
    # report the problem from the caller's point of view instead.
    if ($rhs_low == 0 || $rhs_high == 0 || $rhs->{_scale} == 0) {
        Carp::croak("Dataset::divide: division by zero "
                  . "(divisor mean +/- error, or divisor scale, is zero)");
    }

    # Smallest ratio: lowest numerator over highest denominator;
    # largest ratio: highest numerator over lowest denominator.
    my $minratio = ($self->{_mean} - $self->{_error}) / $rhs_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $rhs_low;

    # Start from an empty Dataset and fill in the summary directly.
    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

1;