#!/usr/local/bin/perl
#  ********************************************************************
#  * Copyright (C) 2016 and later: Unicode, Inc. and others.
#  * License & terms of use: http://www.unicode.org/copyright.html#License
#  ********************************************************************
#  ********************************************************************
#  * COPYRIGHT:
#  * Copyright (c) 2002, International Business Machines Corporation and
#  * others. All Rights Reserved.
#  ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;

# Create a new Dataset with the given data.
#
# Usage:   my $ds = Dataset->new(@samples);
# Returns: a blessed hash with the fields
#   _data   array ref holding a private copy of the samples
#   _scale  multiplier applied by getMean()/getError(); starts at 1.0
#   _mean   unscaled mean of the samples (0.0 when there are none)
#   _error  unscaled error term (0.0 with fewer than two samples)
sub new {
    # Copy the samples out of @_.  The elements of @_ are aliases of the
    # caller's variables, so keeping a reference to @_ itself would let
    # later changes in the caller silently alter the stored data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # 0.005 is the probability passed to tdistr for n-1 degrees of
            # freedom; getError() is documented as a 99% error.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is not checked here).
# Returns the newly stored scale.
sub setScale {
    my ($self, $scale) = @_;
    return $self->{_scale} = $scale;
}

# Multiply the scaling factor by a value.
# Returns the updated scale.
sub scaleBy {
    # Named $factor rather than $a, which is reserved for sort blocks.
    my ($self, $factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my $self = shift;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution.  The dataset
# is described as getMean() +/- getError().
sub getError {
    # Scaled error term: the stored _error multiplied by the current _scale.
    my $self = shift;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The smallest ratio is (lhs mean - lhs error) / (rhs mean + rhs error)
# and the largest is (lhs mean + lhs error) / (rhs mean - rhs error).
# The result's mean is the midpoint of those two ratios and its error is
# the distance from that midpoint to the smallest ratio.  The result's
# scale is the lhs scale divided by the rhs scale.
# Perl raises a division-by-zero error if either rhs bound, or the rhs
# scale, is zero.
sub divide {
    my ($self, $rhs) = @_;

    my $minratio = ($self->{_mean} - $self->{_error}) /
                   ($rhs->{_mean} + $rhs->{_error});
    my $maxratio = ($self->{_mean} + $self->{_error}) /
                   ($rhs->{_mean} - $rhs->{_error});

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Private helper shared by add() and subtract().
# Combines $self and $rhs as $self + $sign * $rhs, where $sign is +1 or -1.
# The two error terms are always summed, whatever the sign.
sub _combine {
    my ($self, $rhs, $sign) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        # Same scale on both sides: combine the raw values and keep the
        # common scale.
        $result->{_mean}  = $self->{_mean} + $sign * $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different scales: raw values are not in the same units, so fold
        # each operand's scale into its values first.  The result then
        # carries a scale of 1.0.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          + $sign * $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }

    return $result;
}

# subtracts two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
# The scaled mean of the result is the lhs scaled mean minus the rhs
# scaled mean; the scaled error is the sum of both scaled errors.
sub subtract {
    my ($self, $rhs) = @_;
    return _combine($self, $rhs, -1);
}

# adds two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
# The scaled mean of the result is the sum of both scaled means; the
# scaled error is the sum of both scaled errors.
sub add {
    my ($self, $rhs) = @_;
    return _combine($self, $rhs, 1);
}

# Divides a dataset by a scalar.
# The new Dataset has no data points.
# Mean and error are both divided by the scalar; the scale is carried
# over unchanged.  Perl raises a division-by-zero error if the scalar is 0.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    $result->{_error} = $self->{_error} / $s;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The new Dataset has no data points.
sub multiplyByScalar {
    # Returns a new, data-less Dataset whose mean and error are those of
    # the invocant multiplied by the scalar $s; the scale is copied as is.
    my ($self, $s) = @_;

    my $product = Dataset->new();
    @{$product}{qw(_mean _error _scale)} = (
        $self->{_mean} * $s,
        $self->{_error} * $s,
        $self->{_scale},
    );
    return $product;
}

1;