#/**
# * 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html#License
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
# * others. All Rights Reserved.                                                *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use Carp ();
use strict;
use warnings;

# A Dataset summarizes a list of numeric samples as mean +/- error,
# with an optional scaling factor applied when the values are read.
#
# Internal fields:
#   _data   array ref holding a private copy of the samples
#   _scale  multiplier applied by getMean() and getError()
#   _mean   unscaled mean of the samples (0.0 when there are none)
#   _error  unscaled error half-width (0.0 with fewer than 2 samples)

# Create a new Dataset with the given data.
#
# Arguments: the class name followed by zero or more numeric samples.
# Returns:   the new blessed Dataset object.
sub new {
    # Copying @_ into @samples matters: the elements of @_ alias the
    # caller's variables, so keeping a reference to @_ itself would let
    # later changes on either side leak into the other.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # An upper-tail probability of 0.005 leaves 0.5% in each
            # tail, i.e. a two-sided 99% interval.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is not checked here).
# Returns the scale that was stored.
sub setScale {
    my ($self, $scale) = @_;
    $self->{_scale} = $scale;
    return $self->{_scale};
}

# Multiply the scaling factor by a value.
# Returns the resulting scale.
sub scaleBy {
    # Named $factor rather than $a so the lexical does not shadow the
    # special variable used by sort().
    my ($self, $factor) = @_;
    $self->{_scale} *= $factor;
    return $self->{_scale};
}

# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# current scaling factor.  The dataset is described as
# getMean() +/- getError().  For a Dataset built by new(), the unscaled
# error is the t critical value times the standard deviation reported
# by Statistics::Descriptive.
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The smallest and largest possible ratios are formed from the interval
# bounds of both operands; the result's mean is their midpoint and its
# error is half of their spread.  The result's scale is the quotient of
# the two scales.
#
# Arguments: $rhs, the Dataset to divide by.
# Dies (via Carp::croak) when a bound of $rhs's interval, or $rhs's
# scale, is zero, since the quotient is then undefined.
sub divide {
    my ($self, $rhs) = @_;

    my $rhs_low  = $rhs->{_mean} - $rhs->{_error};
    my $rhs_high = $rhs->{_mean} + $rhs->{_error};

    # These cases would otherwise die with a bare "Illegal division by
    # zero"; report them with context at the caller's location instead.
    if ($rhs_low == 0 || $rhs_high == 0) {
        Carp::croak("Dataset::divide: divisor interval has a zero bound; "
            . "the ratio is undefined");
    }
    if ($rhs->{_scale} == 0) {
        Carp::croak("Dataset::divide: divisor scale is zero");
    }

    my $minratio = ($self->{_mean} - $self->{_error}) / $rhs_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $rhs_low;

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

1;