Home | History | Annotate | Download | only in perldriver
      1 #!/usr/local/bin/perl
      2 #  ********************************************************************
      3 #  * Copyright (C) 2016 and later: Unicode, Inc. and others.
      4 #  * License & terms of use: http://www.unicode.org/copyright.html#License
      5 #  ********************************************************************
      6 #  ********************************************************************
      7 #  * COPYRIGHT:
      8 #  * Copyright (c) 2002, International Business Machines Corporation and
      9 #  * others. All Rights Reserved.
     10 #  ********************************************************************
     11 
     12 package Dataset;
     13 use Statistics::Descriptive;
     14 use Statistics::Distributions;
     15 use strict;
     16 
     17 # Create a new Dataset with the given data.
     18 sub new {
     19     my ($class) = shift;
     20     my $self = bless {
     21         _data => \@_,
     22         _scale => 1.0,
     23         _mean => 0.0,
     24         _error => 0.0,
     25     }, $class;
     26 
     27     my $n = @_;
     28     
     29     if ($n >= 1) {
     30         my $stats = Statistics::Descriptive::Full->new();
     31         $stats->add_data(@{$self->{_data}});
     32         $self->{_mean} = $stats->mean();
     33 
     34         if ($n >= 2) {
     35             # Use a t distribution rather than Gaussian because (a) we
     36             # assume an underlying normal dist, (b) we do not know the
     37             # standard deviation -- we estimate it from the data, and (c)
     38             # we MAY have a small sample size (also works for large n).
     39             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
     40             $self->{_error} = $t * $stats->standard_deviation();
     41         }
     42     }
     43 
     44     $self;
     45 }
     46 
     47 # Set a scaling factor for all data; 1.0 means no scaling.
     48 # Scale must be > 0.
     49 sub setScale {
     50     my ($self, $scale) = @_;
     51     $self->{_scale} = $scale;
     52 }
     53 
     54 # Multiply the scaling factor by a value.
     55 sub scaleBy {
     56     my ($self, $a) = @_;
     57     $self->{_scale} *= $a;
     58 }
     59 
     60 # Return the mean.
     61 sub getMean {
     62     my $self = shift;
     63     return $self->{_mean} * $self->{_scale};
     64 }
     65 
     66 # Return a 99% error based on the t distribution.  The dataset
     67 # is desribed as getMean() +/- getError().
     68 sub getError {
     69     my $self = shift;
     70     return $self->{_error} * $self->{_scale};
     71 }
     72 
     73 # Divide two Datasets and return a new one, maintaining the
     74 # mean+/-error.  The new Dataset has no data points.
     75 sub divide {
     76     my $self = shift;
     77     my $rhs = shift;
     78     
     79     my $minratio = ($self->{_mean} - $self->{_error}) /
     80                    ($rhs->{_mean} + $rhs->{_error});
     81     my $maxratio = ($self->{_mean} + $self->{_error}) /
     82                    ($rhs->{_mean} - $rhs->{_error});
     83 
     84     my $result = Dataset->new();
     85     $result->{_mean} = ($minratio + $maxratio) / 2;
     86     $result->{_error} = $result->{_mean} - $minratio;
     87     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
     88     $result;
     89 }
     90 
     91 # subtracts two Datasets and return a new one, maintaining the
     92 # mean+/-error.  The new Dataset has no data points.
     93 sub subtract {
     94     my $self = shift;
     95     my $rhs = shift;
     96     
     97     my $result = Dataset->new();
     98     $result->{_mean} = $self->{_mean} - $rhs->{_mean};
     99     $result->{_error} = $self->{_error} + $rhs->{_error};
    100     $result->{_scale} = $self->{_scale};
    101     $result;
    102 }
    103 
    104 # adds two Datasets and return a new one, maintaining the
    105 # mean+/-error.  The new Dataset has no data points.
    106 sub add {
    107     my $self = shift;
    108     my $rhs = shift;
    109     
    110     my $result = Dataset->new();
    111     $result->{_mean} = $self->{_mean} + $rhs->{_mean};
    112     $result->{_error} = $self->{_error} + $rhs->{_error};
    113     $result->{_scale} = $self->{_scale};
    114     $result;
    115 }
    116 
    117 # Divides a dataset by a scalar.
    118 # The new Dataset has no data points.
    119 sub divideByScalar {
    120     my $self = shift;
    121     my $s = shift;
    122     
    123     my $result = Dataset->new();
    124     $result->{_mean} = $self->{_mean}/$s;
    125     $result->{_error} = $self->{_error}/$s;
    126     $result->{_scale} = $self->{_scale};
    127     $result;
    128 }
    129 
    130 # Divides a dataset by a scalar.
    131 # The new Dataset has no data points.
    132 sub multiplyByScalar {
    133     my $self = shift;
    134     my $s = shift;
    135     
    136     my $result = Dataset->new();
    137     $result->{_mean} = $self->{_mean}*$s;
    138     $result->{_error} = $self->{_error}*$s;
    139     $result->{_scale} = $self->{_scale};
    140     $result;
    141 }
    142 
    143 1;
    144