Home | History | Annotate | Download | only in perldriver
      1 #!/usr/local/bin/perl
      2 #  ********************************************************************
      3 #  * COPYRIGHT:
      4 #  * Copyright (c) 2002, International Business Machines Corporation and
      5 #  * others. All Rights Reserved.
      6 #  ********************************************************************
      7 
      8 package Dataset;
      9 use Statistics::Descriptive;
     10 use Statistics::Distributions;
     11 use strict;
     12 
     13 # Create a new Dataset with the given data.
     14 sub new {
     15     my ($class) = shift;
     16     my $self = bless {
     17         _data => \@_,
     18         _scale => 1.0,
     19         _mean => 0.0,
     20         _error => 0.0,
     21     }, $class;
     22 
     23     my $n = @_;
     24     
     25     if ($n >= 1) {
     26         my $stats = Statistics::Descriptive::Full->new();
     27         $stats->add_data(@{$self->{_data}});
     28         $self->{_mean} = $stats->mean();
     29 
     30         if ($n >= 2) {
     31             # Use a t distribution rather than Gaussian because (a) we
     32             # assume an underlying normal dist, (b) we do not know the
     33             # standard deviation -- we estimate it from the data, and (c)
     34             # we MAY have a small sample size (also works for large n).
     35             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
     36             $self->{_error} = $t * $stats->standard_deviation();
     37         }
     38     }
     39 
     40     $self;
     41 }
     42 
     43 # Set a scaling factor for all data; 1.0 means no scaling.
     44 # Scale must be > 0.
     45 sub setScale {
     46     my ($self, $scale) = @_;
     47     $self->{_scale} = $scale;
     48 }
     49 
     50 # Multiply the scaling factor by a value.
     51 sub scaleBy {
     52     my ($self, $a) = @_;
     53     $self->{_scale} *= $a;
     54 }
     55 
     56 # Return the mean.
     57 sub getMean {
     58     my $self = shift;
     59     return $self->{_mean} * $self->{_scale};
     60 }
     61 
     62 # Return a 99% error based on the t distribution.  The dataset
     63 # is desribed as getMean() +/- getError().
     64 sub getError {
     65     my $self = shift;
     66     return $self->{_error} * $self->{_scale};
     67 }
     68 
     69 # Divide two Datasets and return a new one, maintaining the
     70 # mean+/-error.  The new Dataset has no data points.
     71 sub divide {
     72     my $self = shift;
     73     my $rhs = shift;
     74     
     75     my $minratio = ($self->{_mean} - $self->{_error}) /
     76                    ($rhs->{_mean} + $rhs->{_error});
     77     my $maxratio = ($self->{_mean} + $self->{_error}) /
     78                    ($rhs->{_mean} - $rhs->{_error});
     79 
     80     my $result = Dataset->new();
     81     $result->{_mean} = ($minratio + $maxratio) / 2;
     82     $result->{_error} = $result->{_mean} - $minratio;
     83     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
     84     $result;
     85 }
     86 
     87 # subtracts two Datasets and return a new one, maintaining the
     88 # mean+/-error.  The new Dataset has no data points.
     89 sub subtract {
     90     my $self = shift;
     91     my $rhs = shift;
     92     
     93     my $result = Dataset->new();
     94     $result->{_mean} = $self->{_mean} - $rhs->{_mean};
     95     $result->{_error} = $self->{_error} + $rhs->{_error};
     96     $result->{_scale} = $self->{_scale};
     97     $result;
     98 }
     99 
    100 # adds two Datasets and return a new one, maintaining the
    101 # mean+/-error.  The new Dataset has no data points.
    102 sub add {
    103     my $self = shift;
    104     my $rhs = shift;
    105     
    106     my $result = Dataset->new();
    107     $result->{_mean} = $self->{_mean} + $rhs->{_mean};
    108     $result->{_error} = $self->{_error} + $rhs->{_error};
    109     $result->{_scale} = $self->{_scale};
    110     $result;
    111 }
    112 
    113 # Divides a dataset by a scalar.
    114 # The new Dataset has no data points.
    115 sub divideByScalar {
    116     my $self = shift;
    117     my $s = shift;
    118     
    119     my $result = Dataset->new();
    120     $result->{_mean} = $self->{_mean}/$s;
    121     $result->{_error} = $self->{_error}/$s;
    122     $result->{_scale} = $self->{_scale};
    123     $result;
    124 }
    125 
    126 # Divides a dataset by a scalar.
    127 # The new Dataset has no data points.
    128 sub multiplyByScalar {
    129     my $self = shift;
    130     my $s = shift;
    131     
    132     my $result = Dataset->new();
    133     $result->{_mean} = $self->{_mean}*$s;
    134     $result->{_error} = $self->{_error}*$s;
    135     $result->{_scale} = $self->{_scale};
    136     $result;
    137 }
    138 
    139 1;
    140