Home | History | Annotate | Download | only in perf-tests
      1 #/**
      2 # *******************************************************************************
      3 # * Copyright (C) 2002-2004, International Business Machines Corporation and    *
      4 # * others. All Rights Reserved.                                                *
      5 # *******************************************************************************
      6 # */
      7 package Dataset;
      8 use Statistics::Descriptive;
      9 use Statistics::Distributions;
     10 use strict;
     11 
     12 # Create a new Dataset with the given data.
     13 sub new {
     14     my ($class) = shift;
     15     my $self = bless {
     16         _data => \@_,
     17         _scale => 1.0,
     18         _mean => 0.0,
     19         _error => 0.0,
     20     }, $class;
     21 
     22     my $n = @_;
     23     
     24     if ($n >= 1) {
     25         my $stats = Statistics::Descriptive::Full->new();
     26         $stats->add_data(@{$self->{_data}});
     27         $self->{_mean} = $stats->mean();
     28 
     29         if ($n >= 2) {
     30             # Use a t distribution rather than Gaussian because (a) we
     31             # assume an underlying normal dist, (b) we do not know the
     32             # standard deviation -- we estimate it from the data, and (c)
     33             # we MAY have a small sample size (also works for large n).
     34             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
     35             $self->{_error} = $t * $stats->standard_deviation();
     36         }
     37     }
     38 
     39     $self;
     40 }
     41 
     42 # Set a scaling factor for all data; 1.0 means no scaling.
     43 # Scale must be > 0.
     44 sub setScale {
     45     my ($self, $scale) = @_;
     46     $self->{_scale} = $scale;
     47 }
     48 
     49 # Multiply the scaling factor by a value.
     50 sub scaleBy {
     51     my ($self, $a) = @_;
     52     $self->{_scale} *= $a;
     53 }
     54 
     55 # Return the mean.
     56 sub getMean {
     57     my $self = shift;
     58     return $self->{_mean} * $self->{_scale};
     59 }
     60 
     61 # Return a 99% error based on the t distribution.  The dataset
     62 # is desribed as getMean() +/- getError().
     63 sub getError {
     64     my $self = shift;
     65     return $self->{_error} * $self->{_scale};
     66 }
     67 
     68 # Divide two Datasets and return a new one, maintaining the
     69 # mean+/-error.  The new Dataset has no data points.
     70 sub divide {
     71     my $self = shift;
     72     my $rhs = shift;
     73     
     74     my $minratio = ($self->{_mean} - $self->{_error}) /
     75                    ($rhs->{_mean} + $rhs->{_error});
     76     my $maxratio = ($self->{_mean} + $self->{_error}) /
     77                    ($rhs->{_mean} - $rhs->{_error});
     78 
     79     my $result = Dataset->new();
     80     $result->{_mean} = ($minratio + $maxratio) / 2;
     81     $result->{_error} = $result->{_mean} - $minratio;
     82     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
     83     $result;
     84 }
     85 
     86 1;
     87