Home | History | Annotate | Download | only in perf-tests
      1 #/**
      2 # *  2016 and later: Unicode, Inc. and others.
      3 # * License & terms of use: http://www.unicode.org/copyright.html#License
      4 # *******************************************************************************
      5 # * Copyright (C) 2002-2004, International Business Machines Corporation and    *
      6 # * others. All Rights Reserved.                                                *
      7 # *******************************************************************************
      8 # */
      9 package Dataset;
     10 use Statistics::Descriptive;
     11 use Statistics::Distributions;
     12 use strict;
     13 
     14 # Create a new Dataset with the given data.
     15 sub new {
     16     my ($class) = shift;
     17     my $self = bless {
     18         _data => \@_,
     19         _scale => 1.0,
     20         _mean => 0.0,
     21         _error => 0.0,
     22     }, $class;
     23 
     24     my $n = @_;
     25     
     26     if ($n >= 1) {
     27         my $stats = Statistics::Descriptive::Full->new();
     28         $stats->add_data(@{$self->{_data}});
     29         $self->{_mean} = $stats->mean();
     30 
     31         if ($n >= 2) {
     32             # Use a t distribution rather than Gaussian because (a) we
     33             # assume an underlying normal dist, (b) we do not know the
     34             # standard deviation -- we estimate it from the data, and (c)
     35             # we MAY have a small sample size (also works for large n).
     36             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
     37             $self->{_error} = $t * $stats->standard_deviation();
     38         }
     39     }
     40 
     41     $self;
     42 }
     43 
     44 # Set a scaling factor for all data; 1.0 means no scaling.
     45 # Scale must be > 0.
     46 sub setScale {
     47     my ($self, $scale) = @_;
     48     $self->{_scale} = $scale;
     49 }
     50 
     51 # Multiply the scaling factor by a value.
     52 sub scaleBy {
     53     my ($self, $a) = @_;
     54     $self->{_scale} *= $a;
     55 }
     56 
     57 # Return the mean.
     58 sub getMean {
     59     my $self = shift;
     60     return $self->{_mean} * $self->{_scale};
     61 }
     62 
     63 # Return a 99% error based on the t distribution.  The dataset
# is described as getMean() +/- getError().
     65 sub getError {
     66     my $self = shift;
     67     return $self->{_error} * $self->{_scale};
     68 }
     69 
     70 # Divide two Datasets and return a new one, maintaining the
     71 # mean+/-error.  The new Dataset has no data points.
     72 sub divide {
     73     my $self = shift;
     74     my $rhs = shift;
     75     
     76     my $minratio = ($self->{_mean} - $self->{_error}) /
     77                    ($rhs->{_mean} + $rhs->{_error});
     78     my $maxratio = ($self->{_mean} + $self->{_error}) /
     79                    ($rhs->{_mean} - $rhs->{_error});
     80 
     81     my $result = Dataset->new();
     82     $result->{_mean} = ($minratio + $maxratio) / 2;
     83     $result->{_error} = $result->{_mean} - $minratio;
     84     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
     85     $result;
     86 }
     87 
     88 1;
     89