#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tests for results_stats."""

import os
import sys

import unittest

try:
  import numpy as np
except ImportError:
  np = None

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                '..')))
from statistical_analysis import results_stats


class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase):
  """Unit testing of several functions in results_stats."""

  def testGetChartsFromBenchmarkResultJson(self):
    """Unit test for errors raised when getting the charts element.

    Also makes sure that the 'trace' element is deleted if it exists.
    """
    input_json_wrong_format = {'charts_wrong': {}}
    input_json_empty = {'charts': {}}
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format))
    with self.assertRaises(ValueError):
      (results_stats.GetChartsFromBenchmarkResultJson(input_json_empty))

    input_json_with_trace = {'charts':
                             {'trace': {},
                              'Ex_metric_1':
                              {'Ex_page_1': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]},
                               'Ex_page_2': {'type': 'histogram',
                                             'values': [1, 2]}},
                              'Ex_metric_2':
                              {'Ex_page_1': {'type': 'list_of_scalar_values'},
                               'Ex_page_2': {'type': 'list_of_scalar_values',
                                             'values': [1, 2]}}}}

    output = (results_stats.
              GetChartsFromBenchmarkResultJson(input_json_with_trace))
    expected_output = {'Ex_metric_1':
                       {'Ex_page_1': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}},
                       'Ex_metric_2':
                       {'Ex_page_2': {'type': 'list_of_scalar_values',
                                      'values': [1, 2]}}}
    self.assertEqual(output, expected_output)

  def testCreateBenchmarkResultDict(self):
    """Unit test for benchmark result dict created from a benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark and then
    compares the output dict against an expected predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': metric_vals,
                                       'type': 'list_of_scalar_values'}}

    output = results_stats.CreateBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': [55, 72, 60],
                       'open_tabs_time': [54, 42, 65],
                       'window_display_time': [44, 89]}

    self.assertEqual(output, expected_output)

  def testCreatePagesetBenchmarkResultDict(self):
    """Unit test for pageset benchmark result dict created from benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark when it
    includes a pageset and then compares the output dict against an expected
    predefined output dict.
    """
    metric_names = ['messageloop_start_time',
                    'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]
    page_names = ['Ex_page_1', 'Ex_page_2']

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary':
                                      {'values': [0, 1, 2, 3],
                                       'type': 'list_of_scalar_values'}}
      for page in page_names:
        input_json['charts'][metric][page] = {'values': metric_vals,
                                              'type': 'list_of_scalar_values'}

    output = results_stats.CreatePagesetBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60],
                                                  'Ex_page_2': [55, 72, 60]},
                       'open_tabs_time': {'Ex_page_1': [54, 42, 65],
                                          'Ex_page_2': [54, 42, 65]},
                       'window_display_time': {'Ex_page_1': [44, 89],
                                               'Ex_page_2': [44, 89]}}

    self.assertEqual(output, expected_output)

  def testCombinePValues(self):
    """Unit test for Fisher's Method that combines multiple p-values."""
    test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01]

    expected_output = 0.00047334256271885721
    output = results_stats.CombinePValues(test_p_values)

    self.assertEqual(output, expected_output)

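  # Illustrative sketch only: Fisher's Method combines k independent p-values
  # into the statistic X^2 = -2 * sum(ln(p_i)), which follows a chi-square
  # distribution with 2 * k degrees of freedom under the null hypothesis. The
  # helper below assumes scipy is available and is not necessarily how
  # results_stats.CombinePValues is implemented.
  def FisherCombinePValuesSketch(self, p_values):
    """Returns the combined p-value for p_values using Fisher's Method."""
    import math
    from scipy import stats  # Imported lazily; assumed available.
    statistic = -2 * sum(math.log(p) for p in p_values)
    return stats.chi2.sf(statistic, 2 * len(p_values))
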
  def CreateRandomNormalDistribution(self, mean=0, size=30):
    """Creates a pseudo-random normal sample used by multiple test methods."""
    if not np:
      raise ImportError('This function requires NumPy.')

    np.random.seed(0)
    sample = np.random.normal(loc=mean, scale=1, size=size)

    return sample

  def testIsNormallyDistributed(self):
    """Unit test for values returned when testing for normality."""
    if not np:
      self.skipTest('NumPy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]

    expected_outputs = [(True, 0.5253966450691223),
                        (True, 0.5253913402557373)]
    for sample, expected_output in zip(test_samples, expected_outputs):
      output = results_stats.IsNormallyDistributed(sample)

      self.assertEqual(output, expected_output)

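  # Illustrative sketch only: a Shapiro-Wilk based normality check with the
  # same (is_normal, p_value) shape asserted above. Using scipy.stats.shapiro
  # and a 0.05 threshold is an assumption; results_stats may use a different
  # test or significance level.
  def IsNormallyDistributedSketch(self, sample, significance_level=0.05):
    """Returns (is_normal, p_value) for sample using Shapiro-Wilk."""
    from scipy import stats  # Imported lazily; assumed available.
    _, p_value = stats.shapiro(sample)
    return p_value > significance_level, p_value
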
  def testAreSamplesDifferent(self):
    """Unit test for values returned after running the statistical tests.

    Checks that the expected errors are raised for unsuitable samples, then
    creates two pseudo-random normally distributed samples to run the
    statistical tests and compares the resulting answer and p-value against
    their pre-calculated values.
    """
    test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]]
    with self.assertRaises(results_stats.SampleSizeError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.MANN)
    with self.assertRaises(results_stats.NonNormalSampleError):
      results_stats.AreSamplesDifferent(test_samples[0], test_samples[1],
                                        test=results_stats.WELCH)

    test_samples_equal = (20 * [1], 20 * [1])
    expected_output_equal = (False, 1.0)
    output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0],
                                                     test_samples_equal[1],
                                                     test=results_stats.MANN)
    self.assertEqual(output_equal, expected_output_equal)

    if not np:
      self.skipTest('NumPy is not installed.')

    test_samples = [self.CreateRandomNormalDistribution(0),
                    self.CreateRandomNormalDistribution(1)]
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = [(True, 2 * 0.00068516628052438266),
                        (True, 0.0017459498829507842),
                        (True, 0.00084765230478226514)]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreSamplesDifferent(test_samples[0],
                                                 test_samples[1],
                                                 test=test)
      self.assertEqual(output, expected_output)

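  # Illustrative sketch only: one plausible mapping of the test options onto
  # scipy two-sample tests, producing the (is_different, p_value) shape
  # asserted above. The exact scipy calls, the doubling of the one-sided
  # Mann-Whitney p-value and the 0.05 threshold are assumptions about
  # results_stats.AreSamplesDifferent, not a copy of its implementation.
  def AreSamplesDifferentSketch(self, sample_1, sample_2, test,
                                significance_level=0.05):
    """Returns (is_different, p_value) for the chosen two-sample test."""
    from scipy import stats  # Imported lazily; assumed available.
    if test == results_stats.MANN:
      # Older scipy versions return a one-sided p-value here, hence the
      # doubling to obtain a two-sided result.
      _, p_value = stats.mannwhitneyu(sample_1, sample_2)
      p_value = min(2 * p_value, 1.0)
    elif test == results_stats.WELCH:
      _, p_value = stats.ttest_ind(sample_1, sample_2, equal_var=False)
    else:  # Assumed to be the Kolmogorov-Smirnov option.
      _, p_value = stats.ks_2samp(sample_1, sample_2)
    return p_value < significance_level, p_value
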
  def testAssertThatKeysMatch(self):
    """Unit test for exception raised when input dicts' metrics don't match."""
    differing_input_dicts = [{'messageloop_start_time': [55, 72, 60],
                              'display_time': [44, 89]},
                             {'messageloop_start_time': [55, 72, 60]}]
    with self.assertRaises(results_stats.DictMismatchError):
      results_stats.AssertThatKeysMatch(differing_input_dicts[0],
                                        differing_input_dicts[1])

  def testAreBenchmarkResultsDifferent(self):
    """Unit test for the per-metric statistical test outcome dict."""
    if not np:
      self.skipTest('NumPy is not installed.')

    test_input_dicts = [{'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(0)},
                        {'open_tabs_time':
                         self.CreateRandomNormalDistribution(0),
                         'display_time':
                         self.CreateRandomNormalDistribution(1)}]
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425),
                         'display_time': (True, 2 * 0.00068516628052438266)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.0017459498829507842)},
                        {'open_tabs_time': (False, 1.0),
                         'display_time': (True, 0.00084765230478226514)}]

    for test, expected_output in zip(test_options, expected_outputs):
      output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0],
                                                          test_input_dicts[1],
                                                          test=test)
      self.assertEqual(output, expected_output)

  def testArePagesetBenchmarkResultsDifferent(self):
    """Unit test for the per-metric, per-page statistical test outcome dict."""
    if not np:
      self.skipTest('NumPy is not installed.')

    distributions = (self.CreateRandomNormalDistribution(0),
                     self.CreateRandomNormalDistribution(1))
    test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[0]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[1]}},
                        {'open_tabs_time': {'Ex_page_1': distributions[0],
                                            'Ex_page_2': distributions[1]},
                         'display_time': {'Ex_page_1': distributions[1],
                                          'Ex_page_2': distributions[0]}})
    test_options = results_stats.ALL_TEST_OPTIONS

    expected_outputs = ({'open_tabs_time':  # Mann.
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)},
                         'display_time':
                         {'Ex_page_1': (False, 2 * 0.49704973080841425),
                          'Ex_page_2': (True, 2 * 0.00068516628052438266)}},
                        {'open_tabs_time':  # Kolmogorov.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.0017459498829507842)}},
                        {'open_tabs_time':  # Welch.
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)}})

    for test, expected_output in zip(test_options, expected_outputs):
      output = (results_stats.
                ArePagesetBenchmarkResultsDifferent(test_input_dicts[0],
                                                    test_input_dicts[1],
                                                    test=test))
      self.assertEqual(output, expected_output)


if __name__ == '__main__':
  sys.exit(unittest.main())