Home | History | Annotate | Download | only in auto_bisect
      1 # Copyright 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import os
      6 import re
      7 import shutil
      8 import unittest
      9 
     10 import bisect_perf_regression
     11 import bisect_results
     12 import source_control as source_control_module
     13 
     14 def _GetBisectPerformanceMetricsInstance():
     15   """Returns an instance of the BisectPerformanceMetrics class."""
     16   options_dict = {
     17     'debug_ignore_build': True,
     18     'debug_ignore_sync': True,
     19     'debug_ignore_perf_test': True,
     20     'command': 'fake_command',
     21     'metric': 'fake/metric',
     22     'good_revision': 280000,
     23     'bad_revision': 280005,
     24   }
     25   bisect_options = bisect_perf_regression.BisectOptions.FromDict(options_dict)
     26   source_control = source_control_module.DetermineAndCreateSourceControl(
     27       bisect_options)
     28   bisect_instance = bisect_perf_regression.BisectPerformanceMetrics(
     29       source_control, bisect_options)
     30   return bisect_instance
     31 
     32 
     33 class BisectPerfRegressionTest(unittest.TestCase):
     34   """Test case for other functions and classes in bisect-perf-regression.py."""
     35 
     36   def setUp(self):
     37     self.cwd = os.getcwd()
     38     os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__),
     39                                           os.path.pardir, os.path.pardir)))
     40 
     41   def tearDown(self):
     42     os.chdir(self.cwd)
     43 
     44   def _AssertConfidence(self, score, bad_values, good_values):
     45     """Checks whether the given sets of values have a given confidence score.
     46 
     47     The score represents our confidence that the two sets of values wouldn't
     48     be as different as they are just by chance; that is, that some real change
     49     occurred between the two sets of values.
     50 
     51     Args:
     52       score: Expected confidence score.
     53       bad_values: First list of numbers.
     54       good_values: Second list of numbers.
     55     """
     56     # ConfidenceScore takes a list of lists but these lists are flattened
     57     # inside the function.
     58     confidence = bisect_results.ConfidenceScore(
     59         [[v] for v in bad_values],
     60         [[v] for v in good_values])
     61     self.assertEqual(score, confidence)
     62 
     63   def testConfidenceScore_ZeroConfidence(self):
     64     # The good and bad sets contain the same values, so the confidence that
     65     # they're different should be zero.
     66     self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
     67 
     68   def testConfidenceScore_MediumConfidence(self):
     69     self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
     70 
     71   def testConfidenceScore_HighConfidence(self):
     72     self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
     73 
     74   def testConfidenceScore_VeryHighConfidence(self):
     75     # Confidence is high if the two sets of values have no internal variance.
     76     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
     77     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
     78 
     79   def testConfidenceScore_UnbalancedSampleSize(self):
     80     # The second set of numbers only contains one number, so confidence is 0.
     81     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])
     82 
     83   def testConfidenceScore_EmptySample(self):
     84     # Confidence is zero if either or both samples are empty.
     85     self._AssertConfidence(0.0, [], [])
     86     self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
     87     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
     88 
     89   def testConfidenceScore_FunctionalTestResults(self):
     90     self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
     91     self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
     92 
     93   def testConfidenceScore_RealWorldCases(self):
     94     """This method contains a set of data from actual bisect results.
     95 
     96     The confidence scores asserted below were all copied from the actual
     97     results, so the purpose of this test method is mainly to show what the
     98     results for real cases are, and compare when we change the confidence
     99     score function in the future.
    100     """
    101     self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
    102     self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
    103     self._AssertConfidence(80, [67, 68], [65, 65, 67])
    104     self._AssertConfidence(0, [514], [514])
    105     self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
    106     self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
    107     self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
    108     self._AssertConfidence(0, [1999004, 1999627], [223355])
    109     self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
    110     self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])
    111 
    112   def testParseDEPSStringManually(self):
    113     """Tests DEPS parsing."""
    114     deps_file_contents = """
    115     vars = {
    116         'ffmpeg_hash':
    117              '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
    118         'webkit_url':
    119              'https://chromium.googlesource.com/chromium/blink.git',
    120         'git_url':
    121              'https://chromium.googlesource.com',
    122         'webkit_rev':
    123              '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
    124         'angle_revision':
    125              '74697cf2064c0a2c0d7e1b1b28db439286766a05'
    126     }"""
    127 
    128     # Should only expect SVN/git revisions to come through, and URLs should be
    129     # filtered out.
    130     expected_vars_dict = {
    131         'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
    132         'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
    133         'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
    134     }
    135     # Testing private function.
    136     # pylint: disable=W0212
    137     vars_dict = bisect_perf_regression._ParseRevisionsFromDEPSFileManually(
    138         deps_file_contents)
    139     self.assertEqual(vars_dict, expected_vars_dict)
    140 
    141   def _AssertParseResult(self, expected_values, result_string):
    142     """Asserts some values are parsed from a RESULT line."""
    143     results_template = ('RESULT other_chart: other_trace= 123 count\n'
    144                         'RESULT my_chart: my_trace= %(value)s\n')
    145     results = results_template % {'value': result_string}
    146     metric = ['my_chart', 'my_trace']
    147     # Testing private function.
    148     # pylint: disable=W0212
    149     values = bisect_perf_regression._TryParseResultValuesFromOutput(
    150         metric, results)
    151     self.assertEqual(expected_values, values)
    152 
    153   def testTryParseResultValuesFromOutput_WithSingleValue(self):
    154     """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
    155     self._AssertParseResult([66.88], '66.88 kb')
    156     self._AssertParseResult([66.88], '66.88 ')
    157     self._AssertParseResult([-66.88], '-66.88 kb')
    158     self._AssertParseResult([66], '66 kb')
    159     self._AssertParseResult([0.66], '.66 kb')
    160     self._AssertParseResult([], '. kb')
    161     self._AssertParseResult([], 'aaa kb')
    162 
    163   def testTryParseResultValuesFromOutput_WithMultiValue(self):
    164     """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
    165     self._AssertParseResult([66.88], '[66.88] kb')
    166     self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
    167     self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
    168     self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
    169     self._AssertParseResult([-66, 99], '[-66,99] kb')
    170     self._AssertParseResult([-66, 99], '[-66,99,] kb')
    171     self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
    172     self._AssertParseResult([], '[] kb')
    173     self._AssertParseResult([], '[-66,abc] kb')
    174 
    175   def testTryParseResultValuesFromOutputWithMeanStd(self):
    176     """Tests result pattern <*>RESULT <graph>: <trace>= {<mean, std}"""
    177     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
    178     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
    179     self._AssertParseResult([33.22], '{33.22,3.6}kb')
    180     self._AssertParseResult([33.22], '{33.22,3.6} kb')
    181     self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
    182     self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
    183     self._AssertParseResult([22], '{22,6}kb')
    184     self._AssertParseResult([.22], '{.22,6}kb')
    185     self._AssertParseResult([], '{.22,6, 44}kb')
    186     self._AssertParseResult([], '{}kb')
    187     self._AssertParseResult([], '{XYZ}kb')
    188 
    189   def _AssertCompatibleCommand(
    190       self, expected_command, original_command, revision, target_platform):
    191     """Tests the modification of the command that might be done.
    192 
    193     This modification to the command is done in order to get a Telemetry
    194     command that works; before some revisions, the browser name that Telemetry
    195     expects is different in some cases, but we want it to work anyway.
    196 
    197     Specifically, only for android:
    198       After r276628, only android-chrome-shell works.
    199       Prior to r274857, only android-chromium-testshell works.
    200       In the range [274857, 276628], both work.
    201     """
    202     bisect_options = bisect_perf_regression.BisectOptions()
    203     bisect_options.output_buildbot_annotations = None
    204     source_control = source_control_module.DetermineAndCreateSourceControl(
    205         bisect_options)
    206     bisect_instance = bisect_perf_regression.BisectPerformanceMetrics(
    207         source_control, bisect_options)
    208     bisect_instance.opts.target_platform = target_platform
    209     git_revision = bisect_instance.source_control.ResolveToRevision(
    210         revision, 'chromium', bisect_perf_regression.DEPOT_DEPS_NAME, 100)
    211     depot = 'chromium'
    212     command = bisect_instance.GetCompatibleCommand(
    213         original_command, git_revision, depot)
    214     self.assertEqual(expected_command, command)
    215 
    216   def testGetCompatibleCommand_ChangeToTestShell(self):
    217     # For revisions <= r274857, only android-chromium-testshell is used.
    218     self._AssertCompatibleCommand(
    219         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
    220         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
    221         274857, 'android')
    222 
    223   def testGetCompatibleCommand_ChangeToShell(self):
    224     # For revisions >= r276728, only android-chrome-shell can be used.
    225     self._AssertCompatibleCommand(
    226         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
    227         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
    228         276628, 'android')
    229 
    230   def testGetCompatibleCommand_NoChange(self):
    231     # For revisions < r276728, android-chromium-testshell can be used.
    232     self._AssertCompatibleCommand(
    233         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
    234         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
    235         274858, 'android')
    236     # For revisions > r274857, android-chrome-shell can be used.
    237     self._AssertCompatibleCommand(
    238         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
    239         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
    240         274858, 'android')
    241 
    242   def testGetCompatibleCommand_NonAndroidPlatform(self):
    243     # In most cases, there's no need to change Telemetry command.
    244     # For revisions >= r276728, only android-chrome-shell can be used.
    245     self._AssertCompatibleCommand(
    246         'tools/perf/run_benchmark -v --browser=release foo',
    247         'tools/perf/run_benchmark -v --browser=release foo',
    248         276628, 'chromium')
    249 
    250   # This method doesn't reference self; it fails if an error is thrown.
    251   # pylint: disable=R0201
    252   def testDryRun(self):
    253     """Does a dry run of the bisect script.
    254 
    255     This serves as a smoke test to catch errors in the basic execution of the
    256     script.
    257     """
    258     # Disable rmtree to avoid deleting local trees.
    259     old_rmtree = shutil.rmtree
    260     try:
    261       shutil.rmtree = lambda path, onerror: None
    262       bisect_instance = _GetBisectPerformanceMetricsInstance()
    263       results = bisect_instance.Run(bisect_instance.opts.command,
    264                                     bisect_instance.opts.bad_revision,
    265                                     bisect_instance.opts.good_revision,
    266                                     bisect_instance.opts.metric)
    267       bisect_instance.FormatAndPrintResults(results)
    268     finally:
    269       shutil.rmtree = old_rmtree
    270 
    271   def testGetCommitPosition(self):
    272     bisect_instance = _GetBisectPerformanceMetricsInstance()
    273     cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'
    274     self.assertEqual(
    275         291765, bisect_instance.source_control.GetCommitPosition(cp_git_rev))
    276 
    277     svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'
    278     self.assertEqual(
    279         291467, bisect_instance.source_control.GetCommitPosition(svn_git_rev))
    280 
    281   def testGetCommitPositionForV8(self):
    282     bisect_instance = _GetBisectPerformanceMetricsInstance()
    283     v8_rev = '21d700eedcdd6570eff22ece724b63a5eefe78cb'
    284     depot_path = os.path.join(bisect_instance.src_cwd, 'v8')
    285     self.assertEqual(
    286         23634,
    287         bisect_instance.source_control.GetCommitPosition(v8_rev, depot_path))
    288 
    289   def testGetCommitPositionForWebKit(self):
    290     bisect_instance = _GetBisectPerformanceMetricsInstance()
    291     wk_rev = 'a94d028e0f2c77f159b3dac95eb90c3b4cf48c61'
    292     depot_path = os.path.join(bisect_instance.src_cwd, 'third_party', 'WebKit')
    293     self.assertEqual(
    294         181660,
    295         bisect_instance.source_control.GetCommitPosition(wk_rev, depot_path))
    296 
    297   def testUpdateDepsContent(self):
    298     bisect_instance = _GetBisectPerformanceMetricsInstance()
    299     deps_file = 'DEPS'
    300     # We are intentionally reading DEPS file contents instead of string literal
    301     # with few lines from DEPS because to check if the format we are expecting
    302     # to search is not changed in DEPS content.
    303     # TODO (prasadv): Add a separate test to validate the DEPS contents with the
    304     # format that bisect script expects.
    305     deps_contents = bisect_perf_regression.ReadStringFromFile(deps_file)
    306     deps_key = 'v8_revision'
    307     depot = 'v8'
    308     git_revision = 'a12345789a23456789a123456789a123456789'
    309     updated_content = bisect_instance.UpdateDepsContents(
    310         deps_contents, depot, git_revision, deps_key)
    311     self.assertIsNotNone(updated_content)
    312     ss = re.compile('["\']%s["\']: ["\']%s["\']' % (deps_key, git_revision))
    313     self.assertIsNotNone(re.search(ss, updated_content))
    314 
    315 
    316 class DepotDirectoryRegistryTest(unittest.TestCase):
    317 
    318   def setUp(self):
    319     self.old_chdir = os.chdir
    320     os.chdir = self.mockChdir
    321     self.old_depot_names = bisect_perf_regression.DEPOT_NAMES
    322     bisect_perf_regression.DEPOT_NAMES = ['mock_depot']
    323     self.old_depot_deps_name = bisect_perf_regression.DEPOT_DEPS_NAME
    324     bisect_perf_regression.DEPOT_DEPS_NAME = {'mock_depot': {'src': 'src/foo'}}
    325 
    326     self.registry = bisect_perf_regression.DepotDirectoryRegistry('/mock/src')
    327     self.cur_dir = None
    328 
    329   def tearDown(self):
    330     os.chdir = self.old_chdir
    331     bisect_perf_regression.DEPOT_NAMES = self.old_depot_names
    332     bisect_perf_regression.DEPOT_DEPS_NAME = self.old_depot_deps_name
    333 
    334   def mockChdir(self, new_dir):
    335     self.cur_dir = new_dir
    336 
    337   def testReturnsCorrectResultForChrome(self):
    338     self.assertEqual(self.registry.GetDepotDir('chromium'), '/mock/src')
    339 
    340   def testReturnsCorrectResultForChromeOS(self):
    341     self.assertEqual(self.registry.GetDepotDir('cros'), '/mock/src/tools/cros')
    342 
    343   def testUsesDepotSpecToInitializeRegistry(self):
    344     self.assertEqual(self.registry.GetDepotDir('mock_depot'), '/mock/src/foo')
    345 
    346   def testChangedTheDirectory(self):
    347     self.registry.ChangeToDepotDir('mock_depot')
    348     self.assertEqual(self.cur_dir, '/mock/src/foo')
    349 
    350 
# Run every test case defined in this module when executed as a script.
if __name__ == '__main__':
  unittest.main()
    353