Home | History | Annotate | Download | only in perf
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Runs perf tests.
      6 
      7 Our buildbot infrastructure requires each slave to run steps serially.
      8 This is sub-optimal for android, where these steps can run independently on
      9 multiple connected devices.
     10 
     11 The buildbots will run this script multiple times per cycle:
     12 - First: all steps listed in --steps in will be executed in parallel using all
     13 connected devices. Step results will be pickled to disk. Each step has a unique
     14 name. The result code will be ignored if the step name is listed in
     15 --flaky-steps.
     16 The buildbot will treat this step as a regular step, and will not process any
     17 graph data.
     18 
     19 - Then, with -print-step STEP_NAME: at this stage, we'll simply print the file
     20 with the step results previously saved. The buildbot will then process the graph
     21 data accordingly.
     22 
     23 The JSON steps file contains a dictionary in the format:
     24 { "version": int,
     25   "steps": {
     26     "foo": {
     27       "device_affinity": int,
     28       "cmd": "script_to_execute foo"
     29     },
     30     "bar": {
     31       "device_affinity": int,
     32       "cmd": "script_to_execute bar"
     33     }
     34   }
     35 }
     36 
     37 The JSON flaky steps file contains a list with step names which results should
     38 be ignored:
     39 [
     40   "step_name_foo",
     41   "step_name_bar"
     42 ]
     43 
     44 Note that script_to_execute necessarily have to take at least the following
     45 option:
     46   --device: the serial number to be passed to all adb commands.
     47 """
     48 
     49 import collections
     50 import datetime
     51 import json
     52 import logging
     53 import os
     54 import pickle
     55 import sys
     56 import threading
     57 import time
     58 
     59 from pylib import cmd_helper
     60 from pylib import constants
     61 from pylib import forwarder
     62 from pylib.base import base_test_result
     63 from pylib.base import base_test_runner
     64 
     65 
     66 def OutputJsonList(json_input, json_output):
     67   with file(json_input, 'r') as i:
     68     all_steps = json.load(i)
     69   step_names = all_steps['steps'].keys()
     70   with file(json_output, 'w') as o:
     71     o.write(json.dumps(step_names))
     72   return 0
     73 
     74 
     75 def PrintTestOutput(test_name):
     76   """Helper method to print the output of previously executed test_name.
     77 
     78   Args:
     79     test_name: name of the test that has been previously executed.
     80 
     81   Returns:
     82     exit code generated by the test step.
     83   """
     84   file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
     85   if not os.path.exists(file_name):
     86     logging.error('File not found %s', file_name)
     87     return 1
     88 
     89   with file(file_name, 'r') as f:
     90     persisted_result = pickle.loads(f.read())
     91   logging.info('*' * 80)
     92   logging.info('Output from:')
     93   logging.info(persisted_result['cmd'])
     94   logging.info('*' * 80)
     95   print persisted_result['output']
     96 
     97   return persisted_result['exit_code']
     98 
     99 
    100 def PrintSummary(test_names):
    101   logging.info('*' * 80)
    102   logging.info('Sharding summary')
    103   device_total_time = collections.defaultdict(int)
    104   for test_name in test_names:
    105     file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
    106     if not os.path.exists(file_name):
    107       logging.info('%s : No status file found', test_name)
    108       continue
    109     with file(file_name, 'r') as f:
    110       result = pickle.loads(f.read())
    111     logging.info('%s : exit_code=%d in %d secs at %s',
    112                  result['name'], result['exit_code'], result['total_time'],
    113                  result['device'])
    114     device_total_time[result['device']] += result['total_time']
    115   for device, device_time in device_total_time.iteritems():
    116     logging.info('Total for device %s : %d secs', device, device_time)
    117   logging.info('Total steps time: %d secs', sum(device_total_time.values()))
    118 
    119 
    120 class _HeartBeatLogger(object):
    121   # How often to print the heartbeat on flush().
    122   _PRINT_INTERVAL = 30.0
    123 
    124   def __init__(self):
    125     """A file-like class for keeping the buildbot alive."""
    126     self._len = 0
    127     self._tick = time.time()
    128     self._stopped = threading.Event()
    129     self._timer = threading.Thread(target=self._runner)
    130     self._timer.start()
    131 
    132   def _runner(self):
    133     while not self._stopped.is_set():
    134       self.flush()
    135       self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)
    136 
    137   def write(self, data):
    138     self._len += len(data)
    139 
    140   def flush(self):
    141     now = time.time()
    142     if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:
    143       self._tick = now
    144       print '--single-step output length %d' % self._len
    145       sys.stdout.flush()
    146 
    147   def stop(self):
    148     self._stopped.set()
    149 
    150 
    151 class TestRunner(base_test_runner.BaseTestRunner):
    152   def __init__(self, test_options, device, shard_index, max_shard, tests,
    153       flaky_tests):
    154     """A TestRunner instance runs a perf test on a single device.
    155 
    156     Args:
    157       test_options: A PerfOptions object.
    158       device: Device to run the tests.
    159       shard_index: the index of this device.
    160       max_shards: the maximum shard index.
    161       tests: a dict mapping test_name to command.
    162       flaky_tests: a list of flaky test_name.
    163     """
    164     super(TestRunner, self).__init__(device, None, 'Release')
    165     self._options = test_options
    166     self._shard_index = shard_index
    167     self._max_shard = max_shard
    168     self._tests = tests
    169     self._flaky_tests = flaky_tests
    170 
    171   @staticmethod
    172   def _IsBetter(result):
    173     if result['actual_exit_code'] == 0:
    174       return True
    175     pickled = os.path.join(constants.PERF_OUTPUT_DIR,
    176                            result['name'])
    177     if not os.path.exists(pickled):
    178       return True
    179     with file(pickled, 'r') as f:
    180       previous = pickle.loads(f.read())
    181     return result['actual_exit_code'] < previous['actual_exit_code']
    182 
    183   @staticmethod
    184   def _SaveResult(result):
    185     if TestRunner._IsBetter(result):
    186       with file(os.path.join(constants.PERF_OUTPUT_DIR,
    187                              result['name']), 'w') as f:
    188         f.write(pickle.dumps(result))
    189 
    190   def _CheckDeviceAffinity(self, test_name):
    191     """Returns True if test_name has affinity for this shard."""
    192     affinity = (self._tests['steps'][test_name]['device_affinity'] %
    193                 self._max_shard)
    194     if self._shard_index == affinity:
    195       return True
    196     logging.info('Skipping %s on %s (affinity is %s, device is %s)',
    197                  test_name, self.device_serial, affinity, self._shard_index)
    198     return False
    199 
    200   def _LaunchPerfTest(self, test_name):
    201     """Runs a perf test.
    202 
    203     Args:
    204       test_name: the name of the test to be executed.
    205 
    206     Returns:
    207       A tuple containing (Output, base_test_result.ResultType)
    208     """
    209     if not self._CheckDeviceAffinity(test_name):
    210       return '', base_test_result.ResultType.PASS
    211 
    212     try:
    213       logging.warning('Unmapping device ports')
    214       forwarder.Forwarder.UnmapAllDevicePorts(self.device)
    215       self.device.old_interface.RestartAdbdOnDevice()
    216     except Exception as e:
    217       logging.error('Exception when tearing down device %s', e)
    218 
    219     cmd = ('%s --device %s' %
    220            (self._tests['steps'][test_name]['cmd'],
    221             self.device_serial))
    222     logging.info('%s : %s', test_name, cmd)
    223     start_time = datetime.datetime.now()
    224 
    225     timeout = 5400
    226     if self._options.no_timeout:
    227       timeout = None
    228     full_cmd = cmd
    229     if self._options.dry_run:
    230       full_cmd = 'echo %s' % cmd
    231 
    232     logfile = sys.stdout
    233     if self._options.single_step:
    234       # Just print a heart-beat so that the outer buildbot scripts won't timeout
    235       # without response.
    236       logfile = _HeartBeatLogger()
    237     cwd = os.path.abspath(constants.DIR_SOURCE_ROOT)
    238     if full_cmd.startswith('src/'):
    239       cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir))
    240     try:
    241       exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(
    242           full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile)
    243     except cmd_helper.TimeoutError as e:
    244       exit_code = -1
    245       output = str(e)
    246     finally:
    247       if self._options.single_step:
    248         logfile.stop()
    249     end_time = datetime.datetime.now()
    250     if exit_code is None:
    251       exit_code = -1
    252     logging.info('%s : exit_code=%d in %d secs at %s',
    253                  test_name, exit_code, (end_time - start_time).seconds,
    254                  self.device_serial)
    255     result_type = base_test_result.ResultType.FAIL
    256     if exit_code == 0:
    257       result_type = base_test_result.ResultType.PASS
    258     actual_exit_code = exit_code
    259     if test_name in self._flaky_tests:
    260       # The exit_code is used at the second stage when printing the
    261       # test output. If the test is flaky, force to "0" to get that step green
    262       # whilst still gathering data to the perf dashboards.
    263       # The result_type is used by the test_dispatcher to retry the test.
    264       exit_code = 0
    265 
    266     persisted_result = {
    267         'name': test_name,
    268         'output': output,
    269         'exit_code': exit_code,
    270         'actual_exit_code': actual_exit_code,
    271         'result_type': result_type,
    272         'total_time': (end_time - start_time).seconds,
    273         'device': self.device_serial,
    274         'cmd': cmd,
    275     }
    276     self._SaveResult(persisted_result)
    277 
    278     return (output, result_type)
    279 
    280   def RunTest(self, test_name):
    281     """Run a perf test on the device.
    282 
    283     Args:
    284       test_name: String to use for logging the test result.
    285 
    286     Returns:
    287       A tuple of (TestRunResults, retry).
    288     """
    289     _, result_type = self._LaunchPerfTest(test_name)
    290     results = base_test_result.TestRunResults()
    291     results.AddResult(base_test_result.BaseTestResult(test_name, result_type))
    292     retry = None
    293     if not results.DidRunPass():
    294       retry = test_name
    295     return results, retry
    296