# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs perf tests.

Our buildbot infrastructure requires each slave to run steps serially.
This is sub-optimal for Android, where these steps can run independently on
multiple connected devices.

The buildbots will run this script multiple times per cycle:
- First: all steps listed in --steps will be executed in parallel using all
connected devices. Step results will be pickled to disk. Each step has a
unique name. The result code will be ignored if the step name is listed in
--flaky-steps.
The buildbot will treat this step as a regular step, and will not process any
graph data.

- Then, with --print-step STEP_NAME: at this stage, we simply print the
previously saved results for that step. The buildbot will then process the
graph data accordingly.

The JSON steps file contains a dictionary in the format:
{ "version": int,
  "steps": {
    "foo": {
      "device_affinity": int,
      "cmd": "script_to_execute foo"
    },
    "bar": {
      "device_affinity": int,
      "cmd": "script_to_execute bar"
    }
  }
}

The JSON flaky steps file contains a list of step names whose results should
be ignored:
[
  "step_name_foo",
  "step_name_bar"
]

Note that script_to_execute must accept at least the following option:
  --device: the serial number to be passed to all adb commands.
"""

import collections
import io
import json
import logging
import os
import pickle
import re
import shutil
import sys
import tempfile
import threading
import time
import zipfile

from devil.android import battery_utils
from devil.android import device_errors
from devil.android import forwarder
from devil.constants import exit_codes
from devil.utils import cmd_helper
from pylib import constants
from pylib.base import base_test_result
from pylib.base import base_test_runner
from pylib.constants import host_paths


# Regex for the master branch commit position.
_GIT_CR_POS_RE = re.compile(r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$')


def _GetChromiumRevision():
  # pylint: disable=line-too-long
  """Get the git hash and commit position of the chromium master branch.

  See: https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/runtest.py#212

  Returns:
    A dictionary with 'revision' and 'commit_pos' keys.
  """
  # pylint: enable=line-too-long
  status, output = cmd_helper.GetCmdStatusAndOutput(
      ['git', 'log', '-n', '1', '--pretty=format:%H%n%B', 'HEAD'],
      host_paths.DIR_SOURCE_ROOT)
  revision = None
  commit_pos = None
  if not status:
    lines = output.splitlines()
    revision = lines[0]
    for line in reversed(lines):
      m = _GIT_CR_POS_RE.match(line.strip())
      if m:
        commit_pos = int(m.group(1))
        break
  return {'revision': revision, 'commit_pos': commit_pos}
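
# For illustration: with --pretty=format:%H%n%B, _GetChromiumRevision above
# parses git output shaped like (hash and position below are invented)
#
#   0123456789abcdef0123456789abcdef01234567
#   Commit message subject.
#
#   Cr-Commit-Position: refs/heads/master@{#123456}
#
# and returns {'revision': '0123...', 'commit_pos': 123456}. If git fails,
# both values are None; if only the footer is missing, 'commit_pos' is None.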


def GetPersistedResult(test_name):
  file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
  if not os.path.exists(file_name):
    logging.error('File not found: %s', file_name)
    return None

  with open(file_name, 'r') as f:
    return pickle.loads(f.read())


def OutputJsonList(json_input, json_output):
  with open(json_input, 'r') as i:
    all_steps = json.load(i)

  step_values = []
  for k, v in all_steps['steps'].iteritems():
    data = {'test': k, 'device_affinity': v['device_affinity']}

    persisted_result = GetPersistedResult(k)
    if persisted_result:
      data['start_time'] = persisted_result['start_time']
      data['end_time'] = persisted_result['end_time']
      data['total_time'] = persisted_result['total_time']
      data['has_archive'] = persisted_result['archive_bytes'] is not None
    step_values.append(data)

  with open(json_output, 'w') as o:
    o.write(json.dumps(step_values))
  return 0
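
# For illustration (values invented): with a persisted result for "foo" but
# none for "bar", OutputJsonList above emits JSON such as
#
#   [{"test": "foo", "device_affinity": 0, "start_time": 1467000000.0,
#     "end_time": 1467000041.5, "total_time": 41.5, "has_archive": false},
#    {"test": "bar", "device_affinity": 1}]
#
# Steps without a persisted result carry only the first two keys.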


def PrintTestOutput(test_name, json_file_name=None, archive_file_name=None):
  """Helper method to print the output of a previously executed test_name.

  Args:
    test_name: name of the test that has been previously executed.
    json_file_name: name of the file to output chartjson data to.
    archive_file_name: name of the file to write the compressed ZIP archive to.

  Returns:
    The exit code generated by the test step.
  """
  persisted_result = GetPersistedResult(test_name)
  if not persisted_result:
    return exit_codes.INFRA
  logging.info('*' * 80)
  logging.info('Output from:')
  logging.info(persisted_result['cmd'])
  logging.info('*' * 80)

  output_formatted = ''
  persisted_outputs = persisted_result['output']
  for i, persisted_output in enumerate(persisted_outputs):
    output_formatted += '\n\nOutput from run #%d:\n\n%s' % (
        i, persisted_output)
  print output_formatted

  if json_file_name:
    with open(json_file_name, 'w') as f:
      f.write(persisted_result['chartjson'])

  if archive_file_name:
    if persisted_result['archive_bytes'] is not None:
      with open(archive_file_name, 'wb') as f:
        f.write(persisted_result['archive_bytes'])
    else:
      logging.error('The output dir was not archived.')

  return persisted_result['exit_code']


def PrintSummary(test_names):
  logging.info('*' * 80)
  logging.info('Sharding summary')
  device_total_time = collections.defaultdict(int)
  for test_name in test_names:
    file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
    if not os.path.exists(file_name):
      logging.info('%s : No status file found', test_name)
      continue
    with open(file_name, 'r') as f:
      result = pickle.loads(f.read())
    logging.info('%s : exit_code=%d in %d secs at %s',
                 result['name'], result['exit_code'], result['total_time'],
                 result['device'])
    device_total_time[result['device']] += result['total_time']
  for device, device_time in device_total_time.iteritems():
    logging.info('Total for device %s : %d secs', device, device_time)
  logging.info('Total steps time: %d secs', sum(device_total_time.values()))


class _HeartBeatLogger(object):
  # How often to print the heartbeat on flush().
  _PRINT_INTERVAL = 30.0

  def __init__(self):
    """A file-like class for keeping the buildbot alive."""
    self._len = 0
    self._tick = time.time()
    self._stopped = threading.Event()
    self._timer = threading.Thread(target=self._runner)
    self._timer.start()

  def _runner(self):
    while not self._stopped.is_set():
      self.flush()
      self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)

  def write(self, data):
    self._len += len(data)

  def flush(self):
    now = time.time()
    if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:
      self._tick = now
      print '--single-step output length %d' % self._len
      sys.stdout.flush()

  def stop(self):
    self._stopped.set()
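
# For illustration (usage sketch, not part of the production flow): a
# _HeartBeatLogger can stand in for sys.stdout wherever a file-like object is
# expected, as _LaunchPerfTest does below for --single-step runs:
#
#   logger = _HeartBeatLogger()
#   try:
#     logger.write('a chunk of test output')  # only the length is tracked
#   finally:
#     logger.stop()  # always stop(), or the background thread keeps running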


class TestRunner(base_test_runner.BaseTestRunner):
  def __init__(self, test_options, device, shard_index, max_shard, tests,
      flaky_tests):
    """A TestRunner instance runs a perf test on a single device.

    Args:
      test_options: A PerfOptions object.
      device: Device to run the tests.
      shard_index: the shard index of this device.
      max_shard: the total number of shards.
      tests: a dict mapping test_name to command.
      flaky_tests: a list of flaky test names.
    """
    super(TestRunner, self).__init__(device, None)
    self._options = test_options
    self._shard_index = shard_index
    self._max_shard = max_shard
    self._tests = tests
    self._flaky_tests = flaky_tests
    self._output_dir = None
    self._device_battery = battery_utils.BatteryUtils(self.device)

  @staticmethod
  def _SaveResult(result):
    pickled = os.path.join(constants.PERF_OUTPUT_DIR, result['name'])
    if os.path.exists(pickled):
      with open(pickled, 'r') as f:
        previous = pickle.loads(f.read())
        result['output'] = previous['output'] + result['output']

    with open(pickled, 'w') as f:
      f.write(pickle.dumps(result))
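
  # Worked example (values invented): if 'foo' already has a pickled result
  # whose output is ['run 1 output'] and a retry produces ['run 2 output'],
  # _SaveResult above stores output == ['run 1 output', 'run 2 output'],
  # which PrintTestOutput later prints as separate runs.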

  def _CheckDeviceAffinity(self, test_name):
    """Returns True if test_name has affinity for this shard."""
    affinity = (self._tests['steps'][test_name]['device_affinity'] %
                self._max_shard)
    if self._shard_index == affinity:
      return True
    logging.info('Skipping %s on %s (affinity is %s, shard index is %s)',
                 test_name, self.device_serial, affinity, self._shard_index)
    return False
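
  # Worked example: with _max_shard == 4, a step whose device_affinity is 9
  # maps to shard 9 % 4 == 1, so only the runner whose _shard_index is 1
  # executes it; every other shard logs a skip and reports a pass.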

  def _CleanupOutputDirectory(self):
    if self._output_dir:
      shutil.rmtree(self._output_dir, ignore_errors=True)
      self._output_dir = None

  def _ReadChartjsonOutput(self):
    if not self._output_dir:
      return ''

    json_output_path = os.path.join(self._output_dir, 'results-chart.json')
    try:
      with open(json_output_path) as f:
        return f.read()
    except IOError:
      logging.exception('Exception when reading chartjson.')
      logging.error('This usually means that telemetry did not run, so it could'
                    ' not generate the file. Please check the device running'
                    ' the test.')
      return ''

  def _WriteBuildBotJson(self):
    """Write metadata about the buildbot environment to the output dir."""
    data = {
      'chromium': _GetChromiumRevision(),
      'environment': dict(os.environ)}
    logging.info('BuildBot environment: %s', data)
    with open(os.path.join(self._output_dir, 'buildbot.json'), 'w') as f:
      json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))

  def _ArchiveOutputDir(self):
    """Archive all files in the output dir, and return as compressed bytes."""
    with io.BytesIO() as archive:
      with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as contents:
        num_files = 0
        for absdir, _, files in os.walk(self._output_dir):
          reldir = os.path.relpath(absdir, self._output_dir)
          for filename in files:
            src_path = os.path.join(absdir, filename)
            # We use normpath to turn './file.txt' into just 'file.txt'.
            dst_path = os.path.normpath(os.path.join(reldir, filename))
            contents.write(src_path, dst_path)
            num_files += 1
      if num_files:
        logging.info('%d files in the output dir were archived.', num_files)
      else:
        logging.warning('No files in the output dir. Archive is empty.')
      return archive.getvalue()
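
  # For illustration (assumed post-processing, not part of this module): the
  # bytes returned by _ArchiveOutputDir are later written to disk by
  # PrintTestOutput, after which the archive can be inspected with the
  # standard zipfile module:
  #
  #   with open('archive.zip', 'wb') as f:
  #     f.write(archive_bytes)
  #   print zipfile.ZipFile('archive.zip').namelist()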

  def _LaunchPerfTest(self, test_name):
    """Runs a perf test.

    Args:
      test_name: the name of the test to be executed.

    Returns:
      A tuple containing (output, base_test_result.ResultType).
    """
    if not self._CheckDeviceAffinity(test_name):
      return '', base_test_result.ResultType.PASS

    try:
      logging.warning('Unmapping device ports')
      forwarder.Forwarder.UnmapAllDevicePorts(self.device)
      self.device.RestartAdbd()
    except Exception as e: # pylint: disable=broad-except
      logging.error('Exception when tearing down device: %s', e)

    test_config = self._tests['steps'][test_name]
    cmd = ('%s --device %s' % (test_config['cmd'], self.device_serial))

    if (self._options.collect_chartjson_data
        or test_config.get('archive_output_dir')):
      self._output_dir = tempfile.mkdtemp()
      self._WriteBuildBotJson()
      cmd = cmd + ' --output-dir=%s' % self._output_dir

    logging.info(
        'Temperature: %s (0.1 C)',
        str(self._device_battery.GetBatteryInfo().get('temperature')))
    if self._options.max_battery_temp:
      self._device_battery.LetBatteryCoolToTemperature(
          self._options.max_battery_temp)

    logging.info('Charge level: %s%%',
        str(self._device_battery.GetBatteryInfo().get('level')))
    if self._options.min_battery_level:
      self._device_battery.ChargeDeviceToLevel(
          self._options.min_battery_level)
    self.device.SetScreen(True)

    logging.info('%s : %s', test_name, cmd)
    start_time = time.time()

    timeout = test_config.get('timeout', 3600)
    if self._options.no_timeout:
      timeout = None
    logging.info('Timeout for %s test: %s', test_name, timeout)
    full_cmd = cmd
    if self._options.dry_run:
      full_cmd = 'echo %s' % cmd

    logfile = sys.stdout
    archive_bytes = None
    if self._options.single_step:
      # Just print a heartbeat so that the outer buildbot scripts won't time
      # out waiting for output.
      logfile = _HeartBeatLogger()
    cwd = os.path.abspath(host_paths.DIR_SOURCE_ROOT)
    if full_cmd.startswith('src/'):
      cwd = os.path.abspath(os.path.join(host_paths.DIR_SOURCE_ROOT, os.pardir))
    try:
      exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(
          full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile)
      json_output = self._ReadChartjsonOutput()
      if test_config.get('archive_output_dir'):
        archive_bytes = self._ArchiveOutputDir()
    except cmd_helper.TimeoutError as e:
      exit_code = -1
      output = e.output
      json_output = ''
    finally:
      self._CleanupOutputDirectory()
      if self._options.single_step:
        logfile.stop()
    end_time = time.time()
    if exit_code is None:
      exit_code = -1
    logging.info('%s : exit_code=%d in %d secs at %s',
                 test_name, exit_code, end_time - start_time,
                 self.device_serial)

    if exit_code == 0:
      result_type = base_test_result.ResultType.PASS
    else:
      result_type = base_test_result.ResultType.FAIL
      # Since perf tests use device affinity, give the device a chance to
      # recover if it is offline after a failure. Otherwise, the master sharder
      # will remove it from the pool and future tests on this device will fail.
      try:
        self.device.WaitUntilFullyBooted(timeout=120)
      except device_errors.CommandTimeoutError as e:
        logging.error('Device failed to return after %s: %s', test_name, e)

    actual_exit_code = exit_code
    if test_name in self._flaky_tests:
      # The exit_code is used at the second stage when printing the test
      # output. If the test is flaky, force it to 0 to get that step green
      # while still gathering data for the perf dashboards. The result_type
      # is used by the test_dispatcher to retry the test.
      exit_code = 0

    persisted_result = {
        'name': test_name,
        'output': [output],
        'chartjson': json_output,
        'archive_bytes': archive_bytes,
        'exit_code': exit_code,
        'actual_exit_code': actual_exit_code,
        'result_type': result_type,
        'start_time': start_time,
        'end_time': end_time,
        'total_time': end_time - start_time,
        'device': self.device_serial,
        'cmd': cmd,
    }
    self._SaveResult(persisted_result)

    return (output, result_type)

  def RunTest(self, test_name):
    """Run a perf test on the device.

    Args:
      test_name: String to use for logging the test result.

    Returns:
      A tuple of (TestRunResults, retry).
    """
    _, result_type = self._LaunchPerfTest(test_name)
    results = base_test_result.TestRunResults()
    results.AddResult(base_test_result.BaseTestResult(test_name, result_type))
    retry = None
    if not results.DidRunPass():
      retry = test_name
    return results, retry
    454