# (code-search navigation header removed) telemetry_AFDOGenerate
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """
      6 Test to generate the AFDO profile for a set of ChromeOS benchmarks.
      7 
      8 This will run a pre-determined set of benchmarks on the DUT under
      9 the monitoring of the linux "perf" tool. The resulting perf.data
     10 file will then be copied to Google Storage (GS) where it can be
     11 used by the AFDO optimized build.
     12 
     13 Given that the telemetry benchmarks are quite unstable on ChromeOS at
     14 this point, this test also supports a mode where the benchmarks are
     15 executed outside of the telemetry framework. It is not the same as
     16 executing the benchmarks under telemetry because there is no telemetry
     17 measurement taken but, for the purposes of profiling Chrome, it should
     18 be pretty close.
     19 
     20 Example invocation:
     21 /usr/bin/test_that --debug --board=lumpy <DUT IP>
     22   --args="ignore_failures=True local=True gs_test_location=True"
     23   telemetry_AFDOGenerate
     24 """
     25 
     26 import bz2
     27 import logging
     28 import os
     29 import time
     30 
     31 from autotest_lib.client.common_lib import error, utils
     32 from autotest_lib.server import autotest
     33 from autotest_lib.server import profilers
     34 from autotest_lib.server import test
     35 from autotest_lib.server import utils
     36 from autotest_lib.server.cros import telemetry_runner
     37 
# List of benchmarks to run to capture profile information. This is
# based on the "superhero" and "perf_v2" list and other telemetry
# benchmarks. Goal is to have a short list that is as representative
# as possible and takes a short time to execute. At this point the
# list of benchmarks is in flux.
# Each entry is a 1-tuple (benchmark_name,) or a 2-tuple
# (benchmark_name, (extra_telemetry_arg, ...)).
TELEMETRY_AFDO_BENCHMARKS = (
    ('page_cycler_v2.typical_25', ('--pageset-repeat=1',)),
    ('page_cycler_v2.intl_ja_zh', ('--pageset-repeat=1',)),
    # Temporarily disable these two benchmarks to finish in 60 minutes.
    # ('page_cycler_v2.intl_ar_fa_he', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_es_fr_pt-BR', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_ko_th_vi', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_hi_ru', ('--pageset-repeat=1',)),
    ('octane',),
    ('kraken',),
    ('speedometer',),
    ('dromaeo.domcoreattr',),
    ('dromaeo.domcoremodify',),
    )

# Temporarily disable this benchmark because it is failing a
# lot. Filed chromium:590127
# ('smoothness.tough_webgl_cases',)

# Some benchmarks removed from the profile set:
# 'page_cycler.morejs' -> uninteresting, seems to fail frequently,
# 'page_cycler.moz' -> seems very old.
# 'media.tough_video_cases' -> removed this because it does not bring
#                              any benefit and takes more than 12 mins

# List of boards where this test can be run.  Currently, it needs
# machines with at least 4GB of memory or 2GB of /tmp.
# This must be consistent with chromite.
GCC_BOARDS = ['lumpy']

# Should be disjoint with GCC_BOARDS.
LLVM_BOARDS = ['chell', 'samus']
     75 
     76 class telemetry_AFDOGenerate(test.test):
     77     """
     78     Run one or more telemetry benchmarks under the "perf" monitoring
     79     tool, generate a "perf.data" file and upload to GS for comsumption
     80     by the AFDO optimized build.
     81     """
     82     version = 1
     83 
     84 
     85     def run_once(self, host, args):
     86         """Run a set of telemetry benchmarks.
     87 
     88         @param host: Host machine where test is run
     89         @param args: A dictionary of the arguments that were passed
     90                 to this test.
     91         @returns None.
     92         """
     93         self._host = host
     94         host_board = host.get_board().split(':')[1]
     95 
     96         if not (host_board in LLVM_BOARDS or host_board in GCC_BOARDS):
     97             raise error.TestFail(
     98                     'This test cannot be run on board %s' % host_board)
     99 
    100         self._parse_args(args)
    101 
    102         if self._minimal_telemetry:
    103             self._run_tests_minimal_telemetry()
    104         else:
    105             self._telemetry_runner = telemetry_runner.TelemetryRunner(
    106                     self._host, self._local, telemetry_on_dut=False)
    107 
    108             for benchmark_info in TELEMETRY_AFDO_BENCHMARKS:
    109                 benchmark = benchmark_info[0]
    110                 args = () if len(benchmark_info) == 1 else benchmark_info[1]
    111                 try:
    112                     self._run_test_with_retry(benchmark, *args)
    113                 except error.TestBaseException:
    114                     if not self._ignore_failures:
    115                         raise
    116                     else:
    117                         logging.info('Ignoring failure from benchmark %s.',
    118                                      benchmark)
    119 
    120 
    121     def after_run_once(self):
    122         """After the profile information has been collected, compress it
    123         and upload it to GS
    124         """
    125         PERF_FILE = 'perf.data'
    126         COMP_PERF_FILE = 'chromeos-chrome-%s-%s.perf.data'
    127         perf_data = os.path.join(self.profdir, PERF_FILE)
    128         comp_data = os.path.join(self.profdir, COMP_PERF_FILE % (
    129                 self._arch, self._version))
    130         compressed = self._compress_file(perf_data, comp_data)
    131         self._gs_upload(compressed, os.path.basename(compressed))
    132 
    133         # Also create copy of this file using "LATEST" as version so
    134         # it can be found in case the builder is looking for a version
    135         # number that does not match. It is ok to use a slighly old
    136         # version of the this file for the optimized build
    137         latest_data =  COMP_PERF_FILE % (self._arch, 'LATEST')
    138         latest_compressed = self._get_compressed_name(latest_data)
    139         self._gs_upload(compressed, latest_compressed)
    140 
    141 
    142     def _parse_args(self, args):
    143         """Parses input arguments to this autotest.
    144 
    145         @param args: Options->values dictionary.
    146         @raises error.TestFail if a bad option is passed.
    147         """
    148 
    149         # Set default values for the options.
    150         # Architecture for which we are collecting afdo data.
    151         self._arch = 'amd64'
    152         # Use an alternate GS location where everyone can write.
    153         # Set default depending on whether this is executing in
    154         # the lab environment or not
    155         self._gs_test_location = not utils.host_is_in_lab_zone(
    156                 self._host.hostname)
    157         # Ignore individual test failures.
    158         self._ignore_failures = False
    159         # Use local copy of telemetry instead of using the dev server copy.
    160         self._local = False
    161         # Chrome version to which the AFDO data corresponds.
    162         self._version, _ = self._host.get_chrome_version()
    163         # Try to use the minimal support from Telemetry. The Telemetry
    164         # benchmarks in ChromeOS are too flaky at this point. So, initially,
    165         # this will be set to True by default.
    166         self._minimal_telemetry = False
    167 
    168         for option_name, value in args.iteritems():
    169             if option_name == 'arch':
    170                 self._arch = value
    171             elif option_name == 'gs_test_location':
    172                 self._gs_test_location = (value == 'True')
    173             elif option_name == 'ignore_failures':
    174                 self._ignore_failures = (value == 'True')
    175             elif option_name == 'local':
    176                 self._local = (value == 'True')
    177             elif option_name == 'minimal_telemetry':
    178                 self._minimal_telemetry = (value == 'True')
    179             elif option_name == 'version':
    180                 self._version = value
    181             else:
    182                 raise error.TestFail('Unknown option passed: %s' % option_name)
    183 
    184 
    185     def _run_test(self, benchmark, *args):
    186         """Run the benchmark using Telemetry.
    187 
    188         @param benchmark: Name of the benchmark to run.
    189         @param args: Additional arguments to pass to the telemetry execution
    190                      script.
    191         @raises Raises error.TestFail if execution of test failed.
    192                 Also re-raise any exceptions thrown by run_telemetry benchmark.
    193         """
    194         try:
    195             logging.info('Starting run for Telemetry benchmark %s', benchmark)
    196             start_time = time.time()
    197             result = self._telemetry_runner.run_telemetry_benchmark(
    198                     benchmark, None, *args)
    199             end_time = time.time()
    200             logging.info('Completed Telemetry benchmark %s in %f seconds',
    201                          benchmark, end_time - start_time)
    202         except error.TestBaseException as e:
    203             end_time = time.time()
    204             logging.info('Got exception from Telemetry benchmark %s '
    205                          'after %f seconds. Exception: %s',
    206                          benchmark, end_time - start_time, str(e))
    207             raise
    208 
    209         # We dont generate any keyvals for this run. This is not
    210         # an official run of the benchmark. We are just running it to get
    211         # a profile from it.
    212 
    213         if result.status is telemetry_runner.SUCCESS_STATUS:
    214             logging.info('Benchmark %s succeeded', benchmark)
    215         else:
    216             raise error.TestFail('An error occurred while executing'
    217                                  ' benchmark: %s' % benchmark)
    218 
    219 
    220     def _run_test_with_retry(self, benchmark, *args):
    221         """Run the benchmark using Telemetry. Retry in case of failure.
    222 
    223         @param benchmark: Name of the benchmark to run.
    224         @param args: Additional arguments to pass to the telemetry execution
    225                      script.
    226         @raises Re-raise any exceptions thrown by _run_test.
    227         """
    228 
    229         tried = False
    230         while True:
    231             try:
    232                 self._run_test(benchmark, *args)
    233                 logging.info('Benchmark %s succeeded on %s try',
    234                              benchmark,
    235                              'first' if not tried else 'second')
    236                 break
    237             except error.TestBaseException:
    238                 if not tried:
    239                    tried = True
    240                    logging.info('Benchmark %s failed. Retrying ...',
    241                                 benchmark)
    242                 else:
    243                     logging.info('Benchmark %s failed twice. Not retrying',
    244                                   benchmark)
    245                     raise
    246 
    247 
    248     def _run_tests_minimal_telemetry(self):
    249         """Run the benchmarks using the minimal support from Telemetry.
    250 
    251         The benchmarks are run using a client side autotest test. This test
    252         will control Chrome directly using the chrome.Chrome support and it
    253         will ask Chrome to display the benchmark pages directly instead of
    254         using the "page sets" and "measurements" support from Telemetry.
    255         In this way we avoid using Telemetry benchmark support which is not
    256         stable on ChromeOS yet.
    257         """
    258         AFDO_GENERATE_CLIENT_TEST = 'telemetry_AFDOGenerateClient'
    259 
    260         # We dont want to "inherit" the profiler settings for this test
    261         # to the client test. Doing so will end up in two instances of
    262         # the profiler (perf) being executed at the same time.
    263         # Filed a feature request about this. See crbug/342958.
    264 
    265         # Save the current settings for profilers.
    266         saved_profilers = self.job.profilers
    267         saved_default_profile_only = self.job.default_profile_only
    268 
    269         # Reset the state of the profilers.
    270         self.job.default_profile_only = False
    271         self.job.profilers = profilers.profilers(self.job)
    272 
    273         # Execute the client side test.
    274         client_at = autotest.Autotest(self._host)
    275         client_at.run_test(AFDO_GENERATE_CLIENT_TEST, args='')
    276 
    277         # Restore the settings for the profilers.
    278         self.job.default_profile_only = saved_default_profile_only
    279         self.job.profiler = saved_profilers
    280 
    281 
    282     @staticmethod
    283     def _get_compressed_name(name):
    284         """Given a file name, return bz2 compressed name.
    285         @param name: Name of uncompressed file.
    286         @returns name of compressed file.
    287         """
    288         return name + '.bz2'
    289 
    290     @staticmethod
    291     def _compress_file(unc_file, com_file):
    292         """Compresses specified file with bz2.
    293 
    294         @param unc_file: name of file to compress.
    295         @param com_file: prefix name of compressed file.
    296         @raises error.TestFail if compression failed
    297         @returns Name of compressed file.
    298         """
    299         dest = ''
    300         with open(unc_file, 'r') as inp:
    301             dest = telemetry_AFDOGenerate._get_compressed_name(com_file)
    302             with bz2.BZ2File(dest, 'w') as out:
    303                 for data in inp:
    304                     out.write(data)
    305         if not dest or not os.path.isfile(dest):
    306             raise error.TestFail('Could not compress %s' % unc_file)
    307         return dest
    308 
    309 
    310     def _gs_upload(self, local_file, remote_basename):
    311         """Uploads file to google storage specific location.
    312 
    313         @param local_file: name of file to upload.
    314         @param remote_basename: basename of remote file.
    315         @raises error.TestFail if upload failed.
    316         @returns nothing.
    317         """
    318         GS_GCC_DEST = 'gs://chromeos-prebuilt/afdo-job/canonicals/%s'
    319         GS_LLVM_DEST = 'gs://chromeos-prebuilt/afdo-job/llvm/%s'
    320         GS_TEST_DEST = 'gs://chromeos-throw-away-bucket/afdo-job/canonicals/%s'
    321         GS_ACL = 'project-private'
    322 
    323         board = self._host.get_board().split(':')[1]
    324 
    325         if self._gs_test_location:
    326             gs_dest = GS_TEST_DEST
    327         elif board in GCC_BOARDS:
    328             gs_dest = GS_GCC_DEST
    329         elif board in LLVM_BOARDS:
    330             gs_dest = GS_LLVM_DEST
    331         else:
    332             raise error.TestFail(
    333                     'This test cannot be run on board %s' % board)
    334 
    335         remote_file = gs_dest % remote_basename
    336 
    337         logging.info('About to upload to GS: %s', remote_file)
    338         if not utils.gs_upload(local_file,
    339                                remote_file,
    340                                GS_ACL, result_dir=self.resultsdir):
    341             logging.info('Failed upload to GS: %s', remote_file)
    342             raise error.TestFail('Unable to gs upload %s to %s' %
    343                                  (local_file, remote_file))
    344 
    345         logging.info('Successfull upload to GS: %s', remote_file)
    346