Home | History | Annotate | Download | only in platform_BootPerf
      1 # Copyright (c) 2009 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import glob
      6 import logging
      7 import os
      8 import re
      9 import shutil
     10 import time
     11 import utils
     12 
     13 from autotest_lib.client.bin import test
     14 from autotest_lib.client.common_lib import error
     15 
     16 class platform_BootPerf(test.test):
     17     """Test to gather recorded boot time statistics.
     18 
     19     The primary function of this test is to gather a rather large
     20     assortment of performance keyvals that capture timing and disk
     21     usage statistics associated with the most recent boot or reboot.
     22 
     23     The test calculates some or all of the following keyvals:
     24       * seconds_kernel_to_startup
     25       * seconds_kernel_to_startup_done
     26       * seconds_kernel_to_x_started
     27       * seconds_kernel_to_chrome_exec
     28       * seconds_kernel_to_chrome_main
     29       * seconds_kernel_to_signin_start
     30       * seconds_kernel_to_signin_wait
     31       * seconds_kernel_to_signin_users
     32       * seconds_kernel_to_signin_seen
     33       * seconds_kernel_to_login
     34       * seconds_kernel_to_network
     35       * rdbytes_kernel_to_startup
     36       * rdbytes_kernel_to_startup_done
     37       * rdbytes_kernel_to_x_started
     38       * rdbytes_kernel_to_chrome_exec
     39       * rdbytes_kernel_to_chrome_main
     40       * rdbytes_kernel_to_login
     41       * seconds_power_on_to_lf_start
     42       * seconds_power_on_to_lf_end
     43       * seconds_power_on_to_lk_start
     44       * seconds_power_on_to_lk_end
     45       * seconds_power_on_to_kernel
     46       * seconds_shutdown_time
     47       * seconds_reboot_time
     48       * seconds_reboot_error
     49       * mhz_primary_cpu
     50 
     51     """
     52 
     53     version = 2
     54 
     55     # Names of keyvals, their associated bootstat events, and 'Required' flag.
     56     # Test fails if a required event is not found.
     57     # Each event samples statistics measured since kernel startup
     58     # at a specific moment on the boot critical path:
     59     #   pre-startup - The start of the `chromeos_startup` script;
     60     #     roughly, the time when /sbin/init emits the `startup`
     61     #     Upstart event.
     62     #   post-startup - Completion of the `chromeos_startup` script.
     63     #   x-started - Completion of X server initialization.
     64     #   chrome-exec - The moment when session_manager exec's the
     65     #     first Chrome process.
     66     #   chrome-main - The moment when the first Chrome process
     67     #     begins executing in main().
     68     #   kernel_to_signin_start - The moment when LoadPage(loginSceenURL)
     69     #     is called, i.e. initialization starts.
     70     #   kernel_to_signin_wait - The moment when UI thread has finished signin
     71     #     screen initialization and now waits until JS sends "ready" event.
     72     #   kernel_to_signin_users - The moment when UI thread receives "ready" from
     73     #     JS code. So V8 is initialized and running, etc...
     74     #   kernel_to_signin_seen - The moment when user can actually see signin UI.
     75     #   boot-complete - Completion of boot after Chrome presents the
     76     #     login screen.
     77     #   kernel_to_android_start - The moment when Android is started.
     78     _EVENT_KEYVALS = [
     79         # N.B.  Keyval attribute names go into a database that
     80         # truncates after 30 characters.  The key names below are
     81         # prefixed with 8 characters, either 'seconds_' or
     82         # 'rdbytes_', so we have 22 characters wiggle room.
     83         #
     84         # ----+----1----+----2--
     85         ('kernel_to_startup',       'pre-startup',                     True),
     86         ('kernel_to_startup_done',  'post-startup',                    True),
     87         ('kernel_to_chrome_exec',   'chrome-exec',                     True),
     88         ('kernel_to_chrome_main',   'chrome-main',                     True),
     89         # These two events do not happen if device is in OOBE.
     90         ('kernel_to_signin_start',  'login-start-signin-screen',       False),
     91         ('kernel_to_signin_wait',
     92             'login-wait-for-signin-state-initialize',                  False),
     93         # This event doesn't happen if device has no users.
     94         ('kernel_to_signin_users',  'login-send-user-list',            False),
     95         ('kernel_to_signin_seen',   'login-prompt-visible',            True),
     96         ('kernel_to_login',         'boot-complete',                   True),
     97         ('kernel_to_android_start', 'android-start',                   False)
     98     ]
     99 
    100     _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0'
    101                       '/cpufreq/cpuinfo_max_freq')
    102 
    103     _UPTIME_PREFIX = 'uptime-'
    104     _DISK_PREFIX = 'disk-'
    105 
    106     _FIRMWARE_TIME_FILE = '/tmp/firmware-boot-time'
    107 
    108     _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*'
    109     _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*')
    110     _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*')
    111 
    112     _RAMOOPS_FILE = "/dev/pstore/console-ramoops"
    113 
    114 
    115     def _copy_timestamp_files(self):
    116         """Copy raw data files to the test results."""
    117         statlist = (glob.glob(self._UPTIME_FILE_GLOB) +
    118                             glob.glob(self._DISK_FILE_GLOB))
    119         for fname in statlist:
    120             shutil.copy(fname, self.resultsdir)
    121         try:
    122             shutil.copy(self._FIRMWARE_TIME_FILE, self.resultsdir)
    123         except Exception:
    124             pass
    125 
    126     def _copy_console_ramoops(self):
    127         """Copy console_ramoops from previous reboot."""
    128         # If reboot was misbehaving, looking at ramoops may provide clues.
    129         try:
    130             shutil.copy(self._RAMOOPS_FILE, self.resultsdir)
    131         except Exception:
    132             pass
    133 
    134     def _parse_bootstat(self, filename, fieldnum):
    135         """Read values from a bootstat event file.
    136 
    137         Each line of a bootstat event file represents one occurrence
    138         of the event.  Each line is a copy of the content of
    139         /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat
    140         ("disk-" files), captured at the time of the occurrence.
    141         For either kind of file, each line is a blank separated list
    142         of fields.
    143 
    144         The given event file can contain either uptime or disk data.
    145         This function reads all lines (occurrences) in the event
    146         file, and returns the value of the given field
    147 
    148         @param filename         Filename of the bootstat event.
    149         @param fieldnum         Which field of the file.
    150         @return                 List of values of `fieldnum` for
    151                                 all occurrences in the file.
    152         @raises error.TestFail  Raised if the event file is missing,
    153                                 unreadable, or malformed.
    154 
    155         """
    156         try:
    157             with open(filename) as statfile:
    158                 values = map(lambda l: float(l.split()[fieldnum]),
    159                              statfile.readlines())
    160             return values
    161         except IOError:
    162             raise error.TestFail('Failed to read bootstat file "%s"' %
    163                                  filename)
    164 
    165 
    166     def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0):
    167         """Return time since boot for a bootstat event.
    168 
    169         @param eventname        Name of the bootstat event.
    170         @param boostat_dir      Directory containing the bootstat
    171                                 files.
    172         @param index            Index of which occurrence of the event
    173                                 to select.
    174         @return                 Time since boot for the selected
    175                                 event.
    176 
    177         """
    178         event_file = os.path.join(bootstat_dir,
    179                                   self._UPTIME_PREFIX) + eventname
    180         return self._parse_bootstat(event_file, 0)[index]
    181 
    182 
    183     def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0):
    184         """Return sectors read since boot for a bootstat event.
    185 
    186         @param eventname        Name of the bootstat event.
    187         @param boostat_dir      Directory containing the bootstat files.
    188         @param index            Index of which occurrence of the event
    189                                 to select.
    190         @return                 Number of sectors read since boot for
    191                                 the selected event.
    192 
    193         """
    194         event_file = os.path.join(bootstat_dir,
    195                                   self._DISK_PREFIX) + eventname
    196         return self._parse_bootstat(event_file, 2)[index]
    197 
    198 
    199     def _gather_vboot_times(self, results):
    200         """Read and report firmware internal timestamps.
    201 
    202         The firmware for all Chrome platforms except Mario records
    203         the ticks since power on at selected times during startup.
    204         These timestamps can be extracted from the `crossystem`
    205         command.
    206 
    207         If the timestamps are available, convert the tick times to
    208         seconds and record the following keyvals in `results`:
    209           * seconds_power_on_to_lf_start
    210           * seconds_power_on_to_lf_end
    211           * seconds_power_on_to_lk_start
    212           * seconds_power_on_to_lk_end
    213 
    214         The frequency of the recorded tick timestamps is determined
    215         by reading `_CPU_FREQ_FILE` and is recorded in the keyval
    216         mhz_primary_cpu.
    217 
    218         @param results  Keyvals dictionary.
    219 
    220         """
    221         try:
    222             khz = int(utils.read_one_line(self._CPU_FREQ_FILE))
    223         except IOError:
    224             logging.info('Test is unable to read "%s", not calculating the '
    225                          'vboot times.', self._CPU_FREQ_FILE)
    226             return
    227         hertz = khz * 1000.0
    228         results['mhz_primary_cpu'] = khz / 1000.0
    229         try:
    230             out = utils.system_output('crossystem')
    231         except error.CmdError:
    232             logging.info('Unable to run crossystem, not calculating the vboot '
    233                          'times.')
    234             return
    235         # Parse the crossystem output, we are looking for vdat_timers
    236         items = out.splitlines()
    237         for item in items:
    238             times_re = re.compile(r'LF=(\d+),(\d+) LK=(\d+),(\d+)')
    239             match = re.findall(times_re, item)
    240             if match:
    241                 times = map(lambda s: round(float(s) / hertz, 2), match[0])
    242                 results['seconds_power_on_to_lf_start'] = times[0]
    243                 results['seconds_power_on_to_lf_end'] = times[1]
    244                 results['seconds_power_on_to_lk_start'] = times[2]
    245                 results['seconds_power_on_to_lk_end'] = times[3]
    246 
    247 
    248     def _gather_firmware_boot_time(self, results):
    249         """Read and report firmware startup time.
    250 
    251         The boot process writes the firmware startup time to the
    252         file named in `_FIRMWARE_TIME_FILE`.  Read the time from that
    253         file, and record it in `results` as the keyval
    254         seconds_power_on_to_kernel.
    255 
    256         @param results  Keyvals dictionary.
    257 
    258         """
    259         try:
    260             # If the firmware boot time is not available, the file
    261             # will not exist.
    262             data = utils.read_one_line(self._FIRMWARE_TIME_FILE)
    263         except IOError:
    264             return
    265         firmware_time = float(data)
    266         boot_time = results['seconds_kernel_to_login']
    267         results['seconds_power_on_to_kernel'] = firmware_time
    268         results['seconds_power_on_to_login'] = (
    269                 round(firmware_time + boot_time, 2))
    270 
    271 
    272     def _gather_time_keyvals(self, results):
    273         """Read and report boot time keyvals.
    274 
    275         Read "seconds since kernel startup" from the bootstat files
    276         for the events named in `_EVENT_KEYVALS`, and store the
    277         values as perf keyvals.  The following keyvals are recorded:
    278           * seconds_kernel_to_startup
    279           * seconds_kernel_to_startup_done
    280           * seconds_kernel_to_x_started
    281           * seconds_kernel_to_chrome_exec
    282           * seconds_kernel_to_chrome_main
    283           * seconds_kernel_to_login
    284           * seconds_kernel_to_network
    285         All of these keyvals are considered mandatory, except
    286         for seconds_kernel_to_network.
    287 
    288         @param results          Keyvals dictionary.
    289         @raises error.TestFail  Raised if any mandatory keyval can't
    290                                 be determined.
    291 
    292         """
    293         for keyval_name, event_name, required in self._EVENT_KEYVALS:
    294             key = 'seconds_' + keyval_name
    295             try:
    296                 results[key] = self._parse_uptime(event_name)
    297             except error.TestFail:
    298                 if required:
    299                     raise;
    300 
    301         # Not all 'uptime-network-*-ready' files necessarily exist;
    302         # probably there's only one.  We go through a list of
    303         # possibilities and pick the first one we find.  We're not
    304         # looking for 3G here, so we're not guaranteed to find any
    305         # file.
    306         network_ready_events = [
    307             'network-wifi-ready',
    308             'network-ethernet-ready'
    309         ]
    310 
    311         for event_name in network_ready_events:
    312             try:
    313                 network_time = self._parse_uptime(event_name)
    314                 results['seconds_kernel_to_network'] = network_time
    315                 break
    316             except error.TestFail:
    317                 pass
    318 
    319 
    320     def _gather_disk_keyvals(self, results):
    321         """Read and report disk read keyvals.
    322 
    323         Read "sectors read since kernel startup" from the bootstat
    324         files for the events named in `_EVENT_KEYVALS`, convert the
    325         values to "bytes read since boot", and store the values as
    326         perf keyvals.  The following keyvals are recorded:
    327           * rdbytes_kernel_to_startup
    328           * rdbytes_kernel_to_startup_done
    329           * rdbytes_kernel_to_x_started
    330           * rdbytes_kernel_to_chrome_exec
    331           * rdbytes_kernel_to_chrome_main
    332           * rdbytes_kernel_to_login
    333 
    334         Disk statistics are reported in units of 512 byte sectors;
    335         we convert the keyvals to bytes so that downstream consumers
    336         don't have to ask "How big is a sector?".
    337 
    338         @param results  Keyvals dictionary.
    339 
    340         """
    341         # We expect an error when reading disk statistics for the
    342         # "chrome-main" event because Chrome (not bootstat) generates
    343         # that event, and it doesn't include the disk statistics.
    344         # We get around that by ignoring all errors.
    345         for keyval_name, event_name, required in self._EVENT_KEYVALS:
    346             try:
    347                 key = 'rdbytes_' + keyval_name
    348                 results[key] = 512 * self._parse_diskstat(event_name)
    349             except Exception:
    350                 pass
    351 
    352 
    353     def _calculate_timeval(self, event, t0, t1, t_uptime):
    354         """Estimate the absolute time of a time since boot.
    355 
    356         Input values `event` and `t_uptime` are times measured as
    357         seconds since boot (for the same boot event, as from
    358         /proc/uptime).  The input values `t0` and `t1` are two
    359         values measured as seconds since the epoch.  The three "t"
    360         values were sampled in the order `t0`, `t_uptime`, `t1`.
    361 
    362         Estimate the time of `event` measured as seconds since the
    363         epoch.  Also estimate the worst-case error based on the time
    364         elapsed between `t0` and `t1`.
    365 
    366         All values are floats.  The precision of `event` and
    367         `t_uptime` is expected to be kernel jiffies (i.e. one
    368         centisecond).  The output result is rounded to the nearest
    369         jiffy.
    370 
    371         @param event    A time to be converted from "seconds since
    372                         boot" into "seconds since the epoch".
    373         @param t0       A reference time earlier than time `t1`, and
    374                         measured as "seconds since the epoch".
    375         @param t1       A reference time later than time `t0`, and
    376                         measured as "seconds since the epoch".
    377         @param t_uptime A reference time measured as "seconds since
    378                         boot", in between time `t0` and `t1`.
    379 
    380         @return         An estimate of the time of `event` measured
    381                         as seconds since the epoch, rounded to the
    382                         nearest jiffy.
    383 
    384         """
    385         # Floating point geeks may argue that these calculations
    386         # don't guarantee the promised precision:  I don't care;
    387         # it's good enough.
    388         boot_timeval = round((t0 + t1) / 2, 2) - t_uptime
    389         error = (t1 - t0) / 2
    390         return boot_timeval + event, error
    391 
    392 
    393     def _gather_reboot_keyvals(self, results):
    394         """Read and report shutdown and reboot times.
    395 
    396         The shutdown process saves all bootstat files in /var/log,
    397         plus it saves a timestamp file that can be used to convert
    398         "time since boot" into times in UTC.  Read the saved files
    399         from the most recent shutdown, and use them to calculate
    400         the time spent from the start of that shutdown until the
    401         completion of the most recent boot.  Record these keyvals:
    402           * seconds_shutdown_time
    403           * seconds_reboot_time
    404           * seconds_reboot_error
    405 
    406         @param results  Keyvals dictionary.
    407 
    408         """
    409         bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB)
    410         if not bootstat_archives:
    411             return
    412         bootstat_dir = max(bootstat_archives)
    413         boot_id = open("/proc/sys/kernel/random/boot_id", "r").read()
    414         didrun_path = os.path.join(bootstat_dir, "bootperf_ran")
    415         if not os.path.exists(didrun_path):
    416             with open(didrun_path, "w") as didrun:
    417                 didrun.write(boot_id)
    418         elif open(didrun_path, "r").read() != boot_id:
    419             logging.warning("Ignoring reboot based on stale shutdown %s",
    420                          os.path.basename(bootstat_dir))
    421             return
    422         timestamp_path = os.path.join(bootstat_dir, 'timestamp')
    423         try:
    424             with open(timestamp_path) as timestamp:
    425                 archive_t0 = float(timestamp.readline())
    426                 archive_t1 = float(timestamp.readline())
    427         except IOError:
    428             raise error.TestFail('Failed to read "%s"' % timestamp_path)
    429         archive_uptime = self._parse_uptime('archive',
    430                                             bootstat_dir=bootstat_dir)
    431         shutdown_uptime = self._parse_uptime('ui-post-stop',
    432                                              bootstat_dir=bootstat_dir,
    433                                              index=-1)
    434         shutdown_timeval, shutdown_error = self._calculate_timeval(
    435                 shutdown_uptime, archive_t0, archive_t1, archive_uptime)
    436         boot_t0 = time.time()
    437         with open('/proc/uptime') as uptime_file:
    438             uptime = float(uptime_file.readline().split()[0])
    439         boot_t1 = time.time()
    440         boot_timeval, boot_error = self._calculate_timeval(
    441                 results['seconds_kernel_to_login'],
    442                 boot_t0, boot_t1, uptime)
    443         reboot_time = round(boot_timeval - shutdown_timeval, 2)
    444         poweron_time = results['seconds_power_on_to_login']
    445         shutdown_time = round(reboot_time - poweron_time, 2)
    446         results['seconds_reboot_time'] = reboot_time
    447         results['seconds_reboot_error'] = shutdown_error + boot_error
    448         results['seconds_shutdown_time'] = shutdown_time
    449 
    450 
    451     def run_once(self):
    452         """Gather boot time statistics.
    453 
    454         Every shutdown and boot creates `bootstat` files with
    455         summary statistics for time elapsed and disk usage.  Gather
    456         the values reported for shutdown, boot time and network
    457         startup time, and record them as perf keyvals.
    458 
    459         Additionally, gather information about firmware startup time
    460         from various sources, and record the times as perf keyvals.
    461 
    462         Finally, copy the raw data files to the results directory
    463         for reference.
    464 
    465         """
    466         # `results` is the keyvals dictionary
    467         results = {}
    468 
    469         self._gather_time_keyvals(results)
    470         self._gather_disk_keyvals(results)
    471         self._gather_firmware_boot_time(results)
    472         self._gather_vboot_times(results)
    473         self._gather_reboot_keyvals(results)
    474 
    475         self._copy_timestamp_files()
    476         self._copy_console_ramoops()
    477 
    478         self.write_perf_keyval(results)
    479