Home | History | Annotate | Download | only in platform_BootPerf
      1 # Copyright (c) 2009 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import glob
      6 import logging
      7 import os
      8 import re
      9 import shutil
     10 import time
     11 import utils
     12 
     13 from autotest_lib.client.bin import test
     14 from autotest_lib.client.common_lib import error
     15 
     16 class platform_BootPerf(test.test):
     17     """Test to gather recorded boot time statistics.
     18 
     19     The primary function of this test is to gather a rather large
     20     assortment of performance keyvals that capture timing and disk
     21     usage statistics associated with the most recent boot or reboot.
     22 
     23     The test calculates some or all of the following keyvals:
     24       * seconds_kernel_to_startup
     25       * seconds_kernel_to_startup_done
     26       * seconds_kernel_to_x_started
     27       * seconds_kernel_to_chrome_exec
     28       * seconds_kernel_to_chrome_main
     29       * seconds_kernel_to_signin_start
     30       * seconds_kernel_to_signin_wait
     31       * seconds_kernel_to_signin_users
     32       * seconds_kernel_to_signin_seen
     33       * seconds_kernel_to_login
     34       * seconds_kernel_to_network
     35       * rdbytes_kernel_to_startup
     36       * rdbytes_kernel_to_startup_done
     37       * rdbytes_kernel_to_x_started
     38       * rdbytes_kernel_to_chrome_exec
     39       * rdbytes_kernel_to_chrome_main
     40       * rdbytes_kernel_to_login
     41       * seconds_power_on_to_lf_start
     42       * seconds_power_on_to_lf_end
     43       * seconds_power_on_to_lk_start
     44       * seconds_power_on_to_lk_end
     45       * seconds_power_on_to_kernel
     46       * seconds_shutdown_time
     47       * seconds_reboot_time
     48       * seconds_reboot_error
     49       * mhz_primary_cpu
     50 
     51     """
     52 
     53     version = 2
     54 
     55     # Names of keyvals, their associated bootstat events, 'needs_x' flag and
     56     # 'Required' flag.
     57     # Events that needs X are not checked in the freon world.
     58     # Test fails if a required event is not found.
     59     # Each event samples statistics measured since kernel startup
     60     # at a specific moment on the boot critical path:
     61     #   pre-startup - The start of the `chromeos_startup` script;
     62     #     roughly, the time when /sbin/init emits the `startup`
     63     #     Upstart event.
     64     #   post-startup - Completion of the `chromeos_startup` script.
     65     #   x-started - Completion of X server initialization.
     66     #   chrome-exec - The moment when session_manager exec's the
     67     #     first Chrome process.
     68     #   chrome-main - The moment when the first Chrome process
     69     #     begins executing in main().
     70     #   kernel_to_signin_start - The moment when LoadPage(loginSceenURL)
     71     #     is called, i.e. initialization starts.
     72     #   kernel_to_signin_wait - The moment when UI thread has finished signin
     73     #     screen initialization and now waits until JS sends "ready" event.
     74     #   kernel_to_signin_users - The moment when UI thread receives "ready" from
     75     #     JS code. So V8 is initialized and running, etc...
     76     #   kernel_to_signin_seen - The moment when user can actually see signin UI.
     77     #   boot-complete - Completion of boot after Chrome presents the
     78     #     login screen.
     79     _EVENT_KEYVALS = [
     80         # N.B.  Keyval attribute names go into a database that
     81         # truncates after 30 characters.  The key names below are
     82         # prefixed with 8 characters, either 'seconds_' or
     83         # 'rdbytes_', so we have 22 characters wiggle room.
     84         #
     85         # ----+----1----+----2--
     86         ('kernel_to_startup',       'pre-startup',               False, True),
     87         ('kernel_to_startup_done',  'post-startup',              False, True),
     88         ('kernel_to_x_started',     'x-started',                 True,  True),
     89         ('kernel_to_chrome_exec',   'chrome-exec',               False, True),
     90         ('kernel_to_chrome_main',   'chrome-main',               False, True),
     91         # These two events do not happen if device is in OOBE.
     92         ('kernel_to_signin_start',  'login-start-signin-screen', False, False),
     93         ('kernel_to_signin_wait',
     94             'login-wait-for-signin-state-initialize',            False, False),
     95         # This event doesn't happen if device has no users.
     96         ('kernel_to_signin_users',  'login-send-user-list',      False, False),
     97         ('kernel_to_signin_seen',   'login-prompt-visible',      False, True),
     98         ('kernel_to_login',         'boot-complete',             False, True)
     99     ]
    100 
    101     _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0'
    102                       '/cpufreq/cpuinfo_max_freq')
    103 
    104     _UPTIME_PREFIX = 'uptime-'
    105     _DISK_PREFIX = 'disk-'
    106 
    107     _FIRMWARE_TIME_FILE = '/tmp/firmware-boot-time'
    108 
    109     _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*'
    110     _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*')
    111     _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*')
    112 
    113     _RAMOOPS_FILE = "/dev/pstore/console-ramoops"
    114 
    115 
    116     def _copy_timestamp_files(self):
    117         """Copy raw data files to the test results."""
    118         statlist = (glob.glob(self._UPTIME_FILE_GLOB) +
    119                             glob.glob(self._DISK_FILE_GLOB))
    120         for fname in statlist:
    121             shutil.copy(fname, self.resultsdir)
    122         try:
    123             shutil.copy(self._FIRMWARE_TIME_FILE, self.resultsdir)
    124         except Exception:
    125             pass
    126 
    127     def _copy_console_ramoops(self):
    128         """Copy console_ramoops from previous reboot."""
    129         # If reboot was misbehaving, looking at ramoops may provide clues.
    130         try:
    131             shutil.copy(self._RAMOOPS_FILE, self.resultsdir)
    132         except Exception:
    133             pass
    134 
    135     def _parse_bootstat(self, filename, fieldnum):
    136         """Read values from a bootstat event file.
    137 
    138         Each line of a bootstat event file represents one occurrence
    139         of the event.  Each line is a copy of the content of
    140         /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat
    141         ("disk-" files), captured at the time of the occurrence.
    142         For either kind of file, each line is a blank separated list
    143         of fields.
    144 
    145         The given event file can contain either uptime or disk data.
    146         This function reads all lines (occurrences) in the event
    147         file, and returns the value of the given field
    148 
    149         @param filename         Filename of the bootstat event.
    150         @param fieldnum         Which field of the file.
    151         @return                 List of values of `fieldnum` for
    152                                 all occurrences in the file.
    153         @raises error.TestFail  Raised if the event file is missing,
    154                                 unreadable, or malformed.
    155 
    156         """
    157         try:
    158             with open(filename) as statfile:
    159                 values = map(lambda l: float(l.split()[fieldnum]),
    160                              statfile.readlines())
    161             return values
    162         except IOError:
    163             raise error.TestFail('Failed to read bootstat file "%s"' %
    164                                  filename)
    165 
    166 
    167     def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0):
    168         """Return time since boot for a bootstat event.
    169 
    170         @param eventname        Name of the bootstat event.
    171         @param boostat_dir      Directory containing the bootstat
    172                                 files.
    173         @param index            Index of which occurrence of the event
    174                                 to select.
    175         @return                 Time since boot for the selected
    176                                 event.
    177 
    178         """
    179         event_file = os.path.join(bootstat_dir,
    180                                   self._UPTIME_PREFIX) + eventname
    181         return self._parse_bootstat(event_file, 0)[index]
    182 
    183 
    184     def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0):
    185         """Return sectors read since boot for a bootstat event.
    186 
    187         @param eventname        Name of the bootstat event.
    188         @param boostat_dir      Directory containing the bootstat files.
    189         @param index            Index of which occurrence of the event
    190                                 to select.
    191         @return                 Number of sectors read since boot for
    192                                 the selected event.
    193 
    194         """
    195         event_file = os.path.join(bootstat_dir,
    196                                   self._DISK_PREFIX) + eventname
    197         return self._parse_bootstat(event_file, 2)[index]
    198 
    199 
    200     def _gather_vboot_times(self, results):
    201         """Read and report firmware internal timestamps.
    202 
    203         The firmware for all Chrome platforms except Mario records
    204         the ticks since power on at selected times during startup.
    205         These timestamps can be extracted from the `crossystem`
    206         command.
    207 
    208         If the timestamps are available, convert the tick times to
    209         seconds and record the following keyvals in `results`:
    210           * seconds_power_on_to_lf_start
    211           * seconds_power_on_to_lf_end
    212           * seconds_power_on_to_lk_start
    213           * seconds_power_on_to_lk_end
    214 
    215         The frequency of the recorded tick timestamps is determined
    216         by reading `_CPU_FREQ_FILE` and is recorded in the keyval
    217         mhz_primary_cpu.
    218 
    219         @param results  Keyvals dictionary.
    220 
    221         """
    222         try:
    223             khz = int(utils.read_one_line(self._CPU_FREQ_FILE))
    224         except IOError:
    225             logging.info('Test is unable to read "%s", not calculating the '
    226                          'vboot times.', self._CPU_FREQ_FILE)
    227             return
    228         hertz = khz * 1000.0
    229         results['mhz_primary_cpu'] = khz / 1000.0
    230         try:
    231             out = utils.system_output('crossystem')
    232         except error.CmdError:
    233             logging.info('Unable to run crossystem, not calculating the vboot '
    234                          'times.')
    235             return
    236         # Parse the crossystem output, we are looking for vdat_timers
    237         items = out.splitlines()
    238         for item in items:
    239             times_re = re.compile(r'LF=(\d+),(\d+) LK=(\d+),(\d+)')
    240             match = re.findall(times_re, item)
    241             if match:
    242                 times = map(lambda s: round(float(s) / hertz, 2), match[0])
    243                 results['seconds_power_on_to_lf_start'] = times[0]
    244                 results['seconds_power_on_to_lf_end'] = times[1]
    245                 results['seconds_power_on_to_lk_start'] = times[2]
    246                 results['seconds_power_on_to_lk_end'] = times[3]
    247 
    248 
    249     def _gather_firmware_boot_time(self, results):
    250         """Read and report firmware startup time.
    251 
    252         The boot process writes the firmware startup time to the
    253         file named in `_FIRMWARE_TIME_FILE`.  Read the time from that
    254         file, and record it in `results` as the keyval
    255         seconds_power_on_to_kernel.
    256 
    257         @param results  Keyvals dictionary.
    258 
    259         """
    260         try:
    261             # If the firmware boot time is not available, the file
    262             # will not exist.
    263             data = utils.read_one_line(self._FIRMWARE_TIME_FILE)
    264         except IOError:
    265             return
    266         firmware_time = float(data)
    267         boot_time = results['seconds_kernel_to_login']
    268         results['seconds_power_on_to_kernel'] = firmware_time
    269         results['seconds_power_on_to_login'] = (
    270                 round(firmware_time + boot_time, 2))
    271 
    272 
    273     def _gather_time_keyvals(self, results):
    274         """Read and report boot time keyvals.
    275 
    276         Read "seconds since kernel startup" from the bootstat files
    277         for the events named in `_EVENT_KEYVALS`, and store the
    278         values as perf keyvals.  The following keyvals are recorded:
    279           * seconds_kernel_to_startup
    280           * seconds_kernel_to_startup_done
    281           * seconds_kernel_to_x_started
    282           * seconds_kernel_to_chrome_exec
    283           * seconds_kernel_to_chrome_main
    284           * seconds_kernel_to_login
    285           * seconds_kernel_to_network
    286         All of these keyvals are considered mandatory, except
    287         for seconds_kernel_to_network.
    288 
    289         @param results          Keyvals dictionary.
    290         @raises error.TestFail  Raised if any mandatory keyval can't
    291                                 be determined.
    292 
    293         """
    294         for keyval_name, event_name, needs_x, required in self._EVENT_KEYVALS:
    295             if needs_x and utils.is_freon():
    296                 continue
    297             key = 'seconds_' + keyval_name
    298             try:
    299                 results[key] = self._parse_uptime(event_name)
    300             except error.TestFail:
    301                 if required:
    302                     raise;
    303 
    304         # Not all 'uptime-network-*-ready' files necessarily exist;
    305         # probably there's only one.  We go through a list of
    306         # possibilities and pick the first one we find.  We're not
    307         # looking for 3G here, so we're not guaranteed to find any
    308         # file.
    309         network_ready_events = [
    310             'network-wifi-ready',
    311             'network-ethernet-ready'
    312         ]
    313 
    314         for event_name in network_ready_events:
    315             try:
    316                 network_time = self._parse_uptime(event_name)
    317                 results['seconds_kernel_to_network'] = network_time
    318                 break
    319             except error.TestFail:
    320                 pass
    321 
    322 
    323     def _gather_disk_keyvals(self, results):
    324         """Read and report disk read keyvals.
    325 
    326         Read "sectors read since kernel startup" from the bootstat
    327         files for the events named in `_EVENT_KEYVALS`, convert the
    328         values to "bytes read since boot", and store the values as
    329         perf keyvals.  The following keyvals are recorded:
    330           * rdbytes_kernel_to_startup
    331           * rdbytes_kernel_to_startup_done
    332           * rdbytes_kernel_to_x_started
    333           * rdbytes_kernel_to_chrome_exec
    334           * rdbytes_kernel_to_chrome_main
    335           * rdbytes_kernel_to_login
    336 
    337         Disk statistics are reported in units of 512 byte sectors;
    338         we convert the keyvals to bytes so that downstream consumers
    339         don't have to ask "How big is a sector?".
    340 
    341         @param results  Keyvals dictionary.
    342 
    343         """
    344         # We expect an error when reading disk statistics for the
    345         # "chrome-main" event because Chrome (not bootstat) generates
    346         # that event, and it doesn't include the disk statistics.
    347         # We get around that by ignoring all errors.
    348         for keyval_name, event_name, needs_x, required in self._EVENT_KEYVALS:
    349             if needs_x and utils.is_freon():
    350                 continue
    351             try:
    352                 key = 'rdbytes_' + keyval_name
    353                 results[key] = 512 * self._parse_diskstat(event_name)
    354             except Exception:
    355                 pass
    356 
    357 
    358     def _calculate_timeval(self, event, t0, t1, t_uptime):
    359         """Estimate the absolute time of a time since boot.
    360 
    361         Input values `event` and `t_uptime` are times measured as
    362         seconds since boot (for the same boot event, as from
    363         /proc/uptime).  The input values `t0` and `t1` are two
    364         values measured as seconds since the epoch.  The three "t"
    365         values were sampled in the order `t0`, `t_uptime`, `t1`.
    366 
    367         Estimate the time of `event` measured as seconds since the
    368         epoch.  Also estimate the worst-case error based on the time
    369         elapsed between `t0` and `t1`.
    370 
    371         All values are floats.  The precision of `event` and
    372         `t_uptime` is expected to be kernel jiffies (i.e. one
    373         centisecond).  The output result is rounded to the nearest
    374         jiffy.
    375 
    376         @param event    A time to be converted from "seconds since
    377                         boot" into "seconds since the epoch".
    378         @param t0       A reference time earlier than time `t1`, and
    379                         measured as "seconds since the epoch".
    380         @param t1       A reference time later than time `t0`, and
    381                         measured as "seconds since the epoch".
    382         @param t_uptime A reference time measured as "seconds since
    383                         boot", in between time `t0` and `t1`.
    384 
    385         @return         An estimate of the time of `event` measured
    386                         as seconds since the epoch, rounded to the
    387                         nearest jiffy.
    388 
    389         """
    390         # Floating point geeks may argue that these calculations
    391         # don't guarantee the promised precision:  I don't care;
    392         # it's good enough.
    393         boot_timeval = round((t0 + t1) / 2, 2) - t_uptime
    394         error = (t1 - t0) / 2
    395         return boot_timeval + event, error
    396 
    397 
    398     def _gather_reboot_keyvals(self, results):
    399         """Read and report shutdown and reboot times.
    400 
    401         The shutdown process saves all bootstat files in /var/log,
    402         plus it saves a timestamp file that can be used to convert
    403         "time since boot" into times in UTC.  Read the saved files
    404         from the most recent shutdown, and use them to calculate
    405         the time spent from the start of that shutdown until the
    406         completion of the most recent boot.  Record these keyvals:
    407           * seconds_shutdown_time
    408           * seconds_reboot_time
    409           * seconds_reboot_error
    410 
    411         @param results  Keyvals dictionary.
    412 
    413         """
    414         bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB)
    415         if not bootstat_archives:
    416             return
    417         bootstat_dir = max(bootstat_archives)
    418         boot_id = open("/proc/sys/kernel/random/boot_id", "r").read()
    419         didrun_path = os.path.join(bootstat_dir, "bootperf_ran")
    420         if not os.path.exists(didrun_path):
    421             with open(didrun_path, "w") as didrun:
    422                 didrun.write(boot_id)
    423         elif open(didrun_path, "r").read() != boot_id:
    424             logging.warning("Ignoring reboot based on stale shutdown %s",
    425                          os.path.basename(bootstat_dir))
    426             return
    427         timestamp_path = os.path.join(bootstat_dir, 'timestamp')
    428         try:
    429             with open(timestamp_path) as timestamp:
    430                 archive_t0 = float(timestamp.readline())
    431                 archive_t1 = float(timestamp.readline())
    432         except IOError:
    433             raise error.TestFail('Failed to read "%s"' % timestamp_path)
    434         archive_uptime = self._parse_uptime('archive',
    435                                             bootstat_dir=bootstat_dir)
    436         shutdown_uptime = self._parse_uptime('ui-post-stop',
    437                                              bootstat_dir=bootstat_dir,
    438                                              index=-1)
    439         shutdown_timeval, shutdown_error = self._calculate_timeval(
    440                 shutdown_uptime, archive_t0, archive_t1, archive_uptime)
    441         boot_t0 = time.time()
    442         with open('/proc/uptime') as uptime_file:
    443             uptime = float(uptime_file.readline().split()[0])
    444         boot_t1 = time.time()
    445         boot_timeval, boot_error = self._calculate_timeval(
    446                 results['seconds_kernel_to_login'],
    447                 boot_t0, boot_t1, uptime)
    448         reboot_time = round(boot_timeval - shutdown_timeval, 2)
    449         poweron_time = results['seconds_power_on_to_login']
    450         shutdown_time = round(reboot_time - poweron_time, 2)
    451         results['seconds_reboot_time'] = reboot_time
    452         results['seconds_reboot_error'] = shutdown_error + boot_error
    453         results['seconds_shutdown_time'] = shutdown_time
    454 
    455 
    456     def run_once(self):
    457         """Gather boot time statistics.
    458 
    459         Every shutdown and boot creates `bootstat` files with
    460         summary statistics for time elapsed and disk usage.  Gather
    461         the values reported for shutdown, boot time and network
    462         startup time, and record them as perf keyvals.
    463 
    464         Additionally, gather information about firmware startup time
    465         from various sources, and record the times as perf keyvals.
    466 
    467         Finally, copy the raw data files to the results directory
    468         for reference.
    469 
    470         """
    471         # `results` is the keyvals dictionary
    472         results = {}
    473 
    474         self._gather_time_keyvals(results)
    475         self._gather_disk_keyvals(results)
    476         self._gather_firmware_boot_time(results)
    477         self._gather_vboot_times(results)
    478         self._gather_reboot_keyvals(results)
    479 
    480         self._copy_timestamp_files()
    481         self._copy_console_ramoops()
    482 
    483         self.write_perf_keyval(results)
    484