# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# pylint: disable=module-missing-docstring
# pylint: disable=docstring-section-name

import csv
import glob
import httplib
import json
import logging
import os
import re
import shutil
import time
import urllib
import urllib2

from autotest_lib.client.bin import test
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import constants

# TODO(scunningham): Return to 72000 (20 hrs) after server-side stabilizes.
TEST_DURATION = 10800  # Duration of test (3 hrs) in seconds.
SAMPLE_INTERVAL = 60  # Length of measurement samples in seconds.
METRIC_INTERVAL = 3600  # Length between metric calculations in seconds.
STABILIZATION_DURATION = 60  # Time for test stabilization in seconds.
TMP_DIRECTORY = '/tmp/'
EXIT_FLAG_FILE = TMP_DIRECTORY + 'longevity_terminate'
PERF_FILE_NAME_PREFIX = 'perf'
OLD_FILE_AGE = 14400  # Age of old files to be deleted, in minutes (10 days).
# The manifest.json file for a Chrome Extension contains the app name, id,
# version, and other app info. It is accessible by the OS only while the app
# is running, and thus its cryptohome directory is mounted. Only one Kiosk
# app can be running at a time.
MANIFEST_PATTERN = '/home/.shadow/*/mount/user/Extensions/%s/*/manifest.json'
VERSION_PATTERN = r'^(\d+)\.(\d+)\.(\d+)\.(\d+)$'
DASHBOARD_UPLOAD_URL = 'https://chromeperf.appspot.com/add_point'
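
# For reference (derived from the constants above, not a measured result):
# one cycle runs for TEST_DURATION = 10800 s, records one sample per
# SAMPLE_INTERVAL = 60 s (about 180 samples per run), and rolls samples up
# into 90th-percentile metrics once per METRIC_INTERVAL = 3600 s (3 times
# per run).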


class PerfUploadingError(Exception):
    """Exception raised in perf_uploader."""
    pass


class longevity_Tracker(test.test):
    """Monitor device and App stability over long periods of time."""

    version = 1

    def initialize(self):
        self.temp_dir = os.path.split(self.tmpdir)[0]

    def _get_cpu_usage(self):
        """Compute percent CPU in active use over the sample interval.

        Note: This method introduces a sleep period into the test, equal to
        90% of the sample interval.

        @returns float of percent active use of CPU.
        """
        # Time between measurements is ~90% of the sample interval.
        measurement_time_delta = SAMPLE_INTERVAL * 0.90
        cpu_usage_start = utils.get_cpu_usage()
        time.sleep(measurement_time_delta)
        cpu_usage_end = utils.get_cpu_usage()
        return utils.compute_active_cpu_time(cpu_usage_start,
                                             cpu_usage_end) * 100

    def _get_mem_usage(self):
        """Compute percent memory in active use.

        @returns float of percent memory in use.
        """
        total_memory = utils.get_mem_total()
        free_memory = utils.get_mem_free()
        return ((total_memory - free_memory) / total_memory) * 100

    def _get_max_temperature(self):
        """Get temperature of hottest sensor in Celsius.

        @returns float of temperature of hottest sensor.
        """
        temperature = utils.get_current_temperature_max()
        if not temperature:
            temperature = 0
        return temperature

    def _get_hwid(self):
        """Get hwid of test device, e.g., 'WOLF C4A-B2B-A47'.

        @returns string of hwid (Hardware ID) of device under test.
        """
        with os.popen('crossystem hwid 2>/dev/null', 'r') as hwid_proc:
            hwid = hwid_proc.read()
        if not hwid:
            hwid = 'undefined'
        return hwid

    def elapsed_time(self, mark_time):
        """Get time elapsed since |mark_time|.

        @param mark_time: point in time from which elapsed time is measured.
        @returns time elapsed since the marked time.
        """
        return time.time() - mark_time

    def modulo_time(self, timer, interval):
        """Get time elapsed on |timer| for the |interval| modulus.

        Value returned is used to adjust the timer so that it is synchronized
        with the current interval.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time elapsed from the start of the current interval.
        """
        return timer % int(interval)

    def syncup_time(self, timer, interval):
        """Get time remaining on |timer| for the |interval| modulus.

        Value returned is used to induce sleep just long enough to put the
        process back in sync with the timer.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time remaining till the end of the current interval.
        """
        return interval - (timer % int(interval))
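
    # Worked example (illustrative numbers, not from a real run): with an
    # interval of SAMPLE_INTERVAL = 60, a timer reading of 130.0 seconds
    # gives modulo_time(130.0, 60) == 10.0 (seconds already elapsed in the
    # current interval) and syncup_time(130.0, 60) == 50.0 (seconds to sleep
    # to realign the loop with the next interval boundary).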

    def _record_perf_measurements(self, perf_values, perf_writer):
        """Record attribute performance measurements, and write to file.

        @param perf_values: dict of attribute performance values.
        @param perf_writer: file to write performance measurements.
        """
        # Get performance measurements.
        cpu_usage = '%.3f' % self._get_cpu_usage()
        mem_usage = '%.3f' % self._get_mem_usage()
        max_temp = '%.3f' % self._get_max_temperature()

        # Append measurements to attribute lists in perf values dictionary.
        perf_values['cpu'].append(cpu_usage)
        perf_values['mem'].append(mem_usage)
        perf_values['temp'].append(max_temp)

        # Write performance measurements to perf timestamped file.
        time_stamp = time.strftime('%Y/%m/%d %H:%M:%S')
        perf_writer.writerow([time_stamp, cpu_usage, mem_usage, max_temp])
        logging.info('Time: %s, CPU: %s, Mem: %s, Temp: %s',
                     time_stamp, cpu_usage, mem_usage, max_temp)

    def _record_90th_metrics(self, perf_values, perf_metrics):
        """Record 90th percentile metric of attribute performance values.

        @param perf_values: dict attribute performance values.
        @param perf_metrics: dict attribute 90%-ile performance metrics.
        """
        # Calculate 90th percentile for each attribute.
        cpu_values = perf_values['cpu']
        mem_values = perf_values['mem']
        temp_values = perf_values['temp']
        cpu_metric = sorted(cpu_values)[(len(cpu_values) * 9) // 10]
        mem_metric = sorted(mem_values)[(len(mem_values) * 9) // 10]
        temp_metric = sorted(temp_values)[(len(temp_values) * 9) // 10]
        logging.info('== Performance values: %s', perf_values)
        logging.info('== 90th percentile: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)

        # Append 90th percentile to each attribute performance metric.
        perf_metrics['cpu'].append(cpu_metric)
        perf_metrics['mem'].append(mem_metric)
        perf_metrics['temp'].append(temp_metric)

    def _get_median_metrics(self, metrics):
        """Returns median of each attribute performance metric.

        If no metric values were recorded, return 0 for each metric.

        @param metrics: dict of attribute performance metric lists.
        @returns dict of attribute performance metric medians.
        """
        if len(metrics['cpu']):
            cpu_metric = sorted(metrics['cpu'])[len(metrics['cpu']) // 2]
            mem_metric = sorted(metrics['mem'])[len(metrics['mem']) // 2]
            temp_metric = sorted(metrics['temp'])[len(metrics['temp']) // 2]
        else:
            cpu_metric = 0
            mem_metric = 0
            temp_metric = 0
        logging.info('== Median: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)
        return {'cpu': cpu_metric, 'mem': mem_metric, 'temp': temp_metric}
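
    # Worked example (illustrative values): for six recorded samples such as
    # ['10.000', '11.000', '12.000', '13.000', '14.000', '15.000'], the 90th
    # percentile index is (6 * 9) // 10 == 5, selecting '15.000', and the
    # median index is 6 // 2 == 3, selecting '13.000'. The values compared
    # here are the '%.3f'-formatted strings appended by
    # _record_perf_measurements(), so the ordering is lexicographic.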

    def _append_to_aggregated_file(self, ts_file, ag_file):
        """Append contents of perf timestamp file to perf aggregated file.

        @param ts_file: file handle for performance timestamped file.
        @param ag_file: file handle for performance aggregated file.
        """
        next(ts_file)  # Skip first line (the header) of timestamped file.
        for line in ts_file:
            ag_file.write(line)

    def _copy_aggregated_to_resultsdir(self, aggregated_fpath):
        """Copy perf aggregated file to results dir for AutoTest results.

        Note: The AutoTest results default directory is located at /usr/local/
        autotest/results/default/longevity_Tracker/results

        @param aggregated_fpath: file path to Aggregated performance values.
        """
        results_fpath = os.path.join(self.resultsdir, 'perf.csv')
        shutil.copy(aggregated_fpath, results_fpath)
        logging.info('Copied %s to %s', aggregated_fpath, results_fpath)

    def _write_perf_keyvals(self, perf_results):
        """Write perf results to keyval file for AutoTest results.

        @param perf_results: dict of attribute performance metrics.
        """
        perf_keyval = {}
        perf_keyval['cpu_usage'] = perf_results['cpu']
        perf_keyval['memory_usage'] = perf_results['mem']
        perf_keyval['temperature'] = perf_results['temp']
        self.write_perf_keyval(perf_keyval)

    def _write_perf_results(self, perf_results):
        """Write perf results to results-chart.json file for Perf Dashboard.

        @param perf_results: dict of attribute performance metrics.
        """
        cpu_metric = perf_results['cpu']
        mem_metric = perf_results['mem']
        ec_metric = perf_results['temp']
        self.output_perf_value(description='cpu_usage', value=cpu_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='mem_usage', value=mem_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='max_temp', value=ec_metric,
                               units='Celsius', higher_is_better=False)

    def _read_perf_results(self):
        """Read perf results from results-chart.json file for Perf Dashboard.

        @returns dict of perf results, formatted as JSON chart data.
        """
        results_file = os.path.join(self.resultsdir, 'results-chart.json')
        with open(results_file, 'r') as fp:
            contents = fp.read()
            chart_data = json.loads(contents)
        return chart_data

    def _get_point_id(self, cros_version, epoch_minutes):
        """Compute point ID from ChromeOS version number and epoch minutes.

        @param cros_version: String of ChromeOS version number.
        @param epoch_minutes: String of minutes since 1970.

        @return unique integer ID computed from given version and epoch.
        """
        # Number of digits from each part of the Chrome OS version string.
        cros_version_col_widths = [0, 4, 3, 2]

        def get_digits(version_num, column_widths):
            if re.match(VERSION_PATTERN, version_num):
                computed_string = ''
                version_parts = version_num.split('.')
                for i, version_part in enumerate(version_parts):
                    if column_widths[i]:
                        computed_string += version_part.zfill(column_widths[i])
                return computed_string
            else:
                return None

        cros_digits = get_digits(cros_version, cros_version_col_widths)
        epoch_digits = epoch_minutes[-8:]
        if not cros_digits:
            return None
        return int(epoch_digits + cros_digits)
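
    # Worked example (illustrative values): for cros_version '0.7520.62.0'
    # the column widths [0, 4, 3, 2] yield cros_digits '752006200' (the
    # leading '0' is dropped, '62' is padded to '062' and '0' to '00'); for
    # epoch_minutes '24837420' the resulting point_id is
    # int('24837420' + '752006200') == 24837420752006200.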

    def _get_kiosk_app_info(self, app_id):
        """Get kiosk app name and version from manifest.json file.

        Get the Kiosk App name and version strings from the manifest file of
        the specified |app_id| Extension in the currently running session. If
        |app_id| is empty or None, then return 'none' for the kiosk app info.

        Raise an error if no manifest is found (i.e., |app_id| is not
        running), or if multiple manifest files are found (i.e., |app_id| is
        running, but the |app_id| dir contains multiple versions or manifest
        files).

        @param app_id: string kiosk application identification.
        @returns dict of Kiosk name and version number strings.
        @raises: An error.TestError if a single manifest is not found.
        """
        kiosk_app_info = {'name': 'none', 'version': 'none'}
        if not app_id:
            return kiosk_app_info

        # Get path to manifest file of the running Kiosk app_id.
        app_manifest_pattern = (MANIFEST_PATTERN % app_id)
        logging.info('app_manifest_pattern: %s', app_manifest_pattern)
        file_paths = glob.glob(app_manifest_pattern)
        # Raise error if current session has no Kiosk Apps running.
        if len(file_paths) == 0:
            raise error.TestError('Kiosk App ID=%s is not running.' % app_id)
        # Raise error if running Kiosk App has multiple manifest files.
        if len(file_paths) > 1:
            raise error.TestError('Kiosk App ID=%s has multiple manifest '
                                  'files.' % app_id)
        kiosk_manifest = open(file_paths[0], 'r').read()
        manifest_json = json.loads(kiosk_manifest)
        # If manifest is missing name or version key, set to 'undefined'.
        kiosk_app_info['name'] = manifest_json.get('name', 'undefined')
        kiosk_app_info['version'] = manifest_json.get('version', 'undefined')
        return kiosk_app_info

    def _format_data_for_upload(self, chart_data):
        """Collect chart data into an uploadable data JSON object.

        @param chart_data: performance results formatted as chart data.
        @returns dict with a single 'data' key whose value is the
            JSON-encoded dashboard entry.
        """
        perf_values = {
            'format_version': '1.0',
            'benchmark_name': self.test_suite_name,
            'charts': chart_data,
        }

        dash_entry = {
            'master': 'ChromeOS_Enterprise',
            'bot': 'cros-%s' % self.board_name,
            'point_id': self.point_id,
            'versions': {
                'cros_version': self.chromeos_version,
                'chrome_version': self.chrome_version,
            },
            'supplemental': {
                'default_rev': 'r_cros_version',
                'hardware_identifier': 'a_' + self.hw_id,
                'kiosk_app_name': 'a_' + self.kiosk_app_name,
                'kiosk_app_version': 'r_' + self.kiosk_app_version
            },
            'chart_data': perf_values
        }
        return {'data': json.dumps(dash_entry)}

    def _send_to_dashboard(self, data_obj):
        """Send formatted perf data to the perf dashboard.

        @param data_obj: data object as returned by _format_data_for_upload().

        @raises PerfUploadingError if an exception was raised when uploading.
        """
        logging.debug('data_obj: %s', data_obj)
        encoded = urllib.urlencode(data_obj)
        req = urllib2.Request(DASHBOARD_UPLOAD_URL, encoded)
        try:
            urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            raise PerfUploadingError('HTTPError: %d %s for JSON %s\n' %
                                     (e.code, e.msg, data_obj['data']))
        except urllib2.URLError as e:
            raise PerfUploadingError('URLError: %s for JSON %s\n' %
                                     (str(e.reason), data_obj['data']))
        except httplib.HTTPException:
            raise PerfUploadingError('HTTPException for JSON %s\n' %
                                     data_obj['data'])

    def _get_chrome_version(self):
        """Get the Chrome version number and milestone as strings.

        Invoke "chrome --version" to get the version number and milestone.

        @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form "W.X.Y.Z")
            and "milestone" is the first component of the version number
            (the "W" from "W.X.Y.Z"). If the version number cannot be parsed
            in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
            of "chrome --version" and the milestone will be the empty string.
        """
        chrome_version = utils.system_output(constants.CHROME_VERSION_COMMAND,
                                             ignore_status=True)
        chrome_version = utils.parse_chrome_version(chrome_version)
        return chrome_version

    def _open_perf_file(self, file_path):
        """Open a perf file. Write header line if new. Return file object.

        If the file on |file_path| already exists, then open file for
        appending only. Otherwise open for writing only.

        @param file_path: file path for perf file.
        @returns file object for the perf file.
        """
        # If file exists, open it for appending. Do not write header.
        if os.path.isfile(file_path):
            perf_file = open(file_path, 'a+')
        # Otherwise, create it for writing. Write header on first line.
        else:
            perf_file = open(file_path, 'w')  # Erase if existing file.
            perf_file.write('Time,CPU,Memory,Temperature (C)\r\n')
        return perf_file
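
    # Illustrative file contents (values are made up): a perf_<timestamp>.csv
    # produced by _open_perf_file() and _record_perf_measurements() looks
    # like:
    #   Time,CPU,Memory,Temperature (C)
    #   2015/06/01 12:01:00,12.345,48.210,41.000
    #   2015/06/01 12:02:00,11.870,48.355,41.500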

    def _run_test_cycle(self):
        """Track performance of Chrome OS over a long period of time.

        This method collects performance measurements, and calculates metrics
        to upload to the performance dashboard. It creates two files to
        collect and store performance values and results: perf_<timestamp>.csv
        and perf_aggregated.csv.

        At the start, it creates a unique perf timestamped file in the test's
        temp_dir. As the cycle runs, it saves a time-stamped performance
        value after each sample interval. Periodically, it calculates
        the 90th percentile performance metrics from these values.

        The perf_<timestamp> files on the device will survive multiple runs
        of the longevity_Tracker by the server-side test, and will also
        survive multiple runs of the server-side test. The script will
        delete them after 10 days, to prevent filling up the SSD.

        At the end, it opens the perf aggregated file in the test's temp_dir,
        and appends the contents of the perf timestamped file. It then
        copies the perf aggregated file to the results directory as perf.csv.
        This perf.csv file will be consumed by the AutoTest backend when the
        server-side test ends.

        Note that the perf_aggregated.csv file will grow larger with each run
        of longevity_Tracker on the device by the server-side test. However,
        the server-side test will delete this file at the end.

        This method also calculates 90th percentile and median metrics, and
        returns the median metrics. Median metrics will be pushed to the perf
        dashboard with a unique point_id.

        @returns dict of median performance metrics.
        """
        # Allow the system to stabilize before starting to take measurements.
        test_start_time = time.time()
        time.sleep(STABILIZATION_DURATION)

        perf_values = {'cpu': [], 'mem': [], 'temp': []}
        perf_metrics = {'cpu': [], 'mem': [], 'temp': []}

        # Create perf_<timestamp> file and writer.
        timestamp_fname = (PERF_FILE_NAME_PREFIX +
                           time.strftime('_%Y-%m-%d_%H-%M') + '.csv')
        timestamp_fpath = os.path.join(self.temp_dir, timestamp_fname)
        timestamp_file = self._open_perf_file(timestamp_fpath)
        timestamp_writer = csv.writer(timestamp_file)

        # Align time of loop start with the sample interval.
        test_elapsed_time = self.elapsed_time(test_start_time)
        time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))
        test_elapsed_time = self.elapsed_time(test_start_time)

        metric_start_time = time.time()
        metric_prev_time = metric_start_time

        metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
        offset = self.modulo_time(metric_elapsed_prev_time, METRIC_INTERVAL)
        metric_timer = metric_elapsed_prev_time + offset
        while self.elapsed_time(test_start_time) <= TEST_DURATION:
            if os.path.isfile(EXIT_FLAG_FILE):
                logging.info('Exit flag file detected. Exiting test.')
                break
            self._record_perf_measurements(perf_values, timestamp_writer)

            # Periodically calculate and record 90th percentile metrics.
            metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
            metric_timer = metric_elapsed_prev_time + offset
            if metric_timer >= METRIC_INTERVAL:
                self._record_90th_metrics(perf_values, perf_metrics)
                perf_values = {'cpu': [], 'mem': [], 'temp': []}

                # Set previous time to current time.
                metric_prev_time = time.time()
                metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)

                # Calculate offset based on the original start time.
                metric_elapsed_time = self.elapsed_time(metric_start_time)
                offset = self.modulo_time(metric_elapsed_time, METRIC_INTERVAL)

                # Set the timer to time elapsed plus offset to next interval.
                metric_timer = metric_elapsed_prev_time + offset

            # Sync the loop time to the sample interval.
            test_elapsed_time = self.elapsed_time(test_start_time)
            time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))

        # Close perf timestamp file.
        timestamp_file.close()

        # Open perf timestamp file to read, and aggregated file to append.
        timestamp_file = open(timestamp_fpath, 'r')
        aggregated_fname = (PERF_FILE_NAME_PREFIX + '_aggregated.csv')
        aggregated_fpath = os.path.join(self.temp_dir, aggregated_fname)
        aggregated_file = self._open_perf_file(aggregated_fpath)

        # Append contents of perf timestamp file to perf aggregated file.
        self._append_to_aggregated_file(timestamp_file, aggregated_file)
        timestamp_file.close()
        aggregated_file.close()

        # Copy perf aggregated file to test results directory.
        self._copy_aggregated_to_resultsdir(aggregated_fpath)

        # Return median of each attribute performance metric.
        return self._get_median_metrics(perf_metrics)

    def run_once(self, kiosk_app_attributes=None):
        # Default to no Kiosk app when no attributes are passed in.
        app_name = app_id = ext_page = None
        self.subtest_name = None
        if kiosk_app_attributes:
            app_name, app_id, ext_page = (
                kiosk_app_attributes.rstrip().split(':'))
            self.subtest_name = app_name
        self.board_name = utils.get_board()
        self.hw_id = self._get_hwid()
        self.chrome_version = self._get_chrome_version()[0]
        self.chromeos_version = '0.' + utils.get_chromeos_release_version()
        self.epoch_minutes = str(int(time.time() / 60))  # Minutes since 1970.
        self.point_id = self._get_point_id(self.chromeos_version,
                                           self.epoch_minutes)

        kiosk_info = self._get_kiosk_app_info(app_id)
        self.kiosk_app_name = kiosk_info['name']
        self.kiosk_app_version = kiosk_info['version']
        self.test_suite_name = self.tagged_testname
        if self.subtest_name:
            self.test_suite_name += '.' + self.subtest_name

        # Delete exit flag file at start of test run.
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)

        # Run a single test cycle.
        self.perf_results = {'cpu': '0', 'mem': '0', 'temp': '0'}
        self.perf_results = self._run_test_cycle()

        # Write results for AutoTest to pick up at end of test.
        self._write_perf_keyvals(self.perf_results)
        self._write_perf_results(self.perf_results)

        # Post perf results directly to performance dashboard. You may view
        # uploaded data at https://chromeperf.appspot.com/new_points,
        # with test path pattern=ChromeOS_Enterprise/cros-*/longevity*/*
        chart_data = self._read_perf_results()
        data_obj = self._format_data_for_upload(chart_data)
        self._send_to_dashboard(data_obj)

    def cleanup(self):
        """Delete aged perf data files and the exit flag file."""
        # Quote the -name pattern so the shell passes it to find unexpanded.
        cmd = ("find %s -name '%s*' -type f -mmin +%s -delete" %
               (self.temp_dir, PERF_FILE_NAME_PREFIX, OLD_FILE_AGE))
        os.system(cmd)
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)