#!/usr/bin/python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import print_function
from collections import namedtuple
import json, os, re, sys

AUTOTEST_NAME = 'graphics_PiglitBVT'
INPUT_DIR = './piglit_logs/'
OUTPUT_DIR = './test_scripts/'
OUTPUT_FILE_PATTERN = OUTPUT_DIR + '/%s/' + AUTOTEST_NAME + '_%d.sh'
OUTPUT_FILE_SLICES = 20
PIGLIT_PATH = '/usr/local/piglit/lib/piglit/'
PIGLIT64_PATH = '/usr/local/piglit/lib64/piglit/'

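# Illustrative example (not executed): for a hypothetical family 'broadwell',
# OUTPUT_FILE_PATTERN % ('broadwell', 3) expands to
#   './test_scripts//broadwell/graphics_PiglitBVT_3.sh'
# (the doubled slash is harmless). Slice 0 is the unsliced script holding all
# passing tests.
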
# Do not generate scripts with "bash -e" as we want to handle errors ourselves.
FILE_HEADER = '#!/bin/bash\n\n'

# Script fragment function that kicks off individual piglit tests.
FILE_RUN_TEST = '\n\
function run_test()\n\
{\n\
  local name="$1"\n\
  local time="$2"\n\
  local command="$3"\n\
  echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"\n\
  echo "+ Running test [$name] of expected runtime $time sec: [$command]"\n\
  sync\n\
  $command\n\
  if [ $? == 0 ] ; then\n\
    let "need_pass--"\n\
    echo "+ pass :: $name"\n\
  else\n\
    let "failures++"\n\
    echo "+ fail :: $name"\n\
  fi\n\
}\n\
'

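# Illustrative example (not executed): a generated script calls the fragment
# above once per test, e.g.
#   run_test "spec/glsl-1.10/example" 0.0 "bin/glslparsertest example.frag pass"
# where the test name and command here are hypothetical placeholders.
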
# Script fragment that summarizes the overall status.
FILE_SUMMARY = 'popd\n\
\n\
if [ $need_pass == 0 ] ; then\n\
  echo "+---------------------------------------------+"\n\
  echo "| Overall pass, as all %d tests have passed. |"\n\
  echo "+---------------------------------------------+"\n\
else\n\
  echo "+-----------------------------------------------------------+"\n\
  echo "| Overall failure, as $need_pass tests did not pass and $failures failed. |"\n\
  echo "+-----------------------------------------------------------+"\n\
fi\n\
exit $need_pass\n\
'

# Control file template for executing a slice.
CONTROL_FILE = "\
# Copyright 2014 The Chromium OS Authors. All rights reserved.\n\
# Use of this source code is governed by a BSD-style license that can be\n\
# found in the LICENSE file.\n\
\n\
NAME = '" + AUTOTEST_NAME + "'\n\
AUTHOR = 'chromeos-gfx'\n\
PURPOSE = 'Collection of automated tests for OpenGL implementations.'\n\
CRITERIA = 'All tests in a slice have to pass, otherwise the slice fails.'\n\
TIME = 'SHORT'\n\
TEST_CATEGORY = 'Functional'\n\
TEST_CLASS = 'graphics'\n\
TEST_TYPE = 'client'\n\
JOB_RETRIES = 2\n\
\n\
BUG_TEMPLATE = {\n\
    'labels': ['Cr-OS-Kernel-Graphics'],\n\
}\n\
\n\
DOC = \"\"\"\n\
Piglit is a collection of automated tests for OpenGL implementations.\n\
\n\
The goal of Piglit is to help improve the quality of open source OpenGL drivers\n\
by providing developers with a simple means to perform regression tests.\n\
\n\
This control file runs slice %d out of %d slices of a passing subset of the\n\
original collection.\n\
\n\
http://piglit.freedesktop.org\n\
\"\"\"\n\
\n\
job.run_test('" + AUTOTEST_NAME + "', test_slice=%d)\
"

def output_control_file(sl, slices):
  """
  Write control file for slice sl to disk.
  """
  filename = 'control.%d' % sl
  with open(filename, 'w+') as f:
    print(CONTROL_FILE % (sl, slices, sl), file=f)

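# Illustrative example (not executed): output_control_file(3, 20) writes
# 'control.3' into the current working directory; the generated file ends in
#   job.run_test('graphics_PiglitBVT', test_slice=3)
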
def append_script_header(f, need_pass, piglit_path):
  """
  Write the beginning of the test script to f.
  """
  print(FILE_HEADER, file=f)
  # need_pass is the script variable that counts down to zero and gets returned.
  print('need_pass=%d' % need_pass, file=f)
  print('failures=0', file=f)
  print('PIGLIT_PATH=%s' % piglit_path, file=f)
  print('export PIGLIT_SOURCE_DIR=%s' % piglit_path, file=f)
  print('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PIGLIT_PATH/lib', file=f)
  print('export DISPLAY=:0', file=f)
  print('export XAUTHORITY=/home/chronos/.Xauthority', file=f)
  print('', file=f)
  print(FILE_RUN_TEST, file=f)
  print('', file=f)
  print('pushd $PIGLIT_PATH', file=f)

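# Illustrative example (not executed): append_script_header(f, 42, PIGLIT_PATH)
# starts the generated script with
#   need_pass=42
#   failures=0
#   PIGLIT_PATH=/usr/local/piglit/lib/piglit/
# followed by the exports, the run_test function and 'pushd $PIGLIT_PATH'.
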
def append_script_summary(f, need_pass):
  """
  Append the summary to the test script f with a required pass count.
  """
  print(FILE_SUMMARY % need_pass, file=f)


def mkdir_p(path):
  """
  Create all directories in path, like 'mkdir -p' (an existing path is fine).
  """
  try:
    os.makedirs(path)
  except OSError:
    if os.path.isdir(path):
      pass
    else:
      raise


def get_filepaths(family_root, regex):
  """
  Find all files that were placed into family_root.
  Used to find regular log files (*results.json) and expectations*.json.
  """
  main_files = []
  for root, _, files in os.walk(family_root):
    for filename in files:
      if re.search(regex, filename):
        main_files.append(os.path.join(root, filename))
  return main_files

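# Illustrative example (hypothetical layout): with
#   ./piglit_logs/broadwell/link/20_20_results.json
# on disk, get_filepaths('./piglit_logs/broadwell', r'results\.json$') would
# return that path.
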
def load_files(main_files):
  """
  The log files are JSON; load each one from disk into a dictionary.
  """
  d = {}
  for main_file in main_files:
    with open(main_file) as f:
      d[main_file] = json.load(f)
  return d


# Define a Test data structure containing the command line, runtime and
# pass/fail counts.
Test = namedtuple('Test', 'command time passing_count not_passing_count')

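# Illustrative example (hypothetical values): a test that passed in all four
# logs would be recorded as
#   Test(command='bin/foo -auto', time=1.2, passing_count=4,
#        not_passing_count=0)
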
def get_test_statistics(log_dict):
  """
  Figures out, for each test, how often it passed/failed, its command line
  and how long it runs (keeping the maximum observed time).
  """
  statistics = {}
  for main_file in log_dict:
    for test in log_dict[main_file]['tests']:
      # Initialize for all known test names to zero stats.
      statistics[test] = Test(None, 0.0, 0, 0)

  for main_file in log_dict:
    print('Updating statistics from %s.' % main_file, file=sys.stderr)
    tests = log_dict[main_file]['tests']
    for test in tests:
      command = statistics[test].command
      # Verify that each board uses the same command.
      if 'command' in tests[test]:
        if command:
          assert command == tests[test]['command']
        else:
          command = tests[test]['command']
      # Bump counts.
      if tests[test]['result'] == 'pass':
        statistics[test] = Test(command,
                                max(tests[test]['time'],
                                    statistics[test].time),
                                statistics[test].passing_count + 1,
                                statistics[test].not_passing_count)
      else:
        statistics[test] = Test(command,
                                statistics[test].time,
                                statistics[test].passing_count,
                                statistics[test].not_passing_count + 1)

  return statistics


def get_max_passing(statistics):
  """
  Gets the maximum pass count across all tests.
  """
  max_passing_count = 0
  for test in statistics:
    max_passing_count = max(statistics[test].passing_count, max_passing_count)
  return max_passing_count


def get_passing_tests(statistics, expectations):
  """
  Gets a list of all tests that never failed, have the maximum pass count and
  are not excluded by expectations.
  """
  tests = []
  max_passing_count = get_max_passing(statistics)
  for test in statistics:
    if (statistics[test].passing_count == max_passing_count and
        statistics[test].not_passing_count == 0):
      if test not in expectations:
        tests.append(test)
  return sorted(tests)


def get_intermittent_tests(statistics):
  """
  Gets tests that passed at least once (but not every time) and failed at
  least once.
  """
  tests = []
  max_passing_count = get_max_passing(statistics)
  for test in statistics:
    if (statistics[test].passing_count > 0 and
        statistics[test].passing_count < max_passing_count and
        statistics[test].not_passing_count > 0):
      tests.append(test)
  return sorted(tests)


def cleanup_command(cmd, piglit_path):
  """
  Make the script less location dependent by stripping paths from commands.
  """
  cmd = cmd.replace(piglit_path, '')
  cmd = cmd.replace('framework/../', '')
  cmd = cmd.replace('tests/../', '')
  return cmd

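# Illustrative example (hypothetical path): cleanup_command(
#     '/usr/local/piglit/lib/piglit/framework/../bin/foo', PIGLIT_PATH)
# returns 'bin/foo'.
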
def process_gpu_family(family, family_root):
  """
  This takes a directory with log files from the same gpu family and processes
  the result logs into |slices| runnable scripts.
  """
  print('--> Processing "%s".' % family, file=sys.stderr)
  piglit_path = PIGLIT_PATH
  if family == 'other':
    piglit_path = PIGLIT64_PATH

  log_dict = load_files(get_filepaths(family_root, r'results\.json$'))
  # Load all expectations but ignore suggested.
  exp_dict = load_files(get_filepaths(family_root, r'expectations.*\.json$'))
  statistics = get_test_statistics(log_dict)
  expectations = compute_expectations(exp_dict, statistics, family, piglit_path)
  # Try to help the person updating piglit by collecting the variance
  # across different log files into one expectations file per family.
  output_suggested_expectations(expectations, family, family_root)

  # Now start computing the new test scripts.
  passing_tests = get_passing_tests(statistics, expectations)

  slices = OUTPUT_FILE_SLICES
  current_slice = 1
  slice_tests = []
  time_slice = 0
  num_processed = 0
  num_pass_total = len(passing_tests)
  time_total = 0
  for test in passing_tests:
    time_total += statistics[test].time

  # Generate one script containing all tests. This can be used as a simpler way
  # to run everything, but also to have an easier diff when updating piglit.
  filename = OUTPUT_FILE_PATTERN % (family, 0)
  # Ensure the output directory for this family exists.
  mkdir_p(os.path.dirname(os.path.realpath(filename)))
  if passing_tests:
    with open(filename, 'w+') as f:
      append_script_header(f, num_pass_total, piglit_path)
      for test in passing_tests:
        cmd = cleanup_command(statistics[test].command, piglit_path)
        time_test = statistics[test].time
        print('run_test "%s" %.1f "%s"' % (test, 0.0, cmd), file=f)
      append_script_summary(f, num_pass_total)

  # Slice passing tests into several pieces to get below BVT's 20 minute limit.
  # TODO(ihf): If we ever get into the situation that one test takes more than
  # time_total / slices we would get an empty slice afterward. Fortunately the
  # stderr spew should warn the operator of this.
  for test in passing_tests:
    # We are still writing all the tests that belong in the current slice.
    if time_slice < time_total / slices:
      slice_tests.append(test)
      time_test = statistics[test].time
      time_slice += time_test
      num_processed += 1

    # We finished the slice. Now output the file with all tests in this slice.
    if time_slice >= time_total / slices or num_processed == num_pass_total:
      filename = OUTPUT_FILE_PATTERN % (family, current_slice)
      with open(filename, 'w+') as f:
        need_pass = len(slice_tests)
        append_script_header(f, need_pass, piglit_path)
        for test in slice_tests:
          # Make the script less location dependent by stripping paths from
          # commands.
          cmd = cleanup_command(statistics[test].command, piglit_path)
          time_test = statistics[test].time
          # TODO(ihf): Pass proper time_test instead of 0.0 once we can use it.
          print('run_test "%s" %.1f "%s"'
                % (test, 0.0, cmd), file=f)
        append_script_summary(f, need_pass)
        output_control_file(current_slice, slices)

      print('Slice %d: max runtime for %d passing tests is %.1f seconds.'
            % (current_slice, need_pass, time_slice), file=sys.stderr)
      current_slice += 1
      slice_tests = []
      time_slice = 0

  print('Total max runtime on "%s" for %d passing tests is %.1f seconds.' %
          (family, num_pass_total, time_total), file=sys.stderr)

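# Illustrative example (hypothetical numbers): with time_total = 1200 seconds
# and 20 slices, tests are appended to a slice until its runtime reaches
# 1200 / 20 = 60 seconds, then the slice is written out and a new one begins.
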
def insert_expectation(expectations, test, expectation):
  """
  Insert test with expectation into the expectations dictionary.
  """
  if test not in expectations:
    # Just copy the whole expectation.
    expectations[test] = expectation
  else:
    # Copy over known fields one at a time but don't overwrite existing ones.
    expectations[test]['result'] = expectation['result']
    if 'crbug' not in expectations[test] and 'crbug' in expectation:
      expectations[test]['crbug'] = expectation['crbug']
    if 'comment' not in expectations[test] and 'comment' in expectation:
      expectations[test]['comment'] = expectation['comment']
    if 'command' not in expectations[test] and 'command' in expectation:
      expectations[test]['command'] = expectation['command']
    if 'pass rate' not in expectations[test] and 'pass rate' in expectation:
      expectations[test]['pass rate'] = expectation['pass rate']

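# Illustrative example (hypothetical entries): if expectations['foo'] is
#   {'result': 'flaky', 'crbug': '123'}
# then inserting {'result': 'skip', 'crbug': '456'} updates 'result' to 'skip'
# but keeps the existing 'crbug' of '123'.
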
def compute_expectations(exp_dict, statistics, family, piglit_path):
  """
  Analyze intermittency and compute suggested test expectations.
  Test expectations are dictionaries with roughly the same structure as logs.
  """
  flaky_tests = get_intermittent_tests(statistics)
  print('Encountered %d tests that do not always pass in "%s" logs.' %
        (len(flaky_tests), family), file=sys.stderr)

  max_passing = get_max_passing(statistics)
  expectations = {}
  # Merge exp_dict which we loaded from disk into the new expectations.
  for filename in exp_dict:
    for test in exp_dict[filename]['tests']:
      expectation = exp_dict[filename]['tests'][test]
      # Historic results are not considered flaky as a pass rate makes no
      # sense without current logs.
      expectation['result'] = 'skip'
      if 'pass rate' in expectation:
        expectation.pop('pass rate')
      # Overwrite historic commands with recently observed ones.
      if test in statistics:
        expectation['command'] = cleanup_command(statistics[test].command,
                                                 piglit_path)
        insert_expectation(expectations, test, expectation)
      else:
        print('Historic test [%s] not found in new logs. '
              'Dropping it from expectations.' % test, file=sys.stderr)

  # Handle the computed flakiness from the result logs that we just processed.
  for test in flaky_tests:
    pass_rate = statistics[test].passing_count / float(max_passing)
    command = statistics[test].command
    # Loading JSON converts everything to strings anyway, so save it as such
    # and keep only 2 significant digits.
    expectation = {'result': 'flaky',
                   'pass rate': '%.2f' % pass_rate,
                   'command': command}
    insert_expectation(expectations, test, expectation)

  return expectations

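# Illustrative example (hypothetical counts): a test passing on 3 out of 4
# boards (with max_passing = 4) is recorded as
#   {'result': 'flaky', 'pass rate': '0.75', 'command': ...}
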
def output_suggested_expectations(expectations, family, family_root):
  """
  Write the suggested expectations for family as JSON into family_root.
  """
  filename = os.path.join(family_root,
                          'suggested_exp_to_rename_%s.json' % family)
  with open(filename, 'w+') as f:
    json.dump({'tests': expectations}, f, indent=2, sort_keys=True,
              separators=(',', ': '))

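# Illustrative example: for family 'other' the file written above is
#   <family_root>/suggested_exp_to_rename_other.json
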
def get_gpu_families(root):
  """
  We consider each directory under root a possible gpu family.
  """
  files = os.listdir(root)
  families = []
  for f in files:
    if os.path.isdir(os.path.join(root, f)):
      families.append(f)
  return families

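# Illustrative layout (hypothetical family name):
#   ./piglit_logs/broadwell/  ->  family 'broadwell' (uses PIGLIT_PATH)
#   ./piglit_logs/other/      ->  family 'other' (uses PIGLIT64_PATH)
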
def generate_scripts(root):
  """
  For each family under root create the corresponding set of passing test
  scripts.
  """
  families = get_gpu_families(root)
  for family in families:
    process_gpu_family(family, os.path.join(root, family))


# We check the log files in as highly compressed binaries.
print('Uncompressing log files...', file=sys.stderr)
os.system('bunzip2 ' + INPUT_DIR + '/*/*/*results.json.bz2')

# Generate the scripts.
generate_scripts(INPUT_DIR)

# The binaries should remain identical; otherwise use
#   git checkout -- piglit_output
# or similar to revert.
print('Recompressing log files...', file=sys.stderr)
os.system('bzip2 -9 ' + INPUT_DIR + '/*/*/*results.json')
    439