Home | History | Annotate | Download | only in eas
      1 # SPDX-License-Identifier: Apache-2.0
      2 #
      3 # Copyright (C) 2016, ARM Limited and contributors.
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License"); you may
      6 # not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 # http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     13 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 #
     17 
     18 from math import isnan
     19 
     20 import numpy as np
     21 import pandas as pd
     22 
     23 from bart.common.Utils import area_under_curve
     24 
     25 from energy_model import EnergyModel, EnergyModelCapacityError
     26 from perf_analysis import PerfAnalysis
     27 from test import LisaTest, experiment_test
     28 from trace import Trace
     29 from unittest import SkipTest
     30 
     31 
     32 WORKLOAD_PERIOD_MS =  10
     33 SET_IS_BIG_LITTLE = True
     34 SET_INITIAL_TASK_UTIL = True
     35 
     36 class _EnergyModelTest(LisaTest):
     37     """
     38     "Abstract" base class for generic EAS tests using the EnergyModel class
     39 
     40     Subclasses should provide a .workloads member to populate the 'wloads' field
     41     of the experiments_conf for the Executor. A set of helper methods are
     42     provided for making assertions about behaviour, most importantly the _test*
     43     methods which make assertions in a generic way.
     44     """
     45 
     46     test_conf = {
     47         "ftrace" : {
     48             "events" : [
     49                 "sched_overutilized",
     50                 "sched_energy_diff",
     51                 "sched_load_avg_task",
     52                 "sched_load_avg_cpu",
     53                 "sched_migrate_task",
     54                 "sched_switch",
     55                 "cpu_frequency",
     56                 "cpu_idle",
     57                 "cpu_capacity",
     58             ],
     59         },
     60         "modules": ["cgroups"],
     61     }
     62 
     63     negative_slack_allowed_pct = 15
     64     """Percentage of RT-App task activations with negative slack allowed"""
     65 
     66     energy_est_threshold_pct = 20
     67     """
     68     Allowed margin for error in estimated energy cost for task placement,
     69     compared to optimal placment.
     70     """
     71 
     72     @classmethod
     73     def setUpClass(cls, *args, **kwargs):
     74         super(_EnergyModelTest, cls).runExperiments(*args, **kwargs)
     75 
     76     @classmethod
     77     def _getExperimentsConf(cls, test_env):
     78         if not test_env.nrg_model:
     79             try:
     80                 test_env.nrg_model = EnergyModel.from_target(test_env.target)
     81             except Exception as e:
     82                 raise SkipTest(
     83                     'This test requires an EnergyModel for the platform. '
     84                     'Either provide one manually or ensure it can be read '
     85                     'from the filesystem: {}'.format(e))
     86 
     87         conf = {
     88             'tag' : 'energy_aware',
     89             'flags' : ['ftrace', 'freeze_userspace'],
     90             'sched_features' : 'ENERGY_AWARE',
     91         }
     92 
     93         if 'cpufreq' in test_env.target.modules:
     94             available_govs = test_env.target.cpufreq.list_governors(0)
     95             if 'schedutil' in available_govs:
     96                 conf['cpufreq'] = {'governor' : 'schedutil'}
     97             elif 'sched' in available_govs:
     98                 conf['cpufreq'] = {'governor' : 'sched'}
     99 
    100         return {
    101             'wloads' : cls.workloads,
    102             'confs' : [conf],
    103         }
    104 
    105     @classmethod
    106     def _experimentsInit(cls, *args, **kwargs):
    107         super(_EnergyModelTest, cls)._experimentsInit(*args, **kwargs)
    108 
    109         if SET_IS_BIG_LITTLE:
    110             # This flag doesn't exist on mainline-integration kernels, so
    111             # don't worry if the file isn't present (hence verify=False)
    112             cls.target.write_value(
    113                 "/proc/sys/kernel/sched_is_big_little", 1, verify=False)
    114 
    115         if SET_INITIAL_TASK_UTIL:
    116             # This flag doesn't exist on all kernels, so don't worry if the file
    117             # isn't present (hence verify=False)
    118             cls.target.write_value(
    119                 "/proc/sys/kernel/sched_initial_task_util", 1024, verify=False)
    120 
    121 
    122     def get_task_utils_df(self, experiment):
    123         """
    124         Get a DataFrame with the *expected* utilization of each task over time
    125 
    126         :param experiment: The :class:Experiment to examine
    127         :returns: A Pandas DataFrame with a column for each task, showing how
    128                   the utilization of that task varies over time
    129         """
    130         util_scale = self.te.nrg_model.capacity_scale
    131 
    132         transitions = {}
    133         def add_transition(time, task, util):
    134             if time not in transitions:
    135                 transitions[time] = {task: util}
    136             else:
    137                 transitions[time][task] = util
    138 
    139         # First we'll build a dict D {time: {task_name: util}} where D[t][n] is
    140         # the expected utilization of task n from time t.
    141         for task, params in experiment.wload.params['profile'].iteritems():
    142             time = self.get_start_time(experiment) + params['delay']
    143             add_transition(time, task, 0)
    144             for _ in range(params.get('loops', 1)):
    145                 for phase in params['phases']:
    146                     util = (phase.duty_cycle_pct * util_scale / 100.)
    147                     add_transition(time, task, util)
    148                     time += phase.duration_s
    149             add_transition(time, task, 0)
    150 
    151         index = sorted(transitions.keys())
    152         df = pd.DataFrame([transitions[k] for k in index], index=index)
    153         return df.fillna(method='ffill')
    154 
    155     def get_task_cpu_df(self, experiment):
    156         """
    157         Get a DataFrame mapping task names to the CPU they ran on
    158 
    159         Use the sched_switch trace event to find which CPU each task ran
    160         on. Does not reflect idleness - tasks not running are shown as running
    161         on the last CPU they woke on.
    162 
    163         :param experiment: The :class:Experiment to examine
    164         :returns: A Pandas DataFrame with a column for each task, showing the
    165                   CPU that the task was "on" at each moment in time
    166         """
    167         tasks = experiment.wload.tasks.keys()
    168         trace = self.get_trace(experiment)
    169 
    170         df = trace.ftrace.sched_switch.data_frame[['next_comm', '__cpu']]
    171         df = df[df['next_comm'].isin(tasks)]
    172         df = df.pivot(index=df.index, columns='next_comm').fillna(method='ffill')
    173         cpu_df = df['__cpu']
    174         # Drop consecutive duplicates
    175         cpu_df = cpu_df[(cpu_df.shift(+1) != cpu_df).any(axis=1)]
    176         return cpu_df
    177 
    178     def _sort_power_df_columns(self, df):
    179         """
    180         Helper method to re-order the columns of a power DataFrame
    181 
    182         This has no significance for code, but when examining DataFrames by hand
    183         they are easier to understand if the columns are in a logical order.
    184         """
    185         node_cpus = [node.cpus for node in self.te.nrg_model.root.iter_nodes()]
    186         return pd.DataFrame(df, columns=[c for c in node_cpus if c in df])
    187 
    188     def get_power_df(self, experiment):
    189         """
    190         Considering only the task placement, estimate power usage over time
    191 
    192         Examine a trace and use :meth:EnergyModel.estimate_from_cpu_util to get
    193         a DataFrame showing the estimated power usage over time. This assumes
    194         perfect cpuidle and cpufreq behaviour.
    195 
    196         :param experiment: The :class:Experiment to examine
    197         :returns: A Pandas DataFrame with a column node in the energy model
    198                   (keyed with a tuple of the CPUs contained by that node) Shows
    199                   the estimated power over time.
    200         """
    201         task_cpu_df = self.get_task_cpu_df(experiment)
    202         task_utils_df = self.get_task_utils_df(experiment)
    203 
    204         tasks = experiment.wload.tasks.keys()
    205 
    206         # Create a combined DataFrame with the utilization of a task and the CPU
    207         # it was running on at each moment. Looks like:
    208         #                       utils                  cpus
    209         #          task_wmig0 task_wmig1 task_wmig0 task_wmig1
    210         # 2.375056      102.4      102.4        NaN        NaN
    211         # 2.375105      102.4      102.4        2.0        NaN
    212 
    213         df = pd.concat([task_utils_df, task_cpu_df],
    214                        axis=1, keys=['utils', 'cpus'])
    215         df = df.sort_index().fillna(method='ffill')
    216         nrg_model = self.executor.te.nrg_model
    217 
    218         # Now make a DataFrame with the estimated power at each moment.
    219         def est_power(row):
    220             cpu_utils = [0 for cpu in nrg_model.cpus]
    221             for task in tasks:
    222                 cpu = row['cpus'][task]
    223                 util = row['utils'][task]
    224                 if not isnan(cpu):
    225                     cpu_utils[int(cpu)] += util
    226             power = nrg_model.estimate_from_cpu_util(cpu_utils)
    227             columns = power.keys()
    228             return pd.Series([power[c] for c in columns], index=columns)
    229         return self._sort_power_df_columns(df.apply(est_power, axis=1))
    230 
    231     def get_expected_power_df(self, experiment):
    232         """
    233         Estimate *optimal* power usage over time
    234 
    235         Examine a trace and use :meth:get_optimal_placements and
    236         :meth:EnergyModel.estimate_from_cpu_util to get a DataFrame showing the
    237         estimated power usage over time under ideal EAS behaviour.
    238 
    239         :param experiment: The :class:Experiment to examine
    240         :returns: A Pandas DataFrame with a column each node in the energy model
    241                   (keyed with a tuple of the CPUs contained by that node) and a
    242                   "power" column with the sum of other columns. Shows the
    243                   estimated *optimal* power over time.
    244         """
    245         task_utils_df = self.get_task_utils_df(experiment)
    246 
    247         nrg_model = self.te.nrg_model
    248 
    249         def exp_power(row):
    250             task_utils = row.to_dict()
    251             expected_utils = nrg_model.get_optimal_placements(task_utils)
    252             power = nrg_model.estimate_from_cpu_util(expected_utils[0])
    253             columns = power.keys()
    254             return pd.Series([power[c] for c in columns], index=columns)
    255         return self._sort_power_df_columns(
    256             task_utils_df.apply(exp_power, axis=1))
    257 
    258     def _test_slack(self, experiment, tasks):
    259         """
    260         Assert that the RTApp workload was given enough performance
    261 
    262         Use :class:PerfAnalysis to find instances where the experiment's RT-App
    263         workload wasn't able to complete its activations (i.e. its reported
    264         "slack" was negative). Assert that this happened less that
    265         ``negative_slack_allowed_pct`` percent of the time.
    266 
    267         :meth:_test_task_placement asserts that estimated energy usage was
    268         low. That will pass for runs where too *little* energy was used,
    269         compromising performance. This method provides a separate test to
    270         counteract that problem.
    271         """
    272 
    273         pa = PerfAnalysis(experiment.out_dir)
    274         for task in tasks:
    275             slack = pa.df(task)["Slack"]
    276 
    277             bad_activations_pct = len(slack[slack < 0]) * 100. / len(slack)
    278             if bad_activations_pct > self.negative_slack_allowed_pct:
    279                 raise AssertionError("task {} missed {}% of activations".format(
    280                     task, bad_activations_pct))
    281 
    282     def _test_task_placement(self, experiment, tasks):
    283         """
    284         Test that task placement was energy-efficient
    285 
    286         Use :meth:get_expected_power_df and :meth:get_power_df to estimate
    287         optimal and observed power usage for task placements of the experiment's
    288         workload. Assert that the observed power does not exceed the optimal
    289         power by more than 20%.
    290         """
    291         exp_power = self.get_expected_power_df(experiment)
    292         est_power = self.get_power_df(experiment)
    293 
    294         exp_energy = area_under_curve(exp_power.sum(axis=1), method='rect')
    295         est_energy = area_under_curve(est_power.sum(axis=1), method='rect')
    296 
    297         msg = 'Estimated {} bogo-Joules to run workload, expected {}'.format(
    298             est_energy, exp_energy)
    299         threshold = exp_energy * (1 + (self.energy_est_threshold_pct / 100.))
    300         self.assertLess(est_energy, threshold, msg=msg)
    301 
    302 class OneSmallTask(_EnergyModelTest):
    303     """
    304     Test EAS for a single 20% task over 2 seconds
    305     """
    306     workloads = {
    307         'one_small' : {
    308             'type' : 'rt-app',
    309             'conf' : {
    310                 'class' : 'periodic',
    311                 'params' : {
    312                     'duty_cycle_pct': 20,
    313                     'duration_s': 2,
    314                     'period_ms': 10,
    315                 },
    316                 'tasks' : 1,
    317                 'prefix' : 'many',
    318             },
    319         },
    320     }
    321     @experiment_test
    322     def test_slack(self, experiment, tasks):
    323         self._test_slack(experiment, tasks)
    324     @experiment_test
    325     def test_task_placement(self, experiment, tasks):
    326         self._test_task_placement(experiment, tasks)
    327 
    328 class ThreeSmallTasks(_EnergyModelTest):
    329     """
    330     Test EAS for 3 20% tasks over 2 seconds
    331     """
    332     workloads = {
    333         'three_small' : {
    334             'type' : 'rt-app',
    335             'conf' : {
    336                 'class' : 'periodic',
    337                 'params' : {
    338                     'duty_cycle_pct': 20,
    339                     'duration_s': 2,
    340                     'period_ms': 10,
    341                 },
    342                 'tasks' : 3,
    343                 'prefix' : 'many',
    344             },
    345         },
    346     }
    347     @experiment_test
    348     def test_slack(self, experiment, tasks):
    349         self._test_slack(experiment, tasks)
    350     @experiment_test
    351     def test_task_placement(self, experiment, tasks):
    352         self._test_task_placement(experiment, tasks)
    353 
    354 class TwoBigTasks(_EnergyModelTest):
    355     """
    356     Test EAS for 2 80% tasks over 2 seconds
    357     """
    358     workloads = {
    359         'two_big' : {
    360             'type' : 'rt-app',
    361             'conf' : {
    362                 'class' : 'periodic',
    363                 'params' : {
    364                     'duty_cycle_pct': 80,
    365                     'duration_s': 2,
    366                     'period_ms': 10,
    367                 },
    368                 'tasks' : 2,
    369                 'prefix' : 'many',
    370             },
    371         },
    372     }
    373     @experiment_test
    374     def test_slack(self, experiment, tasks):
    375         self._test_slack(experiment, tasks)
    376     @experiment_test
    377     def test_task_placement(self, experiment, tasks):
    378         self._test_task_placement(experiment, tasks)
    379 
    380 class TwoBigThreeSmall(_EnergyModelTest):
    381     """
    382     Test EAS for 2 70% tasks and 3 10% tasks over 2 seconds
    383     """
    384     workloads = {
    385         'two_big_three_small' : {
    386             'type' : 'rt-app',
    387             'conf' : {
    388                 'class' : 'profile',
    389                 'params' : {
    390                     'large' : {
    391                         'kind' : 'Periodic',
    392                         'params' : {
    393                             'duty_cycle_pct': 70,
    394                             'duration_s': 2,
    395                             'period_ms': WORKLOAD_PERIOD_MS,
    396                         },
    397                         'tasks' : 2,
    398                     },
    399                     'small' : {
    400                         'kind' : 'Periodic',
    401                         'params' : {
    402                             'duty_cycle_pct': 10,
    403                             'duration_s': 2,
    404                             'period_ms': WORKLOAD_PERIOD_MS,
    405                         },
    406                         'tasks' : 3,
    407                     },
    408                 },
    409             },
    410         },
    411     }
    412     @experiment_test
    413     def test_slack(self, experiment, tasks):
    414         self._test_slack(experiment, tasks)
    415     @experiment_test
    416     def test_task_placement(self, experiment, tasks):
    417         self._test_task_placement(experiment, tasks)
    418 
    419 class RampUp(_EnergyModelTest):
    420     """
    421     Test EAS for a task ramping from 5% up to 70% over 2 seconds
    422     """
    423     workloads = {
    424         "ramp_up" : {
    425             "type": "rt-app",
    426             "conf" : {
    427                 "class"  : "profile",
    428                 "params"  : {
    429                     "r5_10-60" : {
    430                         "kind"   : "Ramp",
    431                         "params" : {
    432                             "period_ms" : 16,
    433                             "start_pct" :  5,
    434                             "end_pct"   : 70,
    435                             "delta_pct" :  5,
    436                             "time_s"    :  2,
    437                          },
    438                     },
    439                 },
    440             },
    441         },
    442     }
    443 
    444     @experiment_test
    445     def test_slack(self, experiment, tasks):
    446         self._test_slack(experiment, tasks)
    447     @experiment_test
    448     def test_task_placement(self, experiment, tasks):
    449         self._test_task_placement(experiment, tasks)
    450 
    451 class RampDown(_EnergyModelTest):
    452     """
    453     Test EAS for a task ramping from 70% down to 5% over 2 seconds
    454     """
    455     workloads = {
    456         "ramp_down" : {
    457             "type": "rt-app",
    458             "conf" : {
    459                 "class"  : "profile",
    460                 "params"  : {
    461                     "r5_10-60" : {
    462                         "kind"   : "Ramp",
    463                         "params" : {
    464                             "period_ms" : 16,
    465                             "start_pct" : 70,
    466                             "end_pct"   :  5,
    467                             "delta_pct" :  5,
    468                             "time_s"    :  2,
    469                          },
    470                     },
    471                 },
    472             },
    473         },
    474     }
    475 
    476     @experiment_test
    477     def test_slack(self, experiment, tasks):
    478         self._test_slack(experiment, tasks)
    479     @experiment_test
    480     def test_task_placement(self, experiment, tasks):
    481         self._test_task_placement(experiment, tasks)
    482 
    483 class EnergyModelWakeMigration(_EnergyModelTest):
    484     """
    485     Test EAS for tasks alternating beetween 10% and 50%
    486     """
    487     workloads = {
    488         'em_wake_migration' : {
    489             'type' : 'rt-app',
    490             'conf' : {
    491                 'class' : 'profile',
    492                 'params' : {
    493                     'wmig' : {
    494                         'kind' : 'Step',
    495                         'params' : {
    496                             'start_pct': 10,
    497                             'end_pct': 50,
    498                             'time_s': 2,
    499                             'loops': 2
    500                         },
    501                         # Create one task for each big cpu
    502                         'tasks' : 'big',
    503                     },
    504                 },
    505             },
    506         },
    507     }
    508     @experiment_test
    509     def test_slack(self, experiment, tasks):
    510         self._test_slack(experiment, tasks)
    511     @experiment_test
    512     def test_task_placement(self, experiment, tasks):
    513         self._test_task_placement(experiment, tasks)
    514