#    Copyright 2015-2017 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

     16 """Grammar module allows the user to easily define relations
     17 between data events and perform basic logical and arithmetic
     18 operations on the data. The parser also handles super-indexing
     19 and variable forwarding.
     20 """
from pyparsing import Literal, delimitedList, Optional, oneOf, nums,\
    alphas, alphanums, Forward, Word, opAssoc, operatorPrecedence, Combine, Group
import importlib
import pandas as pd
import types
import numpy as np
from trappy.stats.Topology import Topology
from trappy.stats import StatConf
from trappy.utils import handle_duplicate_index, listify


def parse_num(tokens):
    """Parser function for numerical data

    :param tokens: The grammar tokens
    :type tokens: list
    """
    return float(tokens[0])

# Suppressed Literals
LPAREN = Literal("(").suppress()
RPAREN = Literal(")").suppress()
COLON = Literal(":").suppress()
EXP_START = Literal("[").suppress()
EXP_END = Literal("]").suppress()

# Grammar Tokens

# Numeric Literals
INTEGER = Combine(Optional(oneOf("+ -")) + Word(nums))\
    .setParseAction(parse_num)
REAL = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
               Optional(Word(nums)) +
               Optional(oneOf("e E") + Optional(oneOf("+ -")) + Word(nums)))\
    .setParseAction(parse_num)

# Generic Identifier
IDENTIFIER = Word(alphas + '_', alphanums + '_')
# Python Like Function Name
FUNC_NAME = delimitedList(IDENTIFIER, delim=".", combine=True)
# Exponentiation operators
EXPONENTIATION_OPS = "**"
# Unary Operators
UNARY_OPS = oneOf("+ -")
# Multiplication/Division Operators
MULT_OPS = oneOf("* / // %")
# Addition/Subtraction Operators
SUM_OPS = oneOf("+ -")
# Relational Operators
REL_OPS = oneOf("> < >= <= == !=")
# Logical Operators
LOGICAL_OPS = oneOf("&& || & |")

# Operator to function mapping
OPERATOR_MAP = {
    "+": lambda a, b: a + b,
    "-": lambda a, b: a - b,
    "*": lambda a, b: a * b,
    "/": lambda a, b: a / b,
    "//": lambda a, b: a // b,
    "%": lambda a, b: a % b,
    "**": lambda a, b: a ** b,
    ">": lambda a, b: a > b,
    "<": lambda a, b: a < b,
    ">=": lambda a, b: a >= b,
    "<=": lambda a, b: a <= b,
    "||": lambda a, b: a or b,
    "&&": lambda a, b: a and b,
    "|": lambda a, b: a | b,
    "==": lambda a, b: a == b,
    "!=": lambda a, b: a != b,
    "&": lambda a, b: a & b
}


def eval_unary_op(tokens):
    """Unary Op Evaluation

    :param tokens: The grammar tokens
    :type tokens: list
    """

    params = tokens[0]
    if params[0] == "-":
        return -1 * params[1]
    else:
        return params[1]


def iterate_binary_ops(tokens):
    """An iterator for Binary Operation tokens

    :param tokens: The grammar tokens
    :type tokens: list
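
    For example (an illustrative call with a plain list of tokens)::

        list(iterate_binary_ops(["+", 2.0, "-", 3.0]))
        # [("+", 2.0), ("-", 3.0)]
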
    """

    itr = iter(tokens)
    while True:
        try:
            yield (next(itr), next(itr))
        except StopIteration:
            break


def eval_binary_op(tokens):
    """Evaluate Binary operators

    :param tokens: The grammar tokens
    :type tokens: list
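
    For example (an illustrative call with already-grouped tokens)::

        eval_binary_op([[2.0, "+", 3.0, "-", 1.0]])
        # returns 4.0
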
    """

    params = tokens[0]
    result = params[0]

    for opr, val in iterate_binary_ops(params[1:]):
        result = OPERATOR_MAP[opr](result, val)

    return result


def str_to_attr(cls_str):
    """Bring the specified attribute into the current scope
       and return a handle to it

    :param cls_str: A string naming the attribute (e.g. a class or
        function), optionally prefixed with its module
    :type cls_str: str

    :return: The attribute object
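
    For example (illustrative)::

        str_to_attr("numpy.mean")   # imports numpy and returns numpy.mean
        str_to_attr("Parser")       # looks up Parser in this module's globals
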
    """
    attr_name = cls_str.rsplit(".", 1)
    if len(attr_name) == 2:
        module_name, attr_name = attr_name
        mod = importlib.import_module(module_name)
        return getattr(mod, attr_name)
    else:
        attr_name = attr_name[0]
        return globals()[attr_name]


def get_parse_expression(parse_func, parse_var_id):
    """Return a parse expression for the given
    input parse actions
    """

    var_id = Group(
        FUNC_NAME + COLON + IDENTIFIER) | REAL | INTEGER | IDENTIFIER
    var_id.setParseAction(parse_var_id)

    # Forward declaration for an Arithmetic Expression
    arith_expr = Forward()
    func_call = Group(
        FUNC_NAME +
        LPAREN +
        Optional(
            Group(
                delimitedList(arith_expr))) +
        RPAREN)
    # An Arithmetic expression can have a var_id or
    # a function call as an operand
    # pylint: disable=expression-not-assigned
    arith_expr << operatorPrecedence(func_call | var_id,
                                     [
                                         (EXPONENTIATION_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (UNARY_OPS, 1,
                                          opAssoc.RIGHT, eval_unary_op),
                                         (MULT_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (SUM_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (REL_OPS, 2, opAssoc.LEFT,
                                          eval_binary_op),
                                         (LOGICAL_OPS, 2,
                                          opAssoc.LEFT, eval_binary_op)
                                     ])

    # pylint: enable=expression-not-assigned
    # Argument expression for a function call
    # An argument to a function can be an
    # IDENTIFIER, Arithmetic expression, REAL number, INTEGER or a
    # Function call itself
    func_call.setParseAction(parse_func)
    return arith_expr


class Parser(object):

    """A parser class for solving simple
    data accesses and super-indexing data

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be
        accessed from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data.
        This can be one of the standard :mod:`pandas.DataFrame`
        methods (e.g. pad, bfill, nearest). The default is pad,
        i.e. use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None.
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A window of time in which to apply the data
        accesses.  By default the data accesses happen across the
        whole trace.  With the window parameter you can limit it to a
        window of time inside the trace.  The first element of the
        tuple is the starting time and the second the ending time (set
        to None for end of trace).
    :type window: tuple

    :param filters: Restrict the parsing to the rows that match the
        specified criteria. For example:
        ::

            filters = {
                "pid": 3338,
                "cpu": [0, 2, 4],
            }

        will only consider rows whose pid column is 3338 and cpu is
        either 0, 2 or 4.
    :type filters: dict
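
    For example, an illustrative combination of the parameters above
    (the trace object and the filter values are placeholders)::

        parser = Parser(trace, window=(0, 5), method="bfill",
                        filters={"cpu": [0, 2]})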

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      |      Operator        | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 |    Left       |
        +----------------+----------------------+---------------+
        | Unary          | \+, \-               |    Right      |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         |    Left       |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-                |    Left       |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != |    Left       |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        |    Left       |
        +----------------+----------------------+---------------+
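
        These operators can be combined in a single expression, e.g.
        (illustrative; assumes a ``thermal`` event with a ``temp`` column)::

            parser.solve("(thermal:temp / 1000 >= 65) & (thermal:temp / 1000 < 75)")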

    - **Data Accessors**

        Since the goal of the grammar is to provide an
        easy language to access and compare data
        from a :mod:`trappy.ftrace.FTrace` object, the parser provides
        a simple notation to access this data.

        *Statically Defined Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("trappy.thermal.Thermal:temp * 2")

        *Aliasing*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            pvars = {"THERMAL": trappy.thermal.Thermal}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("THERMAL:temp * 2")

        *Using Event Name*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("thermal:temp * 2")

        The event :mod:`trappy.thermal.Thermal` is aliased
        as **thermal** in the grammar.

        *Dynamic Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            # Register Dynamic Event
            cls = trappy.register_dynamic_ftrace("my_unique_word", "event_name")

            pvars = {"CUSTOM": cls}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("CUSTOM:col * 2")

        .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`

    """

    def __init__(self, data, pvars=None, window=(0, None), filters=None, **kwargs):
        if pvars is None:
            pvars = {}

        self.data = data
        self._pvars = pvars
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window
        self._filters = filters

    def solve(self, expr):
        """Parses and solves the input expression

        :param expr: The input expression
        :type expr: str

        :return: The return type may vary depending on
            the expression. For example:

            **Vector**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp * 2")

            **Scalar**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("numpy.mean(trappy.thermal.Thermal:temp)")

            **Vector Mask**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp > 65000")
        """

        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot the data of an event class on its pivot column (if any)
        so that it can be concatenated with the aggregated DataFrame"""

        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))

        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            # Split the data per pivot value (e.g. per CPU) and align
            # each slice with the combined (super) index
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""

        params = tokens[0]
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        event = params[0]
        column = params[1]

        if event in self._pvars:
            cls = self._pvars[event]
        elif event in self.data.class_definitions:
            cls = self.data.class_definitions[event]
        else:
            try:
                cls = str_to_attr(event)
            except KeyError:
                raise ValueError(
                    "Can't find parser class for event {}".format(event))

        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat(
            [self._agg_df, data_frame], axis=1)

        if self._fill:
            self._agg_df = self._agg_df.fillna(method="pad")

        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""

        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None

        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._get_data_frame(cls)

        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier"""

        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""

        params = tokens[0]
        func_name = params[0]
        if func_name in self._pvars and isinstance(
                self._pvars[func_name],
                types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the window
        and the filters"""

        data_frame = getattr(self.data, cls.name).data_frame

        if data_frame.empty:
            return data_frame
        elif self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        if self._filters:
            criterion = pd.Series([True] * len(data_frame),
                                  index=data_frame.index)

            for filter_col, wanted_vals in self._filters.items():
                try:
                    dfr_col = data_frame[filter_col]
                except KeyError:
                    continue

                criterion &= dfr_col.isin(listify(wanted_vals))

            data_frame = data_frame[criterion]

        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in
            the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
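
        For example (illustrative; reuses the mask expression shown in
        :meth:`solve`)::

            mask = parser.solve("trappy.thermal.Thermal:temp > 65000")
            hot_samples = parser.ref(mask)
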
        """

        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format
            <event>:<column>
        :type accessor: str

        :return: A dictionary of information
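
        For example (illustrative)::

            info = parser.inspect("thermal:temp")
            # info has keys such as "class", "column", "length",
            # "pivot", "pivot_values" and "column_present"
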
        """
        return self._inspect.parseString(accessor)[0]
    581