# Home | History | Annotate | Download | only in trappy
      1 #    Copyright 2015-2017 ARM Limited
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 #
     15 
     16 """Base class to parse trace.dat dumps"""
     17 
     18 import re
     19 import pandas as pd
     20 import warnings
     21 
     22 from resource import getrusage, RUSAGE_SELF
     23 
     24 def _get_free_memory_kb():
     25     try:
     26         with open("/proc/meminfo") as f:
     27             memfree_line = [l for l in f.readlines() if "MemFree" in l][0]
     28             _, num_kb, _ = memfree_line.split()
     29             return int(num_kb)
     30     except:
     31         # Probably either not running on Linux (no /proc/meminfo), or format has
     32         # changed (we didn't find num_kb).
     33         return None
     34 
     35 def trace_parser_explode_array(string, array_lengths):
     36     """Explode an array in the trace into individual elements for easy parsing
     37 
     38     Basically, turn :code:`load={1 1 2 2}` into :code:`load0=1 load1=1 load2=2
     39     load3=2`.
     40 
     41     :param string: Input string from the trace
     42     :type string: str
     43 
     44     :param array_lengths: A dictionary of array names and their
     45         expected length.  If we get array that's shorter than the expected
     46         length, additional keys have to be introduced with value 0 to
     47         compensate.
     48     :type array_lengths: dict
     49 
     50     For example:
     51     ::
     52 
     53         trace_parser_explode_array(string="load={1 2}",
     54                                    array_lengths={"load": 4})
     55         "load0=1 load1=2 load2=0 load3=0"
     56     """
     57 
     58     while True:
     59         match = re.search(r"[^ ]+={[^}]+}", string)
     60         if match is None:
     61             break
     62 
     63         to_explode = match.group()
     64         col_basename = re.match(r"([^=]+)=", to_explode).groups()[0]
     65         vals_str = re.search(r"{(.+)}", to_explode).groups()[0]
     66         vals_array = vals_str.split(' ')
     67 
     68         exploded_str = ""
     69         for (idx, val) in enumerate(vals_array):
     70             exploded_str += "{}{}={} ".format(col_basename, idx, val)
     71 
     72         vals_added = len(vals_array)
     73         if vals_added < array_lengths[col_basename]:
     74             for idx in range(vals_added, array_lengths[col_basename]):
     75                 exploded_str += "{}{}=0 ".format(col_basename, idx)
     76 
     77         exploded_str = exploded_str[:-1]
     78         begin_idx = match.start()
     79         end_idx = match.end()
     80 
     81         string = string[:begin_idx] + exploded_str + string[end_idx:]
     82 
     83     return string
     84 
     85 class Base(object):
     86     """Base class to parse trace.dat dumps.
     87 
     88     Don't use directly, create a subclass that has a unique_word class
     89     variable.  unique_word is a string that can uniquely identify
     90     lines in the trace that correspond to this event.  This is usually
     91     the trace_name (optionally followed by a semicolong,
     92     e.g. "sched_switch:") but it can be anything else for trace points
     93     generated using trace_printk().
     94 
     95     :param parse_raw: If :code:`True`, raw trace data (-r option) to
     96         trace-cmd will be used
     97 
     98     :param fallback: If :code:`True`, the parsing class will be used
     99         only if no other candidate class's unique_word matched. subclasses
    100         should override this (for ex. TracingMarkWrite uses it)
    101 
    102     This class acts as a base class for all TRAPpy events
    103 
    104     """
    105     def __init__(self, parse_raw=False, fallback=False):
    106         self.fallback = fallback
    107         self.tracer = None
    108         self.data_frame = pd.DataFrame()
    109         self.line_array = []
    110         self.data_array = []
    111         self.time_array = []
    112         self.comm_array = []
    113         self.pid_array = []
    114         self.tgid_array = []
    115         self.cpu_array = []
    116         self.parse_raw = parse_raw
    117         self.cached = False
    118 
    119     def finalize_object(self):
    120         pass
    121 
    122     def __get_trace_array_lengths(self):
    123         """Calculate the lengths of all arrays in the trace
    124 
    125         Returns a dict with the name of each array found in the trace
    126         as keys and their corresponding length as value
    127 
    128         """
    129         from collections import defaultdict
    130 
    131         pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")
    132 
    133         ret = defaultdict(int)
    134 
    135         for line in self.data_array:
    136             while True:
    137                 match = re.search(pat_array, line)
    138                 if not match:
    139                     break
    140 
    141                 (array_name, array_elements) = match.groups()
    142 
    143                 array_len = len(array_elements.split(' '))
    144 
    145                 if array_len > ret[array_name]:
    146                     ret[array_name] = array_len
    147 
    148                 line = line[match.end():]
    149 
    150             # Stop scanning if the trace doesn't have arrays
    151             if len(ret) == 0:
    152                 break
    153 
    154         return ret
    155 
    156     def append_data(self, time, comm, pid, tgid, cpu, line, data):
    157         """Append data parsed from a line to the corresponding arrays
    158 
    159         The :mod:`DataFrame` will be created from this when the whole trace
    160         has been parsed.
    161 
    162         :param time: The time for the line that was printed in the trace
    163         :type time: float
    164 
    165         :param comm: The command name or the execname from which the trace
    166             line originated
    167         :type comm: str
    168 
    169         :param pid: The PID of the process from which the trace
    170             line originated
    171         :type pid: int
    172 
    173         :param data: The data for matching line in the trace
    174         :type data: str
    175         """
    176 
    177         self.time_array.append(time)
    178         self.comm_array.append(comm)
    179         self.pid_array.append(pid)
    180         self.tgid_array.append(tgid)
    181         self.cpu_array.append(cpu)
    182         self.line_array.append(line)
    183         self.data_array.append(data)
    184 
    185     def string_cast(self, string, type):
    186         """ Attempt to convert string to another type
    187 
    188         Here we attempt to cast string to a type. Currently only
    189         integer conversion is supported with future expansion
    190         left open to other types.
    191 
    192         :param string: The value to convert.
    193         :type string: str
    194 
    195         :param type: The type to convert to.
    196         :type type: type
    197         """
    198         # Currently this function only supports int conversion
    199         if type != int:
    200             return
    201         # Handle false-positives for negative numbers
    202         if not string.lstrip("-").isdigit():
    203             return string
    204         return int(string)
    205 
    206     def generate_data_dict(self, data_str):
    207         data_dict = {}
    208         prev_key = None
    209         for field in data_str.split():
    210             if "=" not in field:
    211                 # Concatenation is supported only for "string" values
    212                 if type(data_dict[prev_key]) is not str:
    213                     continue
    214                 data_dict[prev_key] += ' ' + field
    215                 continue
    216             (key, value) = field.split('=', 1)
    217             value = self.string_cast(value, int)
    218             data_dict[key] = value
    219             prev_key = key
    220         return data_dict
    221 
    222     def generate_parsed_data(self):
    223 
    224         # Get a rough idea of how much memory we have to play with
    225         CHECK_MEM_COUNT = 10000
    226         kb_free = _get_free_memory_kb()
    227         starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
    228         check_memory_usage = True
    229         check_memory_count = 1
    230 
    231         for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
    232                                               self.tgid_array, self.cpu_array,
    233                                               self.line_array, self.data_array):
    234             data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
    235             data_dict.update(self.generate_data_dict(data_str))
    236 
    237             # When running out of memory, Pandas has been observed to segfault
    238             # rather than throwing a proper Python error.
    239             # Look at how much memory our process is using and warn if we seem
    240             # to be getting close to the system's limit, check it only once
    241             # in the beginning and then every CHECK_MEM_COUNT events
    242             check_memory_count -= 1
    243             if check_memory_usage and check_memory_count == 0:
    244                 kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
    245                 if kb_free and kb_used > kb_free * 0.9:
    246                     warnings.warn("TRAPpy: Appear to be low on memory. "
    247                                   "If errors arise, try providing more RAM")
    248                     check_memory_usage = False
    249                 check_memory_count = CHECK_MEM_COUNT
    250 
    251             yield data_dict
    252 
    253     def create_dataframe(self):
    254         """Create the final :mod:`pandas.DataFrame`"""
    255         if not self.time_array:
    256             return
    257 
    258         trace_arr_lengths = self.__get_trace_array_lengths()
    259 
    260         if trace_arr_lengths.items():
    261             for (idx, val) in enumerate(self.data_array):
    262                 expl_val = trace_parser_explode_array(val, trace_arr_lengths)
    263                 self.data_array[idx] = expl_val
    264 
    265         time_idx = pd.Index(self.time_array, name="Time")
    266         self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
    267 
    268         self.time_array = []
    269         self.line_array = []
    270         self.comm_array = []
    271         self.pid_array = []
    272         self.cpu_array = []
    273         self.data_array = []
    274 
    275     def write_csv(self, fname):
    276         """Write the csv info into a CSV file
    277 
    278         :param fname: The name of the CSV file
    279         :type fname: str
    280         """
    281         self.data_frame.to_csv(fname)
    282 
    283     def read_csv(self, fname):
    284         """Read the csv data into a DataFrame
    285 
    286         :param fname: The name of the CSV file
    287         :type fname: str
    288         """
    289         self.data_frame = pd.read_csv(fname, index_col = 0)
    290 
    291     def normalize_time(self, basetime):
    292         """Substract basetime from the Time of the data frame
    293 
    294         :param basetime: The offset which needs to be subtracted from
    295             the time index
    296         :type basetime: float
    297         """
    298         if basetime and not self.data_frame.empty:
    299             self.data_frame.reset_index(inplace=True)
    300             self.data_frame["Time"] = self.data_frame["Time"] - basetime
    301             self.data_frame.set_index("Time", inplace=True)
    302