# trappy/utils.py
      1 #    Copyright 2015-2017 ARM Limited
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 #
     15 
     16 import pandas as pd
     17 import numpy as np
     18 
     19 """Generic functions that can be used in multiple places in trappy
     20 """
     21 
     22 def listify(to_select):
     23     """Utitlity function to handle both single and
     24     list inputs
     25     """
     26 
     27     if not isinstance(to_select, list):
     28         to_select = [to_select]
     29 
     30     return to_select
     31 
     32 def handle_duplicate_index(data,
     33                            max_delta=0.000001):
     34     """Handle duplicate values in index
     35 
     36     :param data: The timeseries input
     37     :type data: :mod:`pandas.Series`
     38 
     39     :param max_delta: Maximum interval adjustment value that
     40         will be added to duplicate indices
     41     :type max_delta: float
     42 
     43     Consider the following case where a series needs to be reindexed
     44     to a new index (which can be required when different series need to
     45     be combined and compared):
     46     ::
     47 
     48         import pandas
     49         values = [0, 1, 2, 3, 4]
     50         index = [0.0, 1.0, 1.0, 6.0, 7.0]
     51         series = pandas.Series(values, index=index)
     52         new_index = [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0]
     53         series.reindex(new_index)
     54 
     55     The above code fails with:
     56     ::
     57 
     58         ValueError: cannot reindex from a duplicate axis
     59 
     60     The function :func:`handle_duplicate_axis` changes the duplicate values
     61     to
     62     ::
     63 
     64         >>> import pandas
     65         >>> from trappy.utils import handle_duplicate_index
     66 
     67         >>> values = [0, 1, 2, 3, 4]
     68         index = [0.0, 1.0, 1.0, 6.0, 7.0]
     69         series = pandas.Series(values, index=index)
     70         series = handle_duplicate_index(series)
     71         print series.index.values
     72         >>> [ 0.        1.        1.000001  6.        7.      ]
     73 
     74     """
     75 
     76     index = data.index
     77     new_index = index.values
     78 
     79     dups = index.get_duplicates()
     80 
     81     for dup in dups:
     82         # Leave one of the values intact
     83         dup_index_left = index.searchsorted(dup, side="left")
     84         dup_index_right = index.searchsorted(dup, side="right") - 1
     85         num_dups = dup_index_right - dup_index_left + 1
     86 
     87         # Calculate delta that needs to be added to each duplicate
     88         # index
     89         try:
     90             delta = (index[dup_index_right + 1] - dup) / num_dups
     91         except IndexError:
     92             # dup_index_right + 1 is outside of the series (i.e. the
     93             # dup is at the end of the series).
     94             delta = max_delta
     95 
     96         # Clamp the maximum delta added to max_delta
     97         if delta > max_delta:
     98             delta = max_delta
     99 
    100         # Add a delta to the others
    101         dup_index_left += 1
    102         while dup_index_left <= dup_index_right:
    103             new_index[dup_index_left] += delta
    104             delta += delta
    105             dup_index_left += 1
    106 
    107     return data.reindex(new_index)
    108 
    109 # Iterate fast over all rows in a data frame and apply fn
    110 def apply_callback(df, fn, *kwargs):
    111     iters = df.itertuples()
    112     event_tuple = iters.next()
    113 
    114     # Column names beginning with underscore will not be preserved in tuples
    115     # due to constraints on namedtuple field names, so store mappings from
    116     # column name to column number for each trace event.
    117     col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }
    118 
    119     while True:
    120         if not event_tuple:
    121             break
    122         event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() }
    123 
    124         if kwargs:
    125             fn(event_dict, kwargs)
    126         else:
    127             fn(event_dict)
    128 
    129         event_tuple = next(iters, None)
    130 
    131 
    132 def merge_dfs(pr_df, sec_df, pivot):
    133     # Keep track of last secondary event
    134     pivot_map = {}
    135 
    136     # An array accumating dicts with merged data
    137     merged_data = []
    138     def df_fn(data):
    139         # Store the latest secondary info
    140         if data['Time'][0] == 'secondary':
    141             pivot_map[data[pivot]] = data
    142             # Get rid of primary/secondary labels
    143             data['Time'] = data['Time'][1]
    144             return
    145 
    146         # Propogate latest secondary info
    147         for key, value in data.iteritems():
    148             if key == pivot:
    149                 continue
    150             # Fast check for if value is nan (faster than np.isnan + try/except)
    151             if value != value and pivot_map.has_key(data[pivot]):
    152                 data[key] = pivot_map[data[pivot]][key]
    153 
    154         # Get rid of primary/secondary labels
    155         data['Time'] = data['Time'][1]
    156         merged_data.append(data)
    157 
    158     df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort_values(by='__line')
    159     apply_callback(df, df_fn)
    160     merged_df = pd.DataFrame.from_dict(merged_data)
    161     merged_df.set_index('Time', inplace=True)
    162 
    163     return merged_df
    164