Home | History | Annotate | Download | only in runtime_memusage
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2017 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Outputs quantitative information about Address Sanitizer traces."""
     18 
     19 from __future__ import absolute_import
     20 from __future__ import division
     21 from __future__ import print_function
     22 
     23 from collections import Counter
     24 from datetime import datetime
     25 import argparse
     26 import bisect
     27 import os
     28 import re
     29 
     30 
     31 def find_match(list_substrings, big_string):
     32     """Returns the category a trace belongs to by searching substrings."""
     33     for ind, substr in enumerate(list_substrings):
     34         if big_string.find(substr) != -1:
     35             return ind
     36     return list_substrings.index("Uncategorized")
     37 
     38 
     39 def absolute_to_relative(data_lists, symbol_traces):
     40     """Address changed to Dex File offset and shifting time to 0 min in ms."""
     41 
     42     offsets = data_lists["offsets"]
     43     time_offsets = data_lists["times"]
     44 
     45     # Format of time provided by logcat
     46     time_format_str = "%H:%M:%S.%f"
     47     first_access_time = datetime.strptime(data_lists["plot_list"][0][0],
     48                                           time_format_str)
     49     for ind, elem in enumerate(data_lists["plot_list"]):
     50         elem_date_time = datetime.strptime(elem[0], time_format_str)
     51         # Shift time values so that first access is at time 0 milliseconds
     52         elem[0] = int((elem_date_time - first_access_time).total_seconds() *
     53                       1000)
     54         address_access = int(elem[1], 16)
     55         # For each poisoned address, find highest Dex File starting address less
     56         # than address_access
     57         dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"])
     58         dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1
     59         dex_offset = address_access - dex_start_list[dex_file_ind]
     60         # Assumes that offsets is already sorted and constrains offset to be
     61         # within range of the dex_file
     62         max_offset = min(offsets[1], dex_size_list[dex_file_ind])
     63         # Meant to nullify data that does not meet offset criteria if specified
     64         if (dex_offset >= offsets[0] and dex_offset < max_offset and
     65                 elem[0] >= time_offsets[0] and elem[0] < time_offsets[1]):
     66 
     67             elem.insert(1, dex_offset)
     68             # Category that a data point belongs to
     69             elem.insert(2, data_lists["cat_list"][ind])
     70         else:
     71             elem[:] = 4 * [None]
     72             symbol_traces[ind] = None
     73             data_lists["cat_list"][ind] = None
     74 
     75 
     76 def print_category_info(cat_split, outname, out_dir_name, title):
     77     """Prints information of category and puts related traces in a files."""
     78     trace_counts_dict = Counter(cat_split)
     79     trace_counts_list_ordered = trace_counts_dict.most_common()
     80     print(53 * "-")
     81     print(title)
     82     print("\tNumber of distinct traces: " +
     83           str(len(trace_counts_list_ordered)))
     84     print("\tSum of trace counts: " +
     85           str(sum([trace[1] for trace in trace_counts_list_ordered])))
     86     print("\n\tCount: How many traces appeared with count\n\t", end="")
     87     print(Counter([trace[1] for trace in trace_counts_list_ordered]))
     88     with open(os.path.join(out_dir_name, outname), "w") as output_file:
     89         for trace in trace_counts_list_ordered:
     90             output_file.write("\n\nNumber of times appeared: " +
     91                               str(trace[1]) +
     92                               "\n")
     93             output_file.write(trace[0].strip())
     94 
     95 
     96 def print_categories(categories, symbol_file_split, out_dir_name):
     97     """Prints details of all categories."""
     98     symbol_file_split = [trace for trace in symbol_file_split
     99                          if trace is not None]
    100     # Info of traces containing a call to current category
    101     for cat_num, cat_name in enumerate(categories[1:]):
    102         print("\nCategory #%d" % (cat_num + 1))
    103         cat_split = [trace for trace in symbol_file_split
    104                      if cat_name in trace]
    105         cat_file_name = cat_name.lower() + "cat_output"
    106         print_category_info(cat_split, cat_file_name, out_dir_name,
    107                             "Traces containing: " + cat_name)
    108         noncat_split = [trace for trace in symbol_file_split
    109                         if cat_name not in trace]
    110         print_category_info(noncat_split, "non" + cat_file_name,
    111                             out_dir_name,
    112                             "Traces not containing: " +
    113                             cat_name)
    114 
    115     # All traces (including uncategorized) together
    116     print_category_info(symbol_file_split, "allcat_output",
    117                         out_dir_name,
    118                         "All traces together:")
    119     # Traces containing none of keywords
    120     # Only used if categories are passed in
    121     if len(categories) > 1:
    122         noncat_split = [trace for trace in symbol_file_split if
    123                         all(cat_name not in trace
    124                             for cat_name in categories)]
    125         print_category_info(noncat_split, "noncat_output",
    126                             out_dir_name,
    127                             "Uncategorized calls")
    128 
    129 
    130 def is_directory(path_name):
    131     """Checks if a path is an actual directory."""
    132     if not os.path.isdir(path_name):
    133         dir_error = "%s is not a directory" % (path_name)
    134         raise argparse.ArgumentTypeError(dir_error)
    135     return path_name
    136 
    137 
    138 def parse_args(argv):
    139     """Parses arguments passed in."""
    140     parser = argparse.ArgumentParser()
    141     parser.add_argument("-d", action="store",
    142                         default="", dest="out_dir_name", type=is_directory,
    143                         help="Output Directory")
    144     parser.add_argument("--dex-file", action="store",
    145                         default=None, dest="dex_file",
    146                         type=argparse.FileType("r"),
    147                         help="Baksmali Dex File Dump")
    148     parser.add_argument("--offsets", action="store", nargs=2,
    149                         default=[float(0), float("inf")],
    150                         dest="offsets",
    151                         metavar="OFFSET",
    152                         type=float,
    153                         help="Filters out accesses not between provided"
    154                              " offsets if provided. Can provide 'inf'"
    155                              " for infinity")
    156     parser.add_argument("--times", action="store", nargs=2,
    157                         default=[float(0), float("inf")],
    158                         dest="times",
    159                         metavar="TIME",
    160                         type=float,
    161                         help="Filters out accesses not between provided"
    162                              " time offsets if provided. Can provide 'inf'"
    163                              " for infinity")
    164     parser.add_argument("sanitizer_trace", action="store",
    165                         type=argparse.FileType("r"),
    166                         help="File containing sanitizer traces filtered by "
    167                              "prune_sanitizer_output.py")
    168     parser.add_argument("symbol_trace", action="store",
    169                         type=argparse.FileType("r"),
    170                         help="File containing symbolized traces that match "
    171                              "sanitizer_trace")
    172     parser.add_argument("dex_starts", action="store",
    173                         type=argparse.FileType("r"),
    174                         help="File containing starting addresses of Dex Files")
    175     parser.add_argument("categories", action="store", nargs="*",
    176                         help="Keywords expected to show in large amounts of"
    177                              " symbolized traces")
    178 
    179     return parser.parse_args(argv)
    180 
    181 
    182 def get_dex_offset_data(line, dex_file_item):
    183     """ Returns a tuple of dex file offset, item name, and data of a line."""
    184     return (int(line[:line.find(":")], 16),
    185             (dex_file_item,
    186              line.split("|")[1].strip())
    187             )
    188 
    189 
    190 def read_data(parsed_argv):
    191     """Reads data from filepath arguments and parses them into lists."""
    192     # Using a dictionary to establish relation between lists added
    193     data_lists = {}
    194     categories = parsed_argv.categories
    195     # Makes sure each trace maps to some category
    196     categories.insert(0, "Uncategorized")
    197 
    198     data_lists["offsets"] = parsed_argv.offsets
    199     data_lists["offsets"].sort()
    200 
    201     data_lists["times"] = parsed_argv.times
    202     data_lists["times"].sort()
    203 
    204     logcat_file_data = parsed_argv.sanitizer_trace.readlines()
    205     parsed_argv.sanitizer_trace.close()
    206 
    207     symbol_file_split = parsed_argv.symbol_trace.read().split("Stack Trace")
    208     # Removes text before first trace
    209     symbol_file_split = symbol_file_split[1:]
    210     parsed_argv.symbol_trace.close()
    211 
    212     dex_start_file_data = parsed_argv.dex_starts.readlines()
    213     parsed_argv.dex_starts.close()
    214 
    215     if parsed_argv.dex_file is not None:
    216         dex_file_data = parsed_argv.dex_file.read()
    217         parsed_argv.dex_file.close()
    218         # Splits baksmali dump by each item
    219         item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ",
    220                                                        dex_file_data)]
    221         # Splits each item by line and creates a list of offsets and a
    222         # corresponding list of the data associated with that line
    223         offset_list, offset_data = zip(*[get_dex_offset_data(line, item[0])
    224                                          for item in item_split
    225                                          for line in item[1:]
    226                                          if re.search("[0-9a-f]{6}:", line)
    227                                          is not None and
    228                                          line.find("|") != -1])
    229         data_lists["offset_list"] = offset_list
    230         data_lists["offset_data"] = offset_data
    231     else:
    232         dex_file_data = None
    233 
    234     # Each element is a tuple of time and address accessed
    235     data_lists["plot_list"] = [[elem[1] for elem in enumerate(line.split())
    236                                 if elem[0] in (1, 11)
    237                                 ]
    238                                for line in logcat_file_data
    239                                if "use-after-poison" in line or
    240                                "unknown-crash" in line
    241                                ]
    242     # Contains a mapping between traces and the category they belong to
    243     # based on arguments
    244     data_lists["cat_list"] = [categories[find_match(categories, trace)]
    245                               for trace in symbol_file_split]
    246 
    247     # Contains a list of starting address of all dex files to calculate dex
    248     # offsets
    249     data_lists["dex_ends_list"] = [(int(line.split()[9], 16),
    250                                     int(line.split()[12])
    251                                     )
    252                                    for line in dex_start_file_data
    253                                    if "RegisterDexFile" in line
    254                                    ]
    255     # Dex File Starting addresses must be sorted because bisect requires sorted
    256     # lists.
    257     data_lists["dex_ends_list"].sort()
    258 
    259     return data_lists, categories, symbol_file_split
    260 
    261 
    262 def main():
    263     """Takes in trace information and outputs details about them."""
    264     parsed_argv = parse_args(None)
    265     data_lists, categories, symbol_file_split = read_data(parsed_argv)
    266 
    267     # Formats plot_list such that each element is a data point
    268     absolute_to_relative(data_lists, symbol_file_split)
    269     for file_ext, cat_name in enumerate(categories):
    270         out_file_name = os.path.join(parsed_argv.out_dir_name, "time_output_" +
    271                                      str(file_ext) +
    272                                      ".dat")
    273         with open(out_file_name, "w") as output_file:
    274             output_file.write("# Category: " + cat_name + "\n")
    275             output_file.write("# Time, Dex File Offset_10, Dex File Offset_16,"
    276                               " Address, Item Accessed, Item Member Accessed"
    277                               " Unaligned\n")
    278             for time, dex_offset, category, address in data_lists["plot_list"]:
    279                 if category == cat_name:
    280                     output_file.write(
    281                         str(time) +
    282                         " " +
    283                         str(dex_offset) +
    284                         " #" +
    285                         hex(dex_offset) +
    286                         " " +
    287                         str(address))
    288                     if "offset_list" in data_lists:
    289                         dex_offset_index = bisect.bisect(
    290                             data_lists["offset_list"],
    291                             dex_offset) - 1
    292                         aligned_dex_offset = (data_lists["offset_list"]
    293                                                         [dex_offset_index])
    294                         dex_offset_data = (data_lists["offset_data"]
    295                                                      [dex_offset_index])
    296                         output_file.write(
    297                             " " +
    298                             "|".join(dex_offset_data) +
    299                             " " +
    300                             str(aligned_dex_offset != dex_offset))
    301                     output_file.write("\n")
    302     print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)
    303 
    304 
    305 if __name__ == "__main__":
    306     main()
    307