#!/usr/bin/env python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Outputs quantitative information about Address Sanitizer traces."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import Counter
from datetime import datetime
import argparse
import bisect
import os
import re


def find_match(list_substrings, big_string):
    """Returns the category a trace belongs to by searching substrings.

    Args:
        list_substrings: list of category keywords; must contain the string
            "Uncategorized", which is used as the fallback.
        big_string: text of a single trace.

    Returns:
        Index into list_substrings of the first keyword found in big_string,
        or the index of "Uncategorized" when no keyword is found.

    Raises:
        ValueError: nothing matched and "Uncategorized" is not in the list.
    """
    for ind, substr in enumerate(list_substrings):
        if substr in big_string:
            return ind
    return list_substrings.index("Uncategorized")


def absolute_to_relative(data_lists, symbol_traces):
    """Address changed to Dex File offset and shifting time to 0 min in ms.

    Every element of data_lists["plot_list"] starts as [time, address] and is
    rewritten in place to [time_ms, dex_offset, category, address], where
    time_ms is relative to the first element. Elements that fall outside the
    requested offset/time windows, or whose address lies below every known
    Dex File, are replaced by four Nones and the entries at the same index in
    symbol_traces and data_lists["cat_list"] are set to None.

    Args:
        data_lists: dictionary holding the keys "plot_list", "offsets",
            "times", "cat_list" and "dex_ends_list" (sorted list of
            (start address, size) tuples).
        symbol_traces: list of symbolized traces, indexed like "plot_list".

    Raises:
        ValueError: there are data points but no Dex File starting addresses.
    """
    plot_list = data_lists["plot_list"]
    if not plot_list:
        # No accesses were recorded, so there is nothing to convert.
        return

    offsets = data_lists["offsets"]
    time_offsets = data_lists["times"]
    cat_list = data_lists["cat_list"]

    if not data_lists["dex_ends_list"]:
        raise ValueError("No Dex File starting addresses were provided")
    # Loop invariant, so it is unzipped once instead of once per data point.
    dex_start_list, dex_size_list = zip(*data_lists["dex_ends_list"])

    # Format of time provided by logcat
    time_format_str = "%H:%M:%S.%f"
    first_access_time = datetime.strptime(plot_list[0][0], time_format_str)
    for ind, elem in enumerate(plot_list):
        elem_date_time = datetime.strptime(elem[0], time_format_str)
        # Shift time values so that first access is at time 0 milliseconds
        elem[0] = int((elem_date_time - first_access_time).total_seconds() *
                      1000)
        address_access = int(elem[1], 16)
        # For each poisoned address, find highest Dex File starting address
        # less than address_access
        dex_file_ind = bisect.bisect(dex_start_list, address_access) - 1

        keep = False
        dex_offset = None
        # A negative index means the address is below every Dex File; it must
        # not wrap around to the last Dex File in the list.
        if dex_file_ind >= 0:
            dex_offset = address_access - dex_start_list[dex_file_ind]
            # Assumes that offsets is already sorted and constrains offset to
            # be within range of the dex_file
            max_offset = min(offsets[1], dex_size_list[dex_file_ind])
            keep = (offsets[0] <= dex_offset < max_offset and
                    time_offsets[0] <= elem[0] < time_offsets[1])

        if keep:
            elem.insert(1, dex_offset)
            # Category that a data point belongs to
            elem.insert(2, cat_list[ind])
        else:
            # Meant to nullify data that does not meet offset criteria
            elem[:] = 4 * [None]
            symbol_traces[ind] = None
            cat_list[ind] = None


def print_category_info(cat_split, outname, out_dir_name, title):
    """Prints information of category and puts related traces in a file.

    Args:
        cat_split: list of trace strings belonging to the category.
        outname: name of the file the distinct traces are written to.
        out_dir_name: directory in which outname is created.
        title: heading printed above the statistics.
    """
    # List of (trace, count) pairs, most frequent trace first.
    trace_counts_list_ordered = Counter(cat_split).most_common()
    print(53 * "-")
    print(title)
    print("\tNumber of distinct traces: " +
          str(len(trace_counts_list_ordered)))
    print("\tSum of trace counts: " +
          str(sum(count for _, count in trace_counts_list_ordered)))
    print("\n\tCount: How many traces appeared with count\n\t", end="")
    print(Counter(count for _, count in trace_counts_list_ordered))
    with open(os.path.join(out_dir_name, outname), "w") as output_file:
        for trace, count in trace_counts_list_ordered:
            output_file.write("\n\nNumber of times appeared: " +
                              str(count) +
                              "\n")
            output_file.write(trace.strip())


def print_categories(categories, symbol_file_split, out_dir_name):
    """Prints details of all categories.

    Args:
        categories: list of category keywords; the first entry is skipped in
            the per-category reports (read_data puts "Uncategorized" there).
        symbol_file_split: list of trace strings; None entries (filtered out
            data points) are ignored.
        out_dir_name: directory in which the report files are created.
    """
    symbol_file_split = [trace for trace in symbol_file_split
                         if trace is not None]
    # Info of traces containing a call to current category
    for cat_num, cat_name in enumerate(categories[1:]):
        print("\nCategory #%d" % (cat_num + 1))
        cat_split = [trace for trace in symbol_file_split
                     if cat_name in trace]
        cat_file_name = cat_name.lower() + "cat_output"
        print_category_info(cat_split, cat_file_name, out_dir_name,
                            "Traces containing: " + cat_name)
        noncat_split = [trace for trace in symbol_file_split
                        if cat_name not in trace]
        print_category_info(noncat_split, "non" + cat_file_name,
                            out_dir_name,
                            "Traces not containing: " +
                            cat_name)

    # All traces (including uncategorized) together
    print_category_info(symbol_file_split, "allcat_output",
                        out_dir_name,
                        "All traces together:")
    # Traces containing none of keywords
    # Only used if categories are passed in
    if len(categories) > 1:
        noncat_split = [trace for trace in symbol_file_split if
                        all(cat_name not in trace
                            for cat_name in categories)]
        print_category_info(noncat_split, "noncat_output",
                            out_dir_name,
                            "Uncategorized calls")


def is_directory(path_name):
    """Checks if a path is an actual directory.

    Used as an argparse type callable.

    Returns:
        path_name unchanged when it is an existing directory.

    Raises:
        argparse.ArgumentTypeError: path_name is not a directory.
    """
    if not os.path.isdir(path_name):
        dir_error = "%s is not a directory" % (path_name)
        raise argparse.ArgumentTypeError(dir_error)
    return path_name


def parse_args(argv):
    """Parses arguments passed in.

    Args:
        argv: list of argument strings, or None to use sys.argv[1:].

    Returns:
        The argparse namespace with out_dir_name, dex_file, offsets, times,
        sanitizer_trace, symbol_trace, dex_starts and categories.
    """
    parser = argparse.ArgumentParser()
    # argparse runs string defaults through `type`, so the default has to be
    # a path accepted by is_directory; an empty string is rejected and made
    # the script fail whenever -d was omitted.
    parser.add_argument("-d", action="store",
                        default=os.curdir, dest="out_dir_name",
                        type=is_directory,
                        help="Output Directory")
    parser.add_argument("--dex-file", action="store",
                        default=None, dest="dex_file",
                        type=argparse.FileType("r"),
                        help="Baksmali Dex File Dump")
    parser.add_argument("--offsets", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="offsets",
                        metavar="OFFSET",
                        type=float,
                        help="Filters out accesses not between provided"
                             " offsets if provided. Can provide 'inf'"
                             " for infinity")
    parser.add_argument("--times", action="store", nargs=2,
                        default=[float(0), float("inf")],
                        dest="times",
                        metavar="TIME",
                        type=float,
                        help="Filters out accesses not between provided"
                             " time offsets if provided. Can provide 'inf'"
                             " for infinity")
    parser.add_argument("sanitizer_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing sanitizer traces filtered by "
                             "prune_sanitizer_output.py")
    parser.add_argument("symbol_trace", action="store",
                        type=argparse.FileType("r"),
                        help="File containing symbolized traces that match "
                             "sanitizer_trace")
    parser.add_argument("dex_starts", action="store",
                        type=argparse.FileType("r"),
                        help="File containing starting addresses of Dex Files")
    parser.add_argument("categories", action="store", nargs="*",
                        help="Keywords expected to show in large amounts of"
                             " symbolized traces")

    return parser.parse_args(argv)


def get_dex_offset_data(line, dex_file_item):
    """Returns a tuple of dex file offset, item name, and data of a line.

    Args:
        line: dump line of the form "<hex offset>: ... | <data>".
        dex_file_item: name of the item the line belongs to.

    Returns:
        (offset, (dex_file_item, data)) where offset is the integer value of
        the hex text before the first ":" and data is the stripped text
        between the first and second "|".
    """
    offset = int(line[:line.find(":")], 16)
    line_data = line.split("|")[1].strip()
    return (offset, (dex_file_item, line_data))


def read_data(parsed_argv):
    """Reads data from filepath arguments and parses them into lists.

    The file objects in parsed_argv are closed once read, also when reading
    fails. "Uncategorized" is inserted at the front of parsed_argv.categories
    and the offsets/times pairs are sorted in place.

    Args:
        parsed_argv: namespace as returned by parse_args.

    Returns:
        A tuple (data_lists, categories, symbol_file_split). data_lists has
        the keys "offsets", "times", "plot_list", "cat_list" and
        "dex_ends_list", plus "offset_list" and "offset_data" when a dex file
        dump was given and contained offset lines.
    """
    # Using a dictionary to establish relation between lists added
    data_lists = {}
    categories = parsed_argv.categories
    # Makes sure each trace maps to some category
    categories.insert(0, "Uncategorized")

    data_lists["offsets"] = parsed_argv.offsets
    data_lists["offsets"].sort()

    data_lists["times"] = parsed_argv.times
    data_lists["times"].sort()

    with parsed_argv.sanitizer_trace as sanitizer_file:
        logcat_file_data = sanitizer_file.readlines()

    with parsed_argv.symbol_trace as symbol_file:
        # Index 0 is the text before the first trace, which is dropped
        symbol_file_split = symbol_file.read().split("Stack Trace")[1:]

    with parsed_argv.dex_starts as dex_starts_file:
        dex_start_file_data = dex_starts_file.readlines()

    if parsed_argv.dex_file is not None:
        with parsed_argv.dex_file as dex_file:
            dex_file_data = dex_file.read()
        # Splits baksmali dump by each item
        item_split = [s.splitlines() for s in re.split(r"\|\[[0-9]+\] ",
                                                       dex_file_data)]
        offset_line_re = re.compile(r"[0-9a-f]{6}:")
        # Splits each item by line and creates a list of offsets and a
        # corresponding list of the data associated with that line
        offset_pairs = [get_dex_offset_data(line, item[0])
                        for item in item_split
                        for line in item[1:]
                        if offset_line_re.search(line) is not None and
                        "|" in line]
        # A dump without any offset line cannot be unzipped into two lists;
        # in that case the keys are left out so no annotation is attempted.
        if offset_pairs:
            offset_list, offset_data = zip(*offset_pairs)
            data_lists["offset_list"] = offset_list
            data_lists["offset_data"] = offset_data

    # Each element is a list of time and address accessed (the 2nd and 12th
    # whitespace separated fields of the line)
    data_lists["plot_list"] = [[field for pos, field
                                in enumerate(line.split())
                                if pos in (1, 11)]
                               for line in logcat_file_data
                               if "use-after-poison" in line or
                               "unknown-crash" in line]
    # Contains a mapping between traces and the category they belong to
    # based on arguments
    data_lists["cat_list"] = [categories[find_match(categories, trace)]
                              for trace in symbol_file_split]

    # Contains a list of (starting address, size) of all dex files to
    # calculate dex offsets
    dex_ends_list = []
    for line in dex_start_file_data:
        if "RegisterDexFile" in line:
            fields = line.split()
            dex_ends_list.append((int(fields[9], 16), int(fields[12])))
    # Dex File Starting addresses must be sorted because bisect requires
    # sorted lists.
    dex_ends_list.sort()
    data_lists["dex_ends_list"] = dex_ends_list

    return data_lists, categories, symbol_file_split


def main():
    """Takes in trace information and outputs details about them."""
    parsed_argv = parse_args(None)
    data_lists, categories, symbol_file_split = read_data(parsed_argv)

    # Formats plot_list such that each element is a data point
    absolute_to_relative(data_lists, symbol_file_split)
    has_dex_dump = "offset_list" in data_lists
    for file_ext, cat_name in enumerate(categories):
        out_file_name = os.path.join(parsed_argv.out_dir_name, "time_output_" +
                                     str(file_ext) +
                                     ".dat")
        with open(out_file_name, "w") as output_file:
            output_file.write("# Category: " + cat_name + "\n")
            output_file.write("# Time, Dex File Offset_10, Dex File Offset_16,"
                              " Address, Item Accessed, Item Member Accessed"
                              " Unaligned\n")
            for time, dex_offset, category, address in data_lists["plot_list"]:
                # Filtered out data points have category None and never match
                if category != cat_name:
                    continue
                output_file.write(
                    str(time) +
                    " " +
                    str(dex_offset) +
                    " #" +
                    hex(dex_offset) +
                    " " +
                    str(address))
                if has_dex_dump:
                    # NOTE(review): an offset below the first dump entry
                    # yields index -1 and thus the last entry -- confirm
                    # whether that can happen with real dumps.
                    dex_offset_index = bisect.bisect(
                        data_lists["offset_list"],
                        dex_offset) - 1
                    aligned_dex_offset = (data_lists["offset_list"]
                                          [dex_offset_index])
                    dex_offset_data = (data_lists["offset_data"]
                                       [dex_offset_index])
                    output_file.write(
                        " " +
                        "|".join(dex_offset_data) +
                        " " +
                        str(aligned_dex_offset != dex_offset))
                output_file.write("\n")
    print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)


if __name__ == "__main__":
    main()