#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Symbolizes 'pc:<hex>' stack frames of a trace file in place.

The trace file (.json or .json.gz) is loaded, every stack frame whose name
has the form 'pc:<hex address>' is mapped to the memory region (and thus the
file) that contains the address, the addresses are symbolized with addr2line
via ELFSymbolizer, and the trace is written back with the frame names
replaced.

NOTE: this script relies on Python 2 builtins (long, dict.itervalues,
dict.iteritems, __cmp__). print() is always called with exactly one argument
so that the output is the same whether it is parsed as a Python 2 print
statement or as a function call.
"""

import argparse
import bisect
import collections
import gzip
import json
import os
import re
import subprocess
import sys

_SYMBOLS_PATH = os.path.abspath(os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    '..',
    'third_party',
    'symbols'))
sys.path.append(_SYMBOLS_PATH)
# pylint: disable=import-error
import symbols.elf_symbolizer as elf_symbolizer


# Relevant trace event phases from Chromium's
# src/base/trace_event/common/trace_event_common.h.
TRACE_EVENT_PHASE_METADATA = 'M'
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'


# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
# via 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'


def FindInSystemPath(binary_name):
  """Looks up |binary_name| in the directories listed in $PATH.

  Returns the path of the first match that is a regular file and is
  executable, or None if there is no such file or $PATH is not set.
  """
  search_path = os.environ.get('PATH')
  if search_path is None:
    return None
  for directory in search_path.split(os.pathsep):
    binary_path = os.path.join(directory, binary_name)
    # Skip non-executable files so that a stray data file with the same name
    # earlier in $PATH doesn't shadow the real binary.
    if os.path.isfile(binary_path) and os.access(binary_path, os.X_OK):
      return binary_path
  return None


def IsSymbolizableFile(file_path):
  """Returns True if the 'file' utility reports |file_path| as a 32-bit or
  64-bit ELF or Mach-O file.

  Raises whatever subprocess.check_output() raises if 'file' can't be run or
  exits with non-zero status.
  """
  # With '-0' the file name in the output is terminated by a NUL character,
  # so everything after the first NUL is the type description.
  result = subprocess.check_output(['file', '-0', file_path])
  type_string = result[result.find('\0') + 1:]
  return bool(re.match(r'\: (ELF|Mach-O) (32|64)-bit\b', type_string))


class ProcessMemoryMaps(object):
  """Represents 'process_mmaps' trace file entry."""

  class Region(object):
    """A single mapped region: [start_address, start_address + size)."""

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """Address one past the last byte of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def __cmp__(self, other):
      # Python 2 ordering hook. Regions are ordered by start address only.
      # Comparison against a bare integer is supported so that bisect can
      # search a sorted list of regions for an address.
      if isinstance(other, type(self)):
        return long(self._start_address).__cmp__(long(other._start_address))
      elif isinstance(other, (long, int)):
        return long(self._start_address).__cmp__(long(other))
      else:
        raise Exception('Cannot compare with %s' % type(other))

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps):
    """Parses 'process_mmaps' dictionary.

    Each entry of process_mmaps['vm_regions'] supplies the start address
    ('sa') and size ('sz') as hexadecimal strings and the mapped file path
    ('mf').

    Raises AssertionError if two regions with different start addresses
    overlap.
    """

    regions = []
    for region_value in process_mmaps['vm_regions']:
      regions.append(self.Region(
          long(region_value['sa'], 16),
          long(region_value['sz'], 16),
          region_value['mf']))
    regions.sort()

    # Copy regions without duplicates and check for overlaps.
    # Equality here is Region.__cmp__, i.e. "same start address": a region
    # that starts where the previously kept one starts is dropped regardless
    # of its size or file path.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    """List of regions sorted by start address."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""

    # bisect_right() gives the index of the first region that starts after
    # |address|; the only candidate is the one right before it.
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if address >= region.start_address and address < region.end_address:
        return region
    return None


class StackFrames(object):
  """Represents 'stackFrames' trace file entry."""

  class PCFrame(object):
    """A stack frame whose name is an unsymbolized PC ('pc:<hex>').

    Wraps the frame dictionary from the trace; assigning to |name| updates
    that dictionary in place and marks the frame as modified.
    """

    def __init__(self, pc, frame):
      self._modified = False
      self._pc = pc
      self._frame = frame

    @property
    def modified(self):
      return self._modified

    @property
    def pc(self):
      return self._pc

    @property
    def name(self):
      return self._frame['name']

    @name.setter
    def name(self, value):
      self._modified = True
      self._frame['name'] = value

  def __init__(self, stack_frames):
    """Constructs object using 'stackFrames' dictionary.

    Only frames whose name starts with 'pc:' are retained (as PCFrame
    objects); all other frames are ignored.
    """
    self._pc_frames = []
    for frame in stack_frames.itervalues():
      pc_frame = self._ParsePCFrame(frame)
      if pc_frame:
        self._pc_frames.append(pc_frame)

  @property
  def pc_frames(self):
    return self._pc_frames

  @property
  def modified(self):
    """True if the name of at least one PC frame has been assigned."""
    return any(f.modified for f in self._pc_frames)

  _PC_TAG = 'pc:'

  @classmethod
  def _ParsePCFrame(cls, frame):
    """Returns PCFrame for |frame| if its name is 'pc:<hex>', else None."""
    name = frame['name']
    if not name.startswith(cls._PC_TAG):
      return None
    pc = long(name[len(cls._PC_TAG):], 16)
    return cls.PCFrame(pc, frame)


class Process(object):
  """Holds various bits of information about a process in a trace file."""

  def __init__(self, pid):
    self.pid = pid
    self.name = None
    self.mmaps = None
    self.stack_frames = None


def CollectProcesses(trace):
  """Parses trace dictionary and returns a list of Process objects for all
  processes suitable for symbolization (which have both mmaps and
  stack_frames).
  """

  process_map = {}

  # Android traces produced via 'chrome://inspect/?tracing#devices' are
  # just list of events.
  events = trace if isinstance(trace, list) else trace['traceEvents']
  for event in events:
    name = event.get('name')
    if not name:
      continue

    pid = event['pid']
    process = process_map.get(pid)
    if process is None:
      process = Process(pid)
      process_map[pid] = process

    phase = event['ph']
    if phase == TRACE_EVENT_PHASE_METADATA:
      if name == 'process_name':
        process.name = event['args']['name']
      elif name == 'stackFrames':
        process.stack_frames = StackFrames(event['args']['stackFrames'])
    elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
      process_mmaps = event['args']['dumps'].get('process_mmaps')
      if process_mmaps:
        # TODO(dskiba): this parses all process_mmaps, but retains only the
        #               last one. We need to parse only once (lazy parsing?).
        process.mmaps = ProcessMemoryMaps(process_mmaps)

  return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames]


class SymbolizableFile(object):
  """Holds file path, addresses to symbolize and stack frames to update.

  This class is a link between ELFSymbolizer and a trace file: it specifies
  what to symbolize (addresses) and what to update with the symbolization
  result (frames).
  """
  def __init__(self, file_path):
    self.path = file_path
    # Maps address relative to the start of the mapped region to the list of
    # frames that have that address.
    self.frames_by_address = collections.defaultdict(list)


def ResolveSymbolizableFiles(processes):
  """Resolves and groups PCs into list of SymbolizableFiles.

  As part of the grouping process, this function resolves PC from each stack
  frame to the corresponding mmap region. Stack frames that failed to resolve
  are symbolized with '<unresolved>'.
  """
  symfile_by_path = {}
  for process in processes:
    for frame in process.stack_frames.pc_frames:
      region = process.mmaps.FindRegion(frame.pc)
      if region is None:
        frame.name = '<unresolved>'
        continue

      symfile = symfile_by_path.get(region.file_path)
      if symfile is None:
        symfile = SymbolizableFile(region.file_path)
        symfile_by_path[symfile.path] = symfile

      relative_pc = frame.pc - region.start_address
      symfile.frames_by_address[relative_pc].append(frame)
  return symfile_by_path.values()


def SymbolizeFiles(symfiles, addr2line_path):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results.

  Frames of files that can't be symbolized are renamed to '<file path>' (or
  '<unnamed>' if the path is empty).

  Returns True if symbolization was run for at least one file.
  """
  print('Symbolizing...')

  def _SubPrintf(message, *args):
    print((' ' + message).format(*args))

  symbolized = False
  for symfile in symfiles:
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    problem = None
    if not os.path.isabs(symfile.path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.path):
      problem = "file doesn't exist"
    elif not IsSymbolizableFile(symfile.path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.path,
                 problem)
      for frames in symfile.frames_by_address.itervalues():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    # |fallback_name| is bound as a default argument so that the callback
    # keeps the name computed for this file, no matter when it is invoked.
    def _SymbolizerCallback(sym_info, frames,
                            fallback_name=unsymbolized_name):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else fallback_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
                                              addr2line_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    for address, frames in symfile.frames_by_address.iteritems():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()
    symbolized = True

  return symbolized


def HaveFilesFromAndroid(symfiles):
  """Returns True if the path of any of |symfiles| is an Android library
  path (see ANDROID_PATH_MATCHER)."""
  return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles)


def RemapAndroidFiles(symfiles, output_path):
  """Points Android library paths to unstripped libraries in |output_path|.

  The path of each file in |symfiles| that matches ANDROID_PATH_MATCHER is
  replaced in place with <output_path>/lib.unstripped/<library name>; other
  files are left untouched.
  """
  for symfile in symfiles:
    match = ANDROID_PATH_MATCHER.match(symfile.path)
    if match:
      name = match.group('name')
      symfile.path = os.path.join(output_path, ANDROID_UNSTRIPPED_SUBPATH, name)


# Suffix used for backup files.
BACKUP_FILE_TAG = '.BACKUP'

def main():
  """Parses command line, symbolizes the trace file and rewrites it."""
  parser = argparse.ArgumentParser()
  parser.add_argument('file',
                      help='Trace file to symbolize (.json or .json.gz)')
  # The default must be a real boolean; --no-backup flips it to False.
  parser.add_argument('--no-backup',
                      dest='backup', default=True, action='store_false',
                      help="Don't create {} files".format(BACKUP_FILE_TAG))
  parser.add_argument('--output-directory',
                      help='The path to the build output directory, such ' +
                           'as out/Debug. Only needed for Android.')
  options = parser.parse_args()

  trace_file_path = options.file
  def _OpenTraceFile(mode):
    # |mode| is 'r' or 'w'; gzipped traces are opened via gzip in binary
    # mode, everything else as a plain text file.
    if trace_file_path.endswith('.gz'):
      return gzip.open(trace_file_path, mode + 'b')
    else:
      return open(trace_file_path, mode + 't')

  addr2line_path = FindInSystemPath('addr2line')
  if addr2line_path is None:
    sys.exit("Can't symbolize - no addr2line in PATH.")

  print('Reading trace file...')
  with _OpenTraceFile('r') as trace_file:
    trace = json.load(trace_file)

  processes = CollectProcesses(trace)
  symfiles = ResolveSymbolizableFiles(processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      parser.error('The trace file appears to be from Android. Please '
                   "specify output directory (e.g. 'out/Debug') to properly "
                   'symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  if SymbolizeFiles(symfiles, addr2line_path):
    if options.backup:
      # The original file is moved aside; the updated trace is then written
      # under the original name.
      backup_file_path = trace_file_path + BACKUP_FILE_TAG
      print('Backing up trace file to {}...'.format(backup_file_path))
      os.rename(trace_file_path, backup_file_path)

    print('Updating trace file...')
    with _OpenTraceFile('w') as trace_file:
      json.dump(trace, trace_file)
  else:
    print('No PCs symbolized - not updating trace file.')


if __name__ == '__main__':
  main()