#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import argparse
import bisect
import getopt
import os
import pty
import re
import subprocess
import sys
import termios

# Cache of per-binary symbolizer chains, keyed by binary path.
symbolizers = {}
# When True, the commands that are run and the raw requests are printed.
DEBUG = False
# Whether function names should be demangled (set by -d/--demangle).
demangle = False
# Prefix prepended to the addr2line tool name (set by -c).
binutils_prefix = None
# Path prepended to binary names by sysroot_path_filter() (set by -s).
sysroot_path = None
# Optional callable used to rewrite binary names before symbolization.
binary_name_filter = None
# Optional list of regex patterns cut from reported file paths.
fix_filename_patterns = None
# File object the log is read from.
logfile = sys.stdin


def _send_line(stream, text):
    """Write `text` followed by a newline to `stream` and flush it.

    The explicit flush matters when the stream is buffered (the default for
    subprocess pipes on Python 3); without it the child never sees the
    request and the following readline() would block.
    """
    stream.write('%s\n' % text)
    stream.flush()


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a file name reported by a symbolizer.

    Args:
      file_name: the "file:line" string produced by a symbolizer.
    Returns:
      The string with every user-supplied pattern (and everything before it)
      removed, ASan runtime locations replaced by '_asan_rtl_' and
      'crtstuff.c:0' locations replaced by '???:0'.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            # path_to_cut is used as a regular expression, not a literal.
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


def sysroot_path_filter(binary_name):
    """Return `binary_name` prefixed with the global `sysroot_path`."""
    return sysroot_path + binary_name


def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Args:
      addr: address string such as '0x7f6e35cf2e45'.
    Returns:
      'x86_64' if the string is longer than 10 characters, else 'i386'.
    """
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
        return 'x86_64'
    return 'i386'


class Symbolizer(object):
    """Base class of all symbolizers."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
              instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to a long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path, addr):
        """Start the symbolizer.

        Args:
          symbolizer_path: path (or command name) of llvm-symbolizer.
          addr: a sample address string, used only to guess the default arch.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = guess_arch(addr)
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Spawn llvm-symbolizer; return the Popen object or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if DEBUG:
            print(' '.join(cmd))
        try:
            # universal_newlines=True gives text-mode pipes, so the str
            # requests/responses below work on Python 3 as well as Python 2.
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      universal_newlines=True)
        except OSError:
            # The tool is missing or not executable: this symbolizer is
            # simply unavailable and symbolize() will return None.
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            _send_line(self.pipe.stdin, symbolizer_input)
            # The response is read as (function, file) line pairs until an
            # empty line (or EOF) is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure talking to the child means "no
            # result", which lets the caller fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system, addr):
    """Create an LLVMSymbolizer.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH,
    and finally defaults to 'llvm-symbolizer' looked up in PATH.
    `system` is accepted for interface compatibility and is not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path, addr)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by one addr2line process for a single binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Spawn addr2line (with the binutils prefix, if any) for the binary."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text-mode pipes so that str I/O works on Python 2 and Python 3.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        try:
            _send_line(self.pipe.stdin, offset)
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: still report the address with empty names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pty and exec `args` in it.

        Args:
          args: command line (list); args[0] is looked up via execvp.
          close_stderr: if True, the child's fd 2 is replaced by /dev/null.
        """
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line answer."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, without trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an `atos` process for a single binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary/self.arch behind a pty wrapper."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', match.group(1))
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        # Unrecognized response: pass it through verbatim.
        return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order and return the first result."""

    def __init__(self, symbolizer_list):
        """Args:
          symbolizer_list: list of symbolizers; None entries are skipped.
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add a symbolizer (possibly None) to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None.

    A symbolizer is created only if the BREAKPAD_SUFFIX environment variable
    is set and the file `binary + suffix` exists.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform's fallback symbolizer, or None if there is none.

    Args:
      system: system name as reported by os.uname()[0].
      addr: sample address string (used to guess the arch on Darwin).
      binary: path of the binary to symbolize.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    # SymbolizationLoop accepts FreeBSD as well as Linux, so both must get a
    # fallback symbolizer; otherwise symbolize_address() has nothing to fall
    # back to and its final assertion fails.
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that reads a Breakpad symbol file into memory."""

    def __init__(self, filename):
        """Parse the Breakpad symbol file `filename`."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # File names, indexed by the file number of FILE records.
        self.files = []
        # Symbol start address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records.
        self.address_list = []
        # Line-record address -> (symbol address, size, line, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name already recorded for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering `addr`.

        Returns:
          (symbol, filename, line_no) if a record starting at or before
          `addr` covers it, otherwise None.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # addr precedes every known record.
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Diagnostic only; keep it out of the regular symbolized output.
            print(result)
        return result


class SymbolizationLoop(object):
    """Reads a log line by line and symbolizes its stack-trace frames."""

    def __init__(self, binary_name_filter=None):
        """Args:
          binary_name_filter: optional callable mapping a binary name from
            the log to the name that should be symbolized.
        Raises:
          Exception: if os.uname() reports a system other than Linux, Darwin
            or FreeBSD.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
            raise Exception('Unknown system')
        self.llvm_symbolizer = None
        self.frame_no = 0

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame and return the list of frame descriptions."""
        # Initialize llvm-symbolizer lazily.
        if not self.llvm_symbolizer:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames as numbered output lines.

        Returns the current input line unchanged if there is nothing to
        format; otherwise one line per frame, advancing self.frame_no.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the leading whitespace of this literal is kept
            # exactly as seen in the reviewed copy, whose whitespace looked
            # collapsed; confirm the intended frame indentation.
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Symbolize every line of the global `logfile` and print the result."""
        self.frame_no = 0
        # readline() (rather than iterating the file) hands over each line as
        # soon as it is available.
        for line in iter(logfile.readline, ''):
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line(self, line):
        """Return the output lines for one input line.

        Lines that do not look like a stack frame are returned unchanged
        (minus trailing whitespace).
        """
        self.current_line = line.rstrip()
        # Example frame: "#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)"
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name that
            # actually appeared in the log.
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='''Example of use:
asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log''')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()
    # The assignments below set the module-level configuration globals that
    # the symbolizer classes read.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    if args.logfile:
        logfile = args.logfile
    else:
        logfile = sys.stdin
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()