#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
"""Symbolize stack-trace lines read from stdin.

Lines that look like ``#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)`` are
replaced by one line per (possibly inlined) frame; every other line is echoed
unchanged.

Command line:
  -d, --demangle   ask the underlying tools to demangle function names.
  other arguments  regular expressions; everything up to and including a
                   match is cut from reported file names (see fix_filename).
"""
import bisect
import getopt
import os
import pty
import re
import subprocess
import sys
import termios

llvm_symbolizer = None
# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
DEBUG = False
demangle = False


def _write_line(stream, text):
    """Write `text` plus a newline to `stream` and flush it immediately.

    The child tools answer one request at a time, so a request must not sit
    in a user-space buffer while we block waiting for the answer.
    """
    stream.write(text + '\n')
    stream.flush()


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a "file:line" string for display.

    Args:
      file_name: location string as produced by a symbolizer tool.
    Returns:
      The string with every path prefix given on the command line removed,
      ASan runtime sources collapsed to '_asan_rtl_' and 'crtstuff.c:0'
      replaced by '???:0'.
    """
    for path_to_cut in sys.argv[1:]:
        # Option flags (-d, --demangle) share sys.argv with the path
        # prefixes; using them as patterns would mangle unrelated names.
        if path_to_cut.startswith('-'):
            continue
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by one long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be started.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the tool; return the Popen object or None if unavailable."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Let the OS resolve the executable: a bare 'llvm-symbolizer'
            # is meant to be looked up in PATH, which a plain
            # os.path.exists() check cannot do.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    bufsize=0,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            _write_line(self.pipe.stdin, symbolizer_input)
            # The answer is a sequence of (function, file) line pairs that
            # ends with an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure talking to the tool means "no
            # answer", so that the caller falls back to the next symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The tool path is taken from $LLVM_SYMBOLIZER_PATH when set. `system` is
    accepted for interface compatibility and is not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by one addr2line process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start `addr2line -f [--demangle] -e <binary>`."""
        cmd = ['addr2line', '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        try:
            _write_line(self.pipe.stdin, offset)
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Still produce a frame (with empty names): callers rely on the
            # system symbolizer always returning something.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its next output line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the child's next output line without trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by one atos process bound to one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.open_atos()

    def open_atos(self):
        """Start `atos -o <binary> -arch <arch>` behind a pty."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational chatter that precedes the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('%s %s' % ('atos_line: ', atos_line))
        if match:
            function_name = match.group(1)
            # Drop the argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try each symbolizer in order and return the first non-empty result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        # Entries may be None (an unavailable symbolizer); those are skipped.
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None.

    The symbol file is looked up at `binary + $BREAKPAD_SUFFIX`; None is
    returned when the variable is unset or the file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform tool symbolizer (atos or addr2line) for `binary`.

    Returns None for any `system` other than 'Darwin' and 'Linux'.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a textual Breakpad symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # FILE records, indexed by file number.
        self.files = []
        # Function/public symbol address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records.
        self.address_list = []
        # Line record address -> (symbol address, size, line, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following MODULE."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ['CFI', 'STACK']:
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name registered earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = (
                        ' '.join(fragments[4:]))
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) covering `addr`, or None."""
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record starting below addr, if there is one.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            if DEBUG:
                # Diagnostics only: the frame itself is printed by the
                # caller, so this must not appear in normal output.
                print(str(result))
            return result
        else:
            return None


class SymbolizationLoop(object):
    """Read stack traces from stdin and print them symbolized."""

    # Matches e.g.:  #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    _STACK_TRACE_LINE_RE = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

    def __init__(self, binary_name_filter=None):
        """
        Args:
            binary_name_filter: optional callable mapping the binary path
                found in a frame to the path that should be symbolized.
        Raises:
            Exception: when not running on Linux or Darwin.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Return the list of frame descriptions for one address."""
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the current input line if there are none."""
        if not symbolized_lines:
            print(self.current_line)
        else:
            for symbolized_frame in symbolized_lines:
                print(' #' + str(self.frame_no) + ' ' +
                      symbolized_frame.rstrip())
                self.frame_no += 1

    def process_stdin(self):
        """Symbolize stdin line by line until end of input."""
        self.frame_no = 0
        # readline() rather than iterating sys.stdin: output should follow
        # each input line as it arrives, without read-ahead.
        while True:
            line = sys.stdin.readline()
            if not line:
                break
            self.current_line = line.rstrip()
            match = self._STACK_TRACE_LINE_RE.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line:
                if original_binary != binary:
                    # The filtered name did not help; retry with the name
                    # that was actually present in the trace.
                    symbolized_line = self.symbolize_address(
                        addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    for o, a in opts:
        if o in ("-d", "--demangle"):
            demangle = True
    loop = SymbolizationLoop()
    loop.process_stdin()