#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import getopt
import os
import re
import subprocess
import sys

# Module-level state.  `llvm_symbolizer`, `filetypes` and `vmaddrs` are not
# referenced elsewhere in the visible code; they are kept so that external
# users of this module that may read them keep working.
llvm_symbolizer = None
# Cache of per-binary symbolizer chains, keyed by binary path.
symbolizers = {}
filetypes = {}
vmaddrs = {}
# When True, the commands and symbolizer inputs are echoed to stdout.
DEBUG = False
# Set by the script entry point when -d/--demangle is given; read when the
# external symbolizer processes are launched.
demangle = False


def _paths_to_cut():
    """Return the positional command-line arguments (path patterns to strip).

    The option spec mirrors the one used by the script entry point, so flags
    such as -d/--demangle are not mistaken for path patterns.  If the command
    line does not parse with that spec (for example when this module is used
    by another tool with its own options), every argument is used, as before.
    """
    try:
        _, patterns = getopt.getopt(sys.argv[1:], 'd', ['demangle'])
    except getopt.GetoptError:
        patterns = sys.argv[1:]
    return patterns


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a "file:line" location for display.

    Args:
      file_name: location string produced by one of the symbolizers.
    Returns:
      The location with every command-line path pattern (and everything
      before it) removed, ASan runtime sources collapsed to '_asan_rtl_' and
      'crtstuff.c:0' replaced with '???:0'.
    """
    for path_to_cut in _paths_to_cut():
        # The pattern is used as a regular expression, not as a literal path.
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub(r'.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff.c:0', '???:0', file_name)
    return file_name


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be started.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer; return the Popen object or None on failure."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        # Let the OS resolve the executable (including a bare name looked up
        # in PATH) instead of testing os.path.exists(), which only sees paths
        # relative to the current directory.
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write('%s\n' % symbolizer_input)
            self.pipe.stdin.flush()
            # The reply is read as (function, location) line pairs until an
            # empty line is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "not symbolized", so that the
            # caller can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The tool path is taken from the LLVM_SYMBOLIZER_PATH environment
    variable; `system` is accepted but not used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start `addr2line -f [--demangle] -e <binary>` and return it."""
        cmd = ['addr2line', '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: still emit a frame, with empty names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by atos; one atos process is run per address."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.vmaddr = None
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Send `offset` (a hex string) to atos, normalized to 0x-prefixed hex."""
        self.pipe.stdin.write('0x%x\n' % int(offset, 16))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start `atos -o <binary> -arch <arch>` and store it in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        self.pipe.stdin.close()
        atos_line = self.pipe.stdout.readline().rstrip()
        # A fresh atos is started for every address, so release its pipes and
        # reap it here; otherwise descriptors and zombie processes pile up.
        for stream in (self.pipe.stdout, self.pipe.stderr):
            if stream is not None:
                stream.close()
        self.pipe.wait()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries each symbolizer in order and returns the first non-empty result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        # May contain None entries (factories that found nothing to build);
        # they are skipped by symbolize().
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None.

    The symbol file is looked up at `binary` + $BREAKPAD_SUFFIX; None is
    returned when the variable is unset/empty or the file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer (atos on Darwin, addr2line on Linux).

    Returns None for any other `system`.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that reads a Breakpad text symbol file into memory."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # File names, indexed by the number given in the FILE records.
        self.files = []
        # Function/public symbol address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records (for bisection).
        self.address_list = []
        # Line record address -> (symbol address, size, line, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Populate the lookup tables from the records following MODULE."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) covering `addr`, or None."""
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record starting below addr, if any.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        # Only a hit if addr falls inside the record's [key, key + size) range.
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            # Diagnostic only; printing it unconditionally would interleave a
            # list repr with the symbolized stack trace.
            if DEBUG:
                print(result)
            return result
        else:
            return None


class SymbolizationLoop(object):
    """Reads a report from stdin and prints it with stack frames symbolized."""

    def __init__(self, binary_name_filter=None):
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Return the list of symbolized frames for `offset` in `binary`."""
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the unmodified input line if there are none."""
        if not symbolized_lines:
            print(self.current_line)
        else:
            for symbolized_frame in symbolized_lines:
                # NOTE(review): confirm the intended width of the leading
                # whitespace in this prefix against the expected report format.
                print(' #' + str(self.frame_no) + ' ' +
                      symbolized_frame.rstrip())
                self.frame_no += 1

    def process_stdin(self):
        """Symbolize every stack frame line on stdin; echo all other lines."""
        self.frame_no = 0
        # Matches e.g. "#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)".
        stack_trace_line_format = re.compile(
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        for line in sys.stdin:
            self.current_line = line.rstrip()
            match = stack_trace_line_format.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line and original_binary != binary:
                # The filtered name gave nothing: retry with the name that
                # actually appeared in the report.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    for o, a in opts:
        if o in ("-d", "--demangle"):
            demangle = True
    loop = SymbolizationLoop()
    loop.process_stdin()