# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The ElfSymbolizer class for symbolizing Executable and Linkable Files.

Adapted for Skia's use from
chromium/src/build/android/pylib/symbols/elf_symbolizer.py.

Main changes:
-- Added prefix_to_remove param to remove path prefix from tree data.
"""

import collections
import datetime
import logging
import multiprocessing
import os
import posixpath
import re
import subprocess
import sys
import threading

try:
  import Queue  # Python 2 module name.
except ImportError:
  import queue as Queue  # Python 3 renamed the module to |queue|.


# addr2line builds a possibly infinite memory cache that can exhaust
# the computer's memory if allowed to grow for too long. This constant
# controls how many lookups we do before restarting the process. 4000
# gives near peak performance without extreme memory usage.
ADDR2LINE_RECYCLE_LIMIT = 4000


class ELFSymbolizer(object):
  """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer.

  This class is a frontend for addr2line (part of GNU binutils), designed to
  symbolize batches of large numbers of symbols for a given ELF file. It
  supports sharding symbolization against many addr2line instances and
  pipelining of multiple requests per each instance (in order to hide addr2line
  internals and OS pipe latencies).

  The interface exhibited by this class is a very simple asynchronous interface,
  which is based on the following three methods:
  - SymbolizeAsync(): used to request (enqueue) resolution of a given address.
  - The |callback| method: used to communicate back the symbol information.
  - Join(): called to conclude the batch to gather the last outstanding results.
  In essence, before the Join method returns, this class will have issued as
  many callbacks as the number of SymbolizeAsync() calls. In this regard, note
  that due to multiprocess sharding, callbacks can be delivered out of order.

  Some background about addr2line:
  - it is invoked passing the elf path in the cmdline, piping the addresses in
    its stdin and getting results on its stdout.
  - it has pretty large response times for the first requests, but it
    works very well in streaming mode once it has been warmed up.
  - it doesn't scale by itself (on more cores). However, spawning multiple
    instances at the same time on the same file is pretty efficient as they
    keep hitting the pagecache and become mostly CPU bound.
  - it might hang or crash, mostly for OOM. This class deals with both of these
    problems.

  Despite the "scary" imports and the multi* words above, (almost) no multi-
  threading/processing is involved from the python viewpoint. Concurrency
  here is achieved by spawning several addr2line subprocesses and handling their
  output pipes asynchronously. Therefore, all the code here (with the exception
  of the Queue instance in Addr2Line) should be free from mind-blowing
  thread-safety concerns.

  The multiprocess sharding works as follows:
  The symbolizer tries to use the lowest number of addr2line instances as
  possible (with respect of |max_concurrent_jobs|) and enqueue all the requests
  in a single addr2line instance. For few symbols (i.e. dozens) sharding isn't
  worth the startup cost.
  The multiprocess logic kicks in as soon as the queues for the existing
  instances grow. Specifically, once all the existing instances reach the
  |max_queue_size| bound, a new addr2line instance is kicked in.
  In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances
  have a backlog of |max_queue_size|), back-pressure is applied on the caller by
  blocking the SymbolizeAsync method.

  This module has been deliberately designed to be dependency free (w.r.t. of
  other modules in this project), to allow easy reuse in external projects.
  """

  def __init__(self, elf_file_path, addr2line_path, callback, inlines=False,
      max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50,
      source_root_path=None, strip_base_path=None, prefix_to_remove=None):
    """Args:
      elf_file_path: path of the elf file to be symbolized.
      addr2line_path: path of the toolchain's addr2line binary.
      callback: a callback which will be invoked for each resolved symbol with
          the two args (sym_info, callback_arg). The former is an instance of
          |ELFSymbolInfo| and contains the symbol information. The latter is an
          embedder-provided argument which is passed to SymbolizeAsync().
      inlines: when True, the ELFSymbolInfo will contain also the details about
          the outer inlining functions. When False, only the innermost function
          will be provided.
      max_concurrent_jobs: Max number of addr2line instances spawned.
          Parallelize responsibly, addr2line is a memory and I/O monster.
      max_queue_size: Max number of outstanding requests per addr2line instance.
      addr2line_timeout: Max time (in seconds) to wait for a addr2line response.
          After the timeout, the instance will be considered hung and respawned.
      source_root_path: In some toolchains only the name of the source file is
          output, without any path information; disambiguation searches
          through the source directory specified by |source_root_path| argument
          for files whose name matches, adding the full path information to the
          output. For example, if the toolchain outputs "unicode.cc" and there
          is a file called "unicode.cc" located under |source_root_path|/foo,
          the tool will replace "unicode.cc" with
          "|source_root_path|/foo/unicode.cc". If there are multiple files with
          the same name, disambiguation will fail because the tool cannot
          determine which of the files was the source of the symbol.
      strip_base_path: Rebases the symbols source paths onto |source_root_path|
          (i.e. replace |strip_base_path| with |source_root_path|). When
          |source_root_path| is not given the prefix is simply dropped.
      prefix_to_remove: Removes the prefix from ElfSymbolInfo output. Skia added
    """
    assert os.path.isfile(addr2line_path), 'Cannot find ' + addr2line_path
    self.elf_file_path = elf_file_path
    self.addr2line_path = addr2line_path
    self.callback = callback
    self.inlines = inlines
    self.max_concurrent_jobs = (max_concurrent_jobs or
                                min(multiprocessing.cpu_count(), 4))
    self.max_queue_size = max_queue_size
    self.addr2line_timeout = addr2line_timeout
    self.requests_counter = 0  # For generating monotonic request IDs.
    self._a2l_instances = []  # Up to |max_concurrent_jobs| _Addr2Line inst.

    # Skia addition: remove the given prefix from tree paths.
    self.prefix_to_remove = prefix_to_remove

    # If necessary, create disambiguation lookup table
    self.disambiguate = source_root_path is not None
    self.disambiguation_table = {}
    self.strip_base_path = strip_base_path
    # Always define the attribute: the rebasing step reads it whenever
    # |strip_base_path| is set, even if no |source_root_path| was provided.
    self.source_root_path = None
    if self.disambiguate:
      self.source_root_path = os.path.abspath(source_root_path)
      self._CreateDisambiguationTable()

    # Create one addr2line instance. More instances will be created on demand
    # (up to |max_concurrent_jobs|) depending on the rate of the requests.
    self._CreateNewA2LInstance()

  def SymbolizeAsync(self, addr, callback_arg=None):
    """Requests symbolization of a given address.

    This method is not guaranteed to return immediately. It generally does, but
    in some scenarios (e.g. all addr2line instances have full queues) it can
    block to create back-pressure.

    Args:
      addr: address to symbolize.
      callback_arg: optional argument which will be passed to the |callback|."""
    assert isinstance(addr, int)

    # Process all the symbols that have been resolved in the meanwhile.
    # Essentially, this drains all the addr2line(s) out queues.
    for a2l_to_purge in self._a2l_instances:
      a2l_to_purge.ProcessAllResolvedSymbolsInQueue()
      a2l_to_purge.RecycleIfNecessary()

    # Find the best instance according to this logic:
    # 1. Find an existing instance with the shortest queue.
    # 2. If all of instances' queues are full, but there is room in the pool,
    #    (i.e. < |max_concurrent_jobs|) create a new instance.
    # 3. If there were already |max_concurrent_jobs| instances and all of them
    #    had full queues, make back-pressure.

    # 1.
    def _SortByQueueSizeAndReqID(a2l):
      return (a2l.queue_size, a2l.first_request_id)
    a2l = min(self._a2l_instances, key=_SortByQueueSizeAndReqID)

    # 2.
    if (a2l.queue_size >= self.max_queue_size and
        len(self._a2l_instances) < self.max_concurrent_jobs):
      a2l = self._CreateNewA2LInstance()

    # 3.
    if a2l.queue_size >= self.max_queue_size:
      a2l.WaitForNextSymbolInQueue()

    a2l.EnqueueRequest(addr, callback_arg)

  def Join(self):
    """Waits for all the outstanding requests to complete and terminates."""
    for a2l in self._a2l_instances:
      a2l.WaitForIdle()
      a2l.Terminate()

  def _CreateNewA2LInstance(self):
    """Spawns one more addr2line wrapper, adds it to the pool, returns it."""
    assert len(self._a2l_instances) < self.max_concurrent_jobs
    a2l = ELFSymbolizer.Addr2Line(self)
    self._a2l_instances.append(a2l)
    return a2l

  def _CreateDisambiguationTable(self):
    """Maps each file name under |source_root_path| to its full path.

    Non-unique file names will result in None entries."""
    self.disambiguation_table = {}
    table = self.disambiguation_table

    for root, _, filenames in os.walk(self.source_root_path):
      for f in filenames:
        # A name seen more than once cannot be resolved: poison it with None.
        table[f] = os.path.join(root, f) if f not in table else None


  class Addr2Line(object):
    """A python wrapper around an addr2line instance.

    The communication with the addr2line process looks as follows:
      [STDIN]         [STDOUT]  (from addr2line's viewpoint)
    > f001111
    > f002222
                    < Symbol::Name(foo, bar) for f001111
                    < /path/to/source/file.c:line_number
    > f003333
                    < Symbol::Name2() for f002222
                    < /path/to/source/file.c:line_number
                    < Symbol::Name3() for f003333
                    < /path/to/source/file.c:line_number
    """

    SYM_ADDR_RE = re.compile(r'([^:]+):(\?|\d+).*')

    def __init__(self, symbolizer):
      """Args:
        symbolizer: the owning |ELFSymbolizer|; its settings (paths, inlines,
            timeout, callback) are read from it on demand.
      """
      self._symbolizer = symbolizer
      self._lib_file_name = posixpath.basename(symbolizer.elf_file_path)

      # The request queue (i.e. addresses pushed to addr2line's stdin and not
      # yet retrieved on stdout)
      self._request_queue = collections.deque()

      # This is essentially len(self._request_queue). It has been optimized to a
      # separate field because turned out to be a perf hot-spot.
      self.queue_size = 0

      # Keep track of the number of symbols a process has processed to
      # avoid a single process growing too big and using all the memory.
      self._processed_symbols_count = 0

      # Objects required to handle the addr2line subprocess.
      self._proc = None  # Subprocess.Popen(...) instance.
      self._thread = None  # Threading.thread instance.
      self._out_queue = None  # Queue.Queue instance (for buffering a2l stdout).
      self._RestartAddr2LineProcess()

    def EnqueueRequest(self, addr, callback_arg):
      """Pushes an address to addr2line's stdin (and keeps track of it)."""
      self._symbolizer.requests_counter += 1  # For global "age" of requests.
      req_idx = self._symbolizer.requests_counter
      self._request_queue.append((addr, callback_arg, req_idx))
      self.queue_size += 1
      self._WriteToA2lStdin(addr)

    def WaitForIdle(self):
      """Waits until all the pending requests have been symbolized."""
      while self.queue_size > 0:
        self.WaitForNextSymbolInQueue()

    def WaitForNextSymbolInQueue(self):
      """Waits for the next pending request to be symbolized.

      Returns immediately when nothing is pending. Otherwise blocks until one
      symbol has been processed, respawning addr2line if it exits or if it
      produces no output for |addr2line_timeout| seconds."""
      if not self.queue_size:
        return

      # This outer loop guards against a2l hanging (detecting stdout timeout).
      while True:
        start_time = datetime.datetime.now()
        timeout = datetime.timedelta(seconds=self._symbolizer.addr2line_timeout)

        # The inner loop guards against a2l crashing (checking if it exited).
        while datetime.datetime.now() - start_time < timeout:
          # poll() returns !None if the process exited. a2l should never exit.
          # Compare against None explicitly: an exit status of 0 is falsy but
          # still means the process is gone.
          if self._proc.poll() is not None:
            logging.warning('addr2line crashed, respawning (lib: %s).',
                            self._lib_file_name)
            self._RestartAddr2LineProcess()
            # TODO(primiano): the best thing to do in this case would be
            # shrinking the pool size as, very likely, addr2line is crashed
            # due to low memory (and the respawned one will die again soon).

          try:
            lines = self._out_queue.get(block=True, timeout=0.25)
          except Queue.Empty:
            # On timeout (1/4 s.) repeat the inner loop and check if either the
            # addr2line process did crash or we waited its output for too long.
            continue

          # In nominal conditions, we get straight to this point.
          self._ProcessSymbolOutput(lines)
          return

        # If this point is reached, we waited more than |addr2line_timeout|.
        logging.warning('Hung addr2line process, respawning (lib: %s).',
                        self._lib_file_name)
        self._RestartAddr2LineProcess()

    def ProcessAllResolvedSymbolsInQueue(self):
      """Consumes all the addr2line output lines produced (without blocking)."""
      if not self.queue_size:
        return
      while True:
        try:
          lines = self._out_queue.get_nowait()
        except Queue.Empty:
          break
        self._ProcessSymbolOutput(lines)

    def RecycleIfNecessary(self):
      """Restarts the process if it has been used for too long.

      A long running addr2line process will consume excessive amounts
      of memory without any gain in performance."""
      if self._processed_symbols_count >= ADDR2LINE_RECYCLE_LIMIT:
        self._RestartAddr2LineProcess()

    def Terminate(self):
      """Kills the underlying addr2line process.

      The poller |_thread| will terminate as well due to the broken pipe."""
      try:
        self._proc.kill()
        self._proc.communicate()  # Essentially wait() without risking deadlock.
      except Exception:  # An exception while terminating? How interesting.
        # Deliberately best-effort: the process may already be gone.
        pass
      self._proc = None

    def _WriteToA2lStdin(self, addr):
      """Sends |addr| (as a hex string, one per line) to addr2line's stdin."""
      self._proc.stdin.write('%s\n' % hex(addr))
      if self._symbolizer.inlines:
        # In the case of inlines we output an extra blank line, which causes
        # addr2line to emit a (??,??:0) tuple that we use as a boundary marker.
        self._proc.stdin.write('\n')
      self._proc.stdin.flush()

    def _ProcessSymbolOutput(self, lines):
      """Parses an addr2line symbol output and triggers the client callback.

      Args:
        lines: list of (function_name_line, "path:line" line) tuples for ONE
            request, innermost frame first. It contains more than one tuple
            only when inlines are enabled.
      """
      (_, callback_arg, _) = self._request_queue.popleft()
      self.queue_size -= 1

      symbolizer = self._symbolizer
      innermost_sym_info = None
      sym_info = None
      for (line1, line2) in lines:
        prev_sym_info = sym_info
        name = line1 if not line1.startswith('?') else None
        source_path = None
        source_line = None
        m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2)
        if m:
          if not m.group(1).startswith('?'):
            source_path = m.group(1)
            if not m.group(2).startswith('?'):
              source_line = int(m.group(2))
        else:
          logging.warning('Got invalid symbol path from addr2line: %s', line2)

        # In case disambiguation is on, and needed
        was_ambiguous = False
        disambiguated = False
        if symbolizer.disambiguate:
          if source_path and not posixpath.isabs(source_path):
            path = symbolizer.disambiguation_table.get(source_path)
            was_ambiguous = True
            disambiguated = path is not None
            source_path = path if disambiguated else source_path

          # Use absolute paths (so that paths are consistent, as disambiguation
          # uses absolute paths)
          if source_path and not was_ambiguous:
            source_path = os.path.abspath(source_path)

        base_path = symbolizer.strip_base_path
        if source_path and base_path and source_path.startswith(base_path):
          # Rebase the path. This is a literal prefix swap on purpose: going
          # through a regex would misinterpret '.', '(', '+' or backslashes
          # that legitimately appear in file system paths.
          new_root = getattr(symbolizer, 'source_root_path', None) or ''
          source_path = new_root + source_path[len(base_path):]

        sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous,
                                 disambiguated,
                                 symbolizer.prefix_to_remove)
        if prev_sym_info:
          prev_sym_info.inlined_by = sym_info
        if not innermost_sym_info:
          innermost_sym_info = sym_info

      self._processed_symbols_count += 1
      symbolizer.callback(innermost_sym_info, callback_arg)

    def _RestartAddr2LineProcess(self):
      """(Re)spawns addr2line and its reader thread, replaying pending work."""
      if self._proc:
        self.Terminate()

      # The only reason of existence of this Queue (and the corresponding
      # Thread below) is the lack of a subprocess.stdout.poll_avail_lines().
      # Essentially this is a pipe able to extract a couple of lines atomically.
      self._out_queue = Queue.Queue()

      # Start the underlying addr2line process in line buffered mode.

      cmd = [self._symbolizer.addr2line_path, '--functions', '--demangle',
          '--exe=' + self._symbolizer.elf_file_path]
      if self._symbolizer.inlines:
        cmd += ['--inlines']
      # universal_newlines=True makes the pipes text streams on Python 3 (they
      # are byte streams otherwise, which breaks the str writes/reads below)
      # and is harmless on Python 2.
      self._proc = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE,
          stdin=subprocess.PIPE, stderr=sys.stderr, close_fds=True,
          universal_newlines=True)

      # Start the poller thread, which simply moves atomically the lines read
      # from the addr2line's stdout to the |_out_queue|.
      self._thread = threading.Thread(
          target=ELFSymbolizer.Addr2Line.StdoutReaderThread,
          args=(self._proc.stdout, self._out_queue, self._symbolizer.inlines))
      self._thread.daemon = True  # Don't prevent early process exit.
      self._thread.start()

      self._processed_symbols_count = 0

      # Replay the pending requests on the new process (only for the case
      # of a hung addr2line timing out during the game).
      for (addr, _, _) in self._request_queue:
        self._WriteToA2lStdin(addr)

    @staticmethod
    def StdoutReaderThread(process_pipe, queue, inlines):
      """The poller thread fn, which moves the addr2line stdout to the |queue|.

      This is the only piece of code not running on the main thread. It merely
      writes to a Queue, which is thread-safe. In the case of inlines, it
      detects the ??,??:0 marker and sends the lines atomically, such that the
      main thread always receives all the lines corresponding to one symbol in
      one shot.

      Args:
        process_pipe: file-like object to read addr2line's output from.
        queue: object with a put() method receiving one list of
            (line1, line2) tuples per symbolized address.
        inlines: whether addr2line was started with --inlines.
      """
      try:
        lines_for_one_symbol = []
        while True:
          line1 = process_pipe.readline().rstrip('\r\n')
          line2 = process_pipe.readline().rstrip('\r\n')
          if not line1 or not line2:
            break
          # With inlines, the first tuple always belongs to the symbol; later
          # tuples belong to it until the (??, ??:0) boundary marker shows up.
          inline_has_more_lines = inlines and (len(lines_for_one_symbol) == 0 or
                                  (line1 != '??' and line2 != '??:0'))
          if not inlines or inline_has_more_lines:
            lines_for_one_symbol += [(line1, line2)]
          if inline_has_more_lines:
            continue
          queue.put(lines_for_one_symbol)
          lines_for_one_symbol = []
        process_pipe.close()

      # Every addr2line processes will die at some point, please die silently.
      except (IOError, OSError):
        pass

    @property
    def first_request_id(self):
      """Returns the request_id of the oldest pending request in the queue."""
      return self._request_queue[0][2] if self._request_queue else 0


class ELFSymbolInfo(object):
  """The result of the symbolization passed as first arg. of each callback."""

  def __init__(self, name, source_path, source_line, was_ambiguous=False,
               disambiguated=False, prefix_to_remove=None):
    """All the fields here can be None (if addr2line replies with '??').

    Args:
      name: the symbol (function) name.
      source_path: path of the source file defining the symbol.
      source_line: line number within |source_path|.
      was_ambiguous: True if |source_path| had to go through disambiguation.
      disambiguated: True if that disambiguation succeeded.
      prefix_to_remove: optional prefix stripped from |source_path|. None (the
          default) or an empty string leaves the path untouched.
    """
    self.name = name
    # Guard on |prefix_to_remove|: str.startswith(None) raises TypeError.
    if (source_path and prefix_to_remove and
        source_path.startswith(prefix_to_remove)):
      source_path = source_path[len(prefix_to_remove):]
    self.source_path = source_path
    self.source_line = source_line
    # In the case of |inlines|=True, the |inlined_by| points to the outer
    # function inlining the current one (and so on, to form a chain).
    self.inlined_by = None
    self.disambiguated = disambiguated
    self.was_ambiguous = was_ambiguous

  def __str__(self):
    return '%s [%s:%d]' % (
        self.name or '??', self.source_path or '??', self.source_line or 0)