#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import os
import re
import subprocess

CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                          os.pardir, os.pardir, os.pardir, os.pardir)
ANDROID_BUILD_TOP = CHROME_SRC
SYMBOLS_DIR = CHROME_SRC
CHROME_SYMBOLS_DIR = CHROME_SRC

ARCH = "arm"

TOOLCHAIN_INFO = None

# Maps each supported ARCH value to (toolchain directory, tool name prefix)
# inside the checked-in NDK.
_NDK_TOOLCHAINS = {
    "arm": ("arm-linux-androideabi-4.6", "arm-linux-androideabi"),
    "arm64": ("aarch64-linux-android-4.9", "aarch64-linux-android"),
    "x86": ("x86-4.6", "i686-android-linux"),
    "x86_64": ("x86_64-4.9", "x86_64-linux-android"),
    "mips": ("mipsel-linux-android-4.6", "mipsel-linux-android"),
}


def Uname():
  """'uname' for constructing prebuilt/<...> and out/host/<...> paths.

  Returns:
    "darwin-x86" or "darwin-ppc" on Darwin (chosen from the machine field of
    os.uname()), "linux-x86" on Linux, and the raw system name otherwise.
  """
  uname = os.uname()[0]
  if uname == "Darwin":
    proc = os.uname()[-1]
    if proc in ("i386", "x86_64"):
      return "darwin-x86"
    return "darwin-ppc"
  if uname == "Linux":
    return "linux-x86"
  return uname


def ToolPath(tool, toolchain_info=None):
  """Return a full qualified path to the specified tool.

  The tool is looked up in the checked-in android_tools NDK under CHROME_SRC,
  using the toolchain that corresponds to the module-level ARCH. The host
  directory is always "linux-x86_64".

  Args:
    tool: tool name without the target prefix, e.g. "addr2line".
    toolchain_info: accepted for interface compatibility; it is not consulted.

  Returns:
    The path to the prefixed tool binary.

  Raises:
    Exception: if ARCH is not one of the supported architectures.
  """
  try:
    toolchain_source, toolchain_prefix = _NDK_TOOLCHAINS[ARCH]
  except KeyError:
    raise Exception("Could not find tool chain")

  toolchain_subdir = (
      "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
      ("ndk", toolchain_source))

  return os.path.join(CHROME_SRC,
                      toolchain_subdir,
                      toolchain_prefix + "-" + tool)


def FindToolchain():
  """Look for the latest available toolchain.

  The result is cached in the module-level TOOLCHAIN_INFO, so the file system
  is only probed (and the "Using toolchain" message only printed) once.

  Args:
    None

  Returns:
    A (label, platform, target) tuple of strings describing the toolchain.

  Raises:
    Exception: if ARCH is unsupported or addr2line cannot be found.
  """
  global TOOLCHAIN_INFO
  if TOOLCHAIN_INFO is not None:
    return TOOLCHAIN_INFO

  ## Known toolchains, newer ones in the front.
  # NOTE(review): the labels below are informational only. ToolPath() ignores
  # toolchain_info, and the x86 label here does not match the directory
  # ToolPath() uses -- confirm which one is intended.
  if ARCH == "arm64":
    gcc_version = "4.9"
    known_toolchains = [
        ("aarch64-linux-android-" + gcc_version, "aarch64",
         "aarch64-linux-android")
    ]
  elif ARCH == "arm":
    gcc_version = "4.6"
    known_toolchains = [
        ("arm-linux-androideabi-" + gcc_version, "arm",
         "arm-linux-androideabi"),
    ]
  elif ARCH == "x86":
    known_toolchains = [
        ("i686-android-linux-4.4.3", "x86", "i686-android-linux")
    ]
  elif ARCH == "x86_64":
    known_toolchains = [
        ("x86_64-linux-android-4.9", "x86_64", "x86_64-linux-android")
    ]
  elif ARCH == "mips":
    gcc_version = "4.6"
    known_toolchains = [
        ("mipsel-linux-android-" + gcc_version, "mips", "mipsel-linux-android")
    ]
  else:
    known_toolchains = []

  # Look for addr2line to check for valid toolchain path.
  for toolchain_info in known_toolchains:
    if os.path.exists(ToolPath("addr2line", toolchain_info)):
      TOOLCHAIN_INFO = toolchain_info
      # Single parenthesized argument: valid as a Python 2 print statement and
      # as a Python 3 print() call.
      print("Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO))
      return toolchain_info

  raise Exception("Could not find tool chain")


def TranslateLibPath(lib):
  """Translate a symbols-root-relative library path to the real .so path.

  SymbolInformation(lib, addr) receives lib as the path from symbols
  root to the symbols file. This needs to be translated to point to the
  correct .so path. If the user doesn't explicitly specify which directory to
  use, then use the most recently updated one in one of the known directories.
  If the .so is not found somewhere in CHROME_SYMBOLS_DIR, leave it
  untranslated in case it is an Android symbol in SYMBOLS_DIR.

  Args:
    lib: library pathname relative to the symbols root.

  Returns:
    "/" followed by the path, relative to SYMBOLS_DIR, of the most recently
    modified matching library, or lib unchanged when none is found.
  """
  library_name = os.path.basename(lib)
  out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')
  candidate_dirs = ['.',
                    os.path.join(out_dir, 'Debug', 'lib'),
                    os.path.join(out_dir, 'Debug', 'lib.target'),
                    os.path.join(out_dir, 'Release', 'lib'),
                    os.path.join(out_dir, 'Release', 'lib.target'),
                   ]

  candidate_libraries = [
      '%s/%s/%s' % (CHROME_SYMBOLS_DIR, d, library_name)
      for d in candidate_dirs]
  candidate_libraries = [p for p in candidate_libraries if os.path.exists(p)]
  # Newest build output first.
  candidate_libraries.sort(key=os.path.getmtime, reverse=True)

  if not candidate_libraries:
    return lib

  library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
  return '/' + library_path


def SymbolInformation(lib, addr, get_detailed_info):
  """Look up symbol information about an address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address
    get_detailed_info: when true, objdump is also run to obtain the object
      symbol and offset; otherwise that field is an empty string.

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  lib = TranslateLibPath(lib)
  info = SymbolInformationForSet(lib, set([addr]), get_detailed_info)
  return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses
    get_detailed_info: when true, objdump is also run to obtain the object
      symbol and offset for each address; otherwise that field is an empty
      string.

  Returns:
    A dictionary of the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} where each address has a list of
    associated symbols and locations.  The list is always non-empty.

    None is returned when lib is empty, or when addr2line (or objdump, if
    detailed info was requested) produced no information at all.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  if get_detailed_info:
    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
      return None
  else:
    addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)

  result = {}
  for addr in unique_addrs:
    source_info = addr_to_line.get(addr)
    if not source_info:
      source_info = [(None, None)]
    if addr in addr_to_objdump:
      (object_symbol, object_offset) = addr_to_objdump[addr]
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)
    else:
      object_symbol_with_offset = None
    result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
        for (source_symbol, source_location) in source_info]

  return result


class MemoizedForSet(object):
  """Decorator that memoizes a fn(lib, addrs) -> {addr: value} lookup.

  Results are cached per library and per address. Addresses for which the
  wrapped function returns nothing are cached as None so they are not looked
  up again. Calls return only the addresses that have a truthy cached value.
  """

  def __init__(self, fn):
    self.fn = fn
    self.cache = {}
    # Keep the wrapped function's identity visible on the wrapper.
    self.__name__ = getattr(fn, "__name__", type(self).__name__)
    self.__doc__ = fn.__doc__

  def __call__(self, lib, unique_addrs):
    # Materialize once: the addresses are iterated several times below, which
    # would silently break on a one-shot iterator.
    unique_addrs = list(unique_addrs)
    lib_cache = self.cache.setdefault(lib, {})

    # A real list (not a lazy filter object) so that the emptiness test is
    # meaningful and the wrapped function still sees every address.
    no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
    if no_cache:
      lib_cache.update((k, None) for k in no_cache)
      result = self.fn(lib, no_cache)
      if result:
        lib_cache.update(result)

    return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])


@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses look up.

  Returns:
    A dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

    None is returned if lib is empty or the symbols file does not exist.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.isfile(symbols):
    return None

  # Called for its side effects only: it raises when no toolchain is
  # available and reports the toolchain in use on the first call.
  FindToolchain()
  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes so that str can be written and read on Python 2 and 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      reached_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # addr2line closed its output (it exited or crashed). Without this
          # check empty strings would be appended forever.
          reached_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        if not records:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush so the sentinel reaches addr2line even when stdin is
          # buffered; otherwise the next readline() would block forever.
          child.stdin.flush()
        records.append((symbol, location))
      result[addr] = records
      if reached_eof:
        # Nothing more can be read; do not write to a dead process.
        break
  finally:
    child.stdin.close()
    child.stdout.close()
    # Reap the child so it does not linger as a zombie process.
    child.wait()
  return result


def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address

  Returns:
    The stripped program counter address: the lowest bit is cleared when ARCH
    is "arm", and the address is returned unchanged otherwise.
  """
  if ARCH == "arm":
    return addr & ~1
  return addr


@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions
      for.

  Returns:
    A dictionary of the form {addr: (string symbol, offset)}, or None if lib
    is empty or the symbols file does not exist.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  result = {}

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  for target_addr in unique_addrs:
    # Loop-invariant for the whole disassembly of this address.
    i_target = StripPC(int(target_addr, 16))
    start_addr_dec = str(i_target)
    stop_addr_dec = str(i_target + 8)
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.

    # Text mode so the lines are str and match the str regexps above.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
      for line in process.stdout:
        # Is it a function line like:
        #   000177b0 <android::IBinder::~IBinder()>:
        components = func_regexp.match(line)
        if components:
          # This is a new function, so record the current function and its
          # address.
          current_symbol_addr = int(components.group(1), 16)
          current_symbol = components.group(2)

          # Does it have an optional offset like: "foo(..)+0x2c"?
          components = offset_regexp.match(current_symbol)
          if components:
            current_symbol = components.group(1)
            offset = components.group(2)
            if offset:
              current_symbol_addr -= int(offset, 16)

        # Is it an disassembly line like:
        #   177b2:  b510        push  {r4, lr}
        components = asm_regexp.match(line)
        if components:
          i_addr = int(components.group(1), 16)
          if i_addr == i_target:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
    finally:
      process.stdout.close()
      # Reap the child so it does not linger as a zombie process.
      process.wait()

  return result


def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name.

  Returns:
    The first line c++filt prints for the symbol, stripped of surrounding
    whitespace (an empty string if it prints nothing).
  """
  cmd = [ToolPath("c++filt")]
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=True)
  # communicate() feeds the input, closes both pipes and waits for the child.
  output, _ = process.communicate(mangled_symbol + "\n")
  return output.split("\n", 1)[0].strip()


def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol and byte offset as "symbol+offset".

  Args:
    symbol: the symbol name.
    offset: integer offset from the start of the symbol.

  Returns:
    symbol unchanged when offset is 0, otherwise "symbol+offset" with the
    offset in decimal.
  """
  if offset == 0:
    return symbol
  return "%s+%d" % (symbol, offset)