Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import glob
     23 import os
     24 import platform
     25 import re
     26 import subprocess
     27 import unittest
     28 
     29 ANDROID_BUILD_TOP = os.environ["ANDROID_BUILD_TOP"]
     30 if not ANDROID_BUILD_TOP:
     31   ANDROID_BUILD_TOP = "."
     32 
     33 def FindSymbolsDir():
     34   saveddir = os.getcwd()
     35   os.chdir(ANDROID_BUILD_TOP)
     36   try:
     37     cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
     38            "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
     39            "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
     40     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True).stdout
     41     return os.path.join(ANDROID_BUILD_TOP, stream.read().strip())
     42   finally:
     43     os.chdir(saveddir)
     44 
# Directory prefix under which symbol-bearing binaries are looked up. It is
# computed once, at import time, by invoking the build system through
# FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture name. FindToolchain() and StripPC() read it at call
# time, so it may be reassigned before those calls.
ARCH = "arm"


# These are private. Do not access them from other modules.
# FindToolchain() stores its most recent result here, together with the ARCH
# value that result was computed for.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None
     53 
     54 
     55 def ToolPath(tool, toolchain=None):
     56   """Return a fully-qualified path to the specified tool"""
     57   if not toolchain:
     58     toolchain = FindToolchain()
     59   return glob.glob(os.path.join(toolchain, "*-" + tool))[0]
     60 
     61 
     62 def FindToolchain():
     63   """Returns the toolchain matching ARCH."""
     64   global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
     65   if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
     66     return _CACHED_TOOLCHAIN
     67 
     68   # We use slightly different names from GCC, and there's only one toolchain
     69   # for x86/x86_64. Note that these are the names of the top-level directory
     70   # rather than the _different_ names used lower down the directory hierarchy!
     71   gcc_dir = ARCH
     72   if gcc_dir == "arm64":
     73     gcc_dir = "aarch64"
     74   elif gcc_dir == "mips64":
     75     gcc_dir = "mips"
     76   elif gcc_dir == "x86_64":
     77     gcc_dir = "x86"
     78 
     79   os_name = platform.system().lower();
     80 
     81   available_toolchains = glob.glob("%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/" % (ANDROID_BUILD_TOP, os_name, gcc_dir))
     82   if len(available_toolchains) == 0:
     83     raise Exception("Could not find tool chain for %s" % (ARCH))
     84 
     85   toolchain = sorted(available_toolchains)[-1]
     86 
     87   if not os.path.exists(ToolPath("addr2line", toolchain)):
     88     raise Exception("No addr2line for %s" % (toolchain))
     89 
     90   _CACHED_TOOLCHAIN = toolchain
     91   _CACHED_TOOLCHAIN_ARCH = ARCH
     92   print "Using %s toolchain from: %s" % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN)
     93   return _CACHED_TOOLCHAIN
     94 
     95 
     96 def SymbolInformation(lib, addr):
     97   """Look up symbol information about an address.
     98 
     99   Args:
    100     lib: library (or executable) pathname containing symbols
    101     addr: string hexidecimal address
    102 
    103   Returns:
    104     A list of the form [(source_symbol, source_location,
    105     object_symbol_with_offset)].
    106 
    107     If the function has been inlined then the list may contain
    108     more than one element with the symbols for the most deeply
    109     nested inlined location appearing first.  The list is
    110     always non-empty, even if no information is available.
    111 
    112     Usually you want to display the source_location and
    113     object_symbol_with_offset from the last element in the list.
    114   """
    115   info = SymbolInformationForSet(lib, set([addr]))
    116   return (info and info.get(addr)) or [(None, None, None)]
    117 
    118 
    119 def SymbolInformationForSet(lib, unique_addrs):
    120   """Look up symbol information for a set of addresses from the given library.
    121 
    122   Args:
    123     lib: library (or executable) pathname containing symbols
    124     unique_addrs: set of hexidecimal addresses
    125 
    126   Returns:
    127     A dictionary of the form {addr: [(source_symbol, source_location,
    128     object_symbol_with_offset)]} where each address has a list of
    129     associated symbols and locations.  The list is always non-empty.
    130 
    131     If the function has been inlined then the list may contain
    132     more than one element with the symbols for the most deeply
    133     nested inlined location appearing first.  The list is
    134     always non-empty, even if no information is available.
    135 
    136     Usually you want to display the source_location and
    137     object_symbol_with_offset from the last element in the list.
    138   """
    139   if not lib:
    140     return None
    141 
    142   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    143   if not addr_to_line:
    144     return None
    145 
    146   addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    147   if not addr_to_objdump:
    148     return None
    149 
    150   result = {}
    151   for addr in unique_addrs:
    152     source_info = addr_to_line.get(addr)
    153     if not source_info:
    154       source_info = [(None, None)]
    155     if addr in addr_to_objdump:
    156       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    157       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    158                                                          object_offset)
    159     else:
    160       object_symbol_with_offset = None
    161     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    162         for (source_symbol, source_location) in source_info]
    163 
    164   return result
    165 
    166 
    167 def CallAddr2LineForSet(lib, unique_addrs):
    168   """Look up line and symbol information for a set of addresses.
    169 
    170   Args:
    171     lib: library (or executable) pathname containing symbols
    172     unique_addrs: set of string hexidecimal addresses look up.
    173 
    174   Returns:
    175     A dictionary of the form {addr: [(symbol, file:line)]} where
    176     each address has a list of associated symbols and locations
    177     or an empty list if no symbol information was found.
    178 
    179     If the function has been inlined then the list may contain
    180     more than one element with the symbols for the most deeply
    181     nested inlined location appearing first.
    182   """
    183   if not lib:
    184     return None
    185 
    186   symbols = SYMBOLS_DIR + lib
    187   if not os.path.exists(symbols):
    188     return None
    189 
    190   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    191       "--demangle", "--exe=" + symbols]
    192   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    193 
    194   result = {}
    195   addrs = sorted(unique_addrs)
    196   for addr in addrs:
    197     child.stdin.write("0x%s\n" % addr)
    198     child.stdin.flush()
    199     records = []
    200     first = True
    201     while True:
    202       symbol = child.stdout.readline().strip()
    203       if symbol == "??":
    204         symbol = None
    205       location = child.stdout.readline().strip()
    206       if location == "??:0":
    207         location = None
    208       if symbol is None and location is None:
    209         break
    210       records.append((symbol, location))
    211       if first:
    212         # Write a blank line as a sentinel so we know when to stop
    213         # reading inlines from the output.
    214         # The blank line will cause addr2line to emit "??\n??:0\n".
    215         child.stdin.write("\n")
    216         first = False
    217     result[addr] = records
    218   child.stdin.close()
    219   child.stdout.close()
    220   return result
    221 
    222 
    223 def StripPC(addr):
    224   """Strips the Thumb bit a program counter address when appropriate.
    225 
    226   Args:
    227     addr: the program counter address
    228 
    229   Returns:
    230     The stripped program counter address.
    231   """
    232   global ARCH
    233   if ARCH == "arm":
    234     return addr & ~1
    235   return addr
    236 
    237 
    238 def CallObjdumpForSet(lib, unique_addrs):
    239   """Use objdump to find out the names of the containing functions.
    240 
    241   Args:
    242     lib: library (or executable) pathname containing symbols
    243     unique_addrs: set of string hexidecimal addresses to find the functions for.
    244 
    245   Returns:
    246     A dictionary of the form {addr: (string symbol, offset)}.
    247   """
    248   if not lib:
    249     return None
    250 
    251   symbols = SYMBOLS_DIR + lib
    252   if not os.path.exists(symbols):
    253     return None
    254 
    255   symbols = SYMBOLS_DIR + lib
    256   if not os.path.exists(symbols):
    257     return None
    258 
    259   addrs = sorted(unique_addrs)
    260   start_addr_dec = str(StripPC(int(addrs[0], 16)))
    261   stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    262   cmd = [ToolPath("objdump"),
    263          "--section=.text",
    264          "--demangle",
    265          "--disassemble",
    266          "--start-address=" + start_addr_dec,
    267          "--stop-address=" + stop_addr_dec,
    268          symbols]
    269 
    270   # Function lines look like:
    271   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    272   # We pull out the address and function first. Then we check for an optional
    273   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    274   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    275   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    276 
    277   # A disassembly line looks like:
    278   #   177b2:	b510      	push	{r4, lr}
    279   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    280 
    281   current_symbol = None    # The current function symbol in the disassembly.
    282   current_symbol_addr = 0  # The address of the current function.
    283   addr_index = 0  # The address that we are currently looking for.
    284 
    285   stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    286   result = {}
    287   for line in stream:
    288     # Is it a function line like:
    289     #   000177b0 <android::IBinder::~IBinder()>:
    290     components = func_regexp.match(line)
    291     if components:
    292       # This is a new function, so record the current function and its address.
    293       current_symbol_addr = int(components.group(1), 16)
    294       current_symbol = components.group(2)
    295 
    296       # Does it have an optional offset like: "foo(..)+0x2c"?
    297       components = offset_regexp.match(current_symbol)
    298       if components:
    299         current_symbol = components.group(1)
    300         offset = components.group(2)
    301         if offset:
    302           current_symbol_addr -= int(offset, 16)
    303 
    304     # Is it an disassembly line like:
    305     #   177b2:	b510      	push	{r4, lr}
    306     components = asm_regexp.match(line)
    307     if components:
    308       addr = components.group(1)
    309       target_addr = addrs[addr_index]
    310       i_addr = int(addr, 16)
    311       i_target = StripPC(int(target_addr, 16))
    312       if i_addr == i_target:
    313         result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    314         addr_index += 1
    315         if addr_index >= len(addrs):
    316           break
    317   stream.close()
    318 
    319   return result
    320 
    321 
    322 def CallCppFilt(mangled_symbol):
    323   cmd = [ToolPath("c++filt")]
    324   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    325   process.stdin.write(mangled_symbol)
    326   process.stdin.write("\n")
    327   process.stdin.close()
    328   demangled_symbol = process.stdout.readline().strip()
    329   process.stdout.close()
    330   return demangled_symbol
    331 
    332 
    333 def FormatSymbolWithOffset(symbol, offset):
    334   if offset == 0:
    335     return symbol
    336   return "%s+%d" % (symbol, offset)
    337 
    338 
    339 
    340 class FindToolchainTests(unittest.TestCase):
    341   def assert_toolchain_found(self, abi):
    342     global ARCH
    343     ARCH = abi
    344     FindToolchain() # Will throw on failure.
    345 
    346   def test_toolchains_found(self):
    347     self.assert_toolchain_found("arm")
    348     self.assert_toolchain_found("arm64")
    349     self.assert_toolchain_found("mips")
    350     self.assert_toolchain_found("x86")
    351     self.assert_toolchain_found("x86_64")
    352 
    353 
    354 if __name__ == '__main__':
    355     unittest.main()
    356