Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import glob
     23 import os
     24 import platform
     25 import re
     26 import subprocess
     27 import unittest
     28 
     29 ANDROID_BUILD_TOP = os.environ["ANDROID_BUILD_TOP"]
     30 if not ANDROID_BUILD_TOP:
     31   ANDROID_BUILD_TOP = "."
     32 
     33 def FindSymbolsDir():
     34   saveddir = os.getcwd()
     35   os.chdir(ANDROID_BUILD_TOP)
     36   try:
     37     cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
     38            "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
     39            "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
     40     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True).stdout
     41     return os.path.join(ANDROID_BUILD_TOP, stream.read().strip())
     42   finally:
     43     os.chdir(saveddir)
     44 
     45 SYMBOLS_DIR = FindSymbolsDir()
     46 
     47 ARCH = None
     48 
     49 
     50 # These are private. Do not access them from other modules.
     51 _CACHED_TOOLCHAIN = None
     52 _CACHED_TOOLCHAIN_ARCH = None
     53 
     54 
     55 def ToolPath(tool, toolchain=None):
     56   """Return a fully-qualified path to the specified tool"""
     57   if not toolchain:
     58     toolchain = FindToolchain()
     59   return glob.glob(os.path.join(toolchain, "*-" + tool))[0]
     60 
     61 
     62 def FindToolchain():
     63   """Returns the toolchain matching ARCH."""
     64   global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
     65   if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
     66     return _CACHED_TOOLCHAIN
     67 
     68   # We use slightly different names from GCC, and there's only one toolchain
     69   # for x86/x86_64. Note that these are the names of the top-level directory
     70   # rather than the _different_ names used lower down the directory hierarchy!
     71   gcc_dir = ARCH
     72   if gcc_dir == "arm64":
     73     gcc_dir = "aarch64"
     74   elif gcc_dir == "mips64":
     75     gcc_dir = "mips"
     76   elif gcc_dir == "x86_64":
     77     gcc_dir = "x86"
     78 
     79   os_name = platform.system().lower();
     80 
     81   available_toolchains = glob.glob("%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/" % (ANDROID_BUILD_TOP, os_name, gcc_dir))
     82   if len(available_toolchains) == 0:
     83     raise Exception("Could not find tool chain for %s" % (ARCH))
     84 
     85   toolchain = sorted(available_toolchains)[-1]
     86 
     87   if not os.path.exists(ToolPath("addr2line", toolchain)):
     88     raise Exception("No addr2line for %s" % (toolchain))
     89 
     90   _CACHED_TOOLCHAIN = toolchain
     91   _CACHED_TOOLCHAIN_ARCH = ARCH
     92   print "Using %s toolchain from: %s" % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN)
     93   return _CACHED_TOOLCHAIN
     94 
     95 
     96 def SymbolInformation(lib, addr):
     97   """Look up symbol information about an address.
     98 
     99   Args:
    100     lib: library (or executable) pathname containing symbols
    101     addr: string hexidecimal address
    102 
    103   Returns:
    104     A list of the form [(source_symbol, source_location,
    105     object_symbol_with_offset)].
    106 
    107     If the function has been inlined then the list may contain
    108     more than one element with the symbols for the most deeply
    109     nested inlined location appearing first.  The list is
    110     always non-empty, even if no information is available.
    111 
    112     Usually you want to display the source_location and
    113     object_symbol_with_offset from the last element in the list.
    114   """
    115   info = SymbolInformationForSet(lib, set([addr]))
    116   return (info and info.get(addr)) or [(None, None, None)]
    117 
    118 
    119 def SymbolInformationForSet(lib, unique_addrs):
    120   """Look up symbol information for a set of addresses from the given library.
    121 
    122   Args:
    123     lib: library (or executable) pathname containing symbols
    124     unique_addrs: set of hexidecimal addresses
    125 
    126   Returns:
    127     A dictionary of the form {addr: [(source_symbol, source_location,
    128     object_symbol_with_offset)]} where each address has a list of
    129     associated symbols and locations.  The list is always non-empty.
    130 
    131     If the function has been inlined then the list may contain
    132     more than one element with the symbols for the most deeply
    133     nested inlined location appearing first.  The list is
    134     always non-empty, even if no information is available.
    135 
    136     Usually you want to display the source_location and
    137     object_symbol_with_offset from the last element in the list.
    138   """
    139   if not lib:
    140     return None
    141 
    142   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    143   if not addr_to_line:
    144     return None
    145 
    146   addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    147   if not addr_to_objdump:
    148     return None
    149 
    150   result = {}
    151   for addr in unique_addrs:
    152     source_info = addr_to_line.get(addr)
    153     if not source_info:
    154       source_info = [(None, None)]
    155     if addr in addr_to_objdump:
    156       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    157       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    158                                                          object_offset)
    159     else:
    160       object_symbol_with_offset = None
    161     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    162         for (source_symbol, source_location) in source_info]
    163 
    164   return result
    165 
    166 
    167 def CallAddr2LineForSet(lib, unique_addrs):
    168   """Look up line and symbol information for a set of addresses.
    169 
    170   Args:
    171     lib: library (or executable) pathname containing symbols
    172     unique_addrs: set of string hexidecimal addresses look up.
    173 
    174   Returns:
    175     A dictionary of the form {addr: [(symbol, file:line)]} where
    176     each address has a list of associated symbols and locations
    177     or an empty list if no symbol information was found.
    178 
    179     If the function has been inlined then the list may contain
    180     more than one element with the symbols for the most deeply
    181     nested inlined location appearing first.
    182   """
    183   if not lib:
    184     return None
    185 
    186   symbols = SYMBOLS_DIR + lib
    187   if not os.path.exists(symbols):
    188     symbols = lib
    189     if not os.path.exists(symbols):
    190       return None
    191 
    192   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    193       "--demangle", "--exe=" + symbols]
    194   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    195 
    196   result = {}
    197   addrs = sorted(unique_addrs)
    198   for addr in addrs:
    199     child.stdin.write("0x%s\n" % addr)
    200     child.stdin.flush()
    201     records = []
    202     first = True
    203     while True:
    204       symbol = child.stdout.readline().strip()
    205       if symbol == "??":
    206         symbol = None
    207       location = child.stdout.readline().strip()
    208       if location == "??:0" or location == "??:?":
    209         location = None
    210       if symbol is None and location is None:
    211         break
    212       records.append((symbol, location))
    213       if first:
    214         # Write a blank line as a sentinel so we know when to stop
    215         # reading inlines from the output.
    216         # The blank line will cause addr2line to emit "??\n??:0\n".
    217         child.stdin.write("\n")
    218         first = False
    219     result[addr] = records
    220   child.stdin.close()
    221   child.stdout.close()
    222   return result
    223 
    224 
    225 def StripPC(addr):
    226   """Strips the Thumb bit a program counter address when appropriate.
    227 
    228   Args:
    229     addr: the program counter address
    230 
    231   Returns:
    232     The stripped program counter address.
    233   """
    234   global ARCH
    235   if ARCH == "arm":
    236     return addr & ~1
    237   return addr
    238 
    239 
    240 def CallObjdumpForSet(lib, unique_addrs):
    241   """Use objdump to find out the names of the containing functions.
    242 
    243   Args:
    244     lib: library (or executable) pathname containing symbols
    245     unique_addrs: set of string hexidecimal addresses to find the functions for.
    246 
    247   Returns:
    248     A dictionary of the form {addr: (string symbol, offset)}.
    249   """
    250   if not lib:
    251     return None
    252 
    253   symbols = SYMBOLS_DIR + lib
    254   if not os.path.exists(symbols):
    255     symbols = lib
    256     if not os.path.exists(symbols):
    257       return None
    258 
    259   addrs = sorted(unique_addrs)
    260   start_addr_dec = str(StripPC(int(addrs[0], 16)))
    261   stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    262   cmd = [ToolPath("objdump"),
    263          "--section=.text",
    264          "--demangle",
    265          "--disassemble",
    266          "--start-address=" + start_addr_dec,
    267          "--stop-address=" + stop_addr_dec,
    268          symbols]
    269 
    270   # Function lines look like:
    271   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    272   # We pull out the address and function first. Then we check for an optional
    273   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    274   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    275   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    276 
    277   # A disassembly line looks like:
    278   #   177b2:	b510      	push	{r4, lr}
    279   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    280 
    281   current_symbol = None    # The current function symbol in the disassembly.
    282   current_symbol_addr = 0  # The address of the current function.
    283   addr_index = 0  # The address that we are currently looking for.
    284 
    285   stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    286   result = {}
    287   for line in stream:
    288     # Is it a function line like:
    289     #   000177b0 <android::IBinder::~IBinder()>:
    290     components = func_regexp.match(line)
    291     if components:
    292       # This is a new function, so record the current function and its address.
    293       current_symbol_addr = int(components.group(1), 16)
    294       current_symbol = components.group(2)
    295 
    296       # Does it have an optional offset like: "foo(..)+0x2c"?
    297       components = offset_regexp.match(current_symbol)
    298       if components:
    299         current_symbol = components.group(1)
    300         offset = components.group(2)
    301         if offset:
    302           current_symbol_addr -= int(offset, 16)
    303 
    304     # Is it an disassembly line like:
    305     #   177b2:	b510      	push	{r4, lr}
    306     components = asm_regexp.match(line)
    307     if components:
    308       addr = components.group(1)
    309       target_addr = addrs[addr_index]
    310       i_addr = int(addr, 16)
    311       i_target = StripPC(int(target_addr, 16))
    312       if i_addr == i_target:
    313         result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    314         addr_index += 1
    315         if addr_index >= len(addrs):
    316           break
    317   stream.close()
    318 
    319   return result
    320 
    321 
    322 def CallCppFilt(mangled_symbol):
    323   cmd = [ToolPath("c++filt")]
    324   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    325   process.stdin.write(mangled_symbol)
    326   process.stdin.write("\n")
    327   process.stdin.close()
    328   demangled_symbol = process.stdout.readline().strip()
    329   process.stdout.close()
    330   return demangled_symbol
    331 
    332 
    333 def FormatSymbolWithOffset(symbol, offset):
    334   if offset == 0:
    335     return symbol
    336   return "%s+%d" % (symbol, offset)
    337 
    338 
    339 def GetAbiFromToolchain(toolchain_var, bits):
    340   toolchain = os.environ.get(toolchain_var)
    341   if not toolchain:
    342     return None
    343 
    344   toolchain_match = re.search("\/(aarch64|arm|mips|x86)\/", toolchain)
    345   if toolchain_match:
    346     abi = toolchain_match.group(1)
    347     if abi == "aarch64":
    348       return "arm64"
    349     elif bits == 64:
    350       if abi == "x86":
    351         return "x86_64"
    352       elif abi == "mips":
    353         return "mips64"
    354     return abi
    355   return None
    356 
    357 
    358 def SetAbi(lines):
    359   global ARCH
    360 
    361   abi_line = re.compile("ABI: \'(.*)\'")
    362   trace_line = re.compile("\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")
    363 
    364   ARCH = None
    365   for line in lines:
    366     abi_match = abi_line.search(line)
    367     if abi_match:
    368       ARCH = abi_match.group(1)
    369       break
    370     trace_match = trace_line.search(line)
    371     if trace_match:
    372       # Try to guess the arch, we know the bitness.
    373       if len(trace_match.group(1)) == 16:
    374         # 64 bit
    375         # Check for ANDROID_TOOLCHAIN, if it is set, we can figure out the
    376         # arch this way. If this is not set, then default to arm64.
    377         ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 64)
    378         if not ARCH:
    379           ARCH = "arm64"
    380       else:
    381         # 32 bit
    382         # Check for ANDROID_TOOLCHAIN_2ND_ARCH first, if set, use that.
    383         # If not try ANDROID_TOOLCHAIN to find the arch.
    384         # If this is not set, then default to arm.
    385         ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN_2ND_ARCH", 32)
    386         if not ARCH:
    387           ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 32)
    388           if not ARCH:
    389             ARCH = "arm"
    390       break
    391   if not ARCH:
    392     raise Exception("Could not determine arch from input")
    393 
    394 
    395 class FindToolchainTests(unittest.TestCase):
    396   def assert_toolchain_found(self, abi):
    397     global ARCH
    398     ARCH = abi
    399     FindToolchain() # Will throw on failure.
    400 
    401   def test_toolchains_found(self):
    402     self.assert_toolchain_found("arm")
    403     self.assert_toolchain_found("arm64")
    404     self.assert_toolchain_found("mips")
    405     self.assert_toolchain_found("x86")
    406     self.assert_toolchain_found("x86_64")
    407 
    408 class SetArchTests(unittest.TestCase):
    409   def test_abi_check(self):
    410     global ARCH
    411 
    412     SetAbi(["ABI: 'arm'"])
    413     self.assertEqual(ARCH, "arm")
    414     SetAbi(["ABI: 'arm64'"])
    415     self.assertEqual(ARCH, "arm64")
    416 
    417     SetAbi(["ABI: 'mips'"])
    418     self.assertEqual(ARCH, "mips")
    419     SetAbi(["ABI: 'mips64'"])
    420     self.assertEqual(ARCH, "mips64")
    421 
    422     SetAbi(["ABI: 'x86'"])
    423     self.assertEqual(ARCH, "x86")
    424     SetAbi(["ABI: 'x86_64'"])
    425     self.assertEqual(ARCH, "x86_64")
    426 
    427   def test_32bit_trace_line_toolchain(self):
    428     global ARCH
    429 
    430     os.environ.clear()
    431     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin"
    432     SetAbi(["#00 pc 000374e0"])
    433     self.assertEqual(ARCH, "arm")
    434 
    435     os.environ.clear()
    436     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/mips/arm-linux-androideabi-4.9/bin"
    437     SetAbi(["#00 pc 000374e0"])
    438     self.assertEqual(ARCH, "mips")
    439 
    440     os.environ.clear()
    441     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin"
    442     SetAbi(["#00 pc 000374e0"])
    443     self.assertEqual(ARCH, "x86")
    444 
    445   def test_32bit_trace_line_toolchain_2nd(self):
    446     global ARCH
    447 
    448     os.environ.clear()
    449     os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin"
    450     os.environ["ANDROID_TOOLCHAIN_ARCH"] = "linux-x86/aarch64/aarch64-linux-android-4.9/bin"
    451     SetAbi(["#00 pc 000374e0"])
    452     self.assertEqual(ARCH, "arm")
    453 
    454     os.environ.clear()
    455     os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/mips/mips-linux-androideabi-4.9/bin"
    456     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"
    457     SetAbi(["#00 pc 000374e0"])
    458     self.assertEqual(ARCH, "mips")
    459 
    460     os.environ.clear()
    461     os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/x86/x86-linux-androideabi-4.9/bin"
    462     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"
    463     SetAbi(["#00 pc 000374e0"])
    464     self.assertEqual(ARCH, "x86")
    465 
    466   def test_64bit_trace_line_toolchain(self):
    467     global ARCH
    468 
    469     os.environ.clear()
    470     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/aarch/aarch-linux-androideabi-4.9/bin"
    471     SetAbi(["#00 pc 00000000000374e0"])
    472     self.assertEqual(ARCH, "arm64")
    473 
    474     os.environ.clear()
    475     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/mips/arm-linux-androideabi-4.9/bin"
    476     SetAbi(["#00 pc 00000000000374e0"])
    477     self.assertEqual(ARCH, "mips64")
    478 
    479     os.environ.clear()
    480     os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin"
    481     SetAbi(["#00 pc 00000000000374e0"])
    482     self.assertEqual(ARCH, "x86_64")
    483 
    484   def test_default_abis(self):
    485     global ARCH
    486 
    487     os.environ.clear()
    488     SetAbi(["#00 pc 000374e0"])
    489     self.assertEqual(ARCH, "arm")
    490     SetAbi(["#00 pc 00000000000374e0"])
    491     self.assertEqual(ARCH, "arm64")
    492 
    493   def test_no_abi(self):
    494     global ARCH
    495 
    496     self.assertRaisesRegexp(Exception, "Could not determine arch from input", SetAbi, [])
    497 
    498 if __name__ == '__main__':
    499     unittest.main()
    500