Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import glob
     23 import os
     24 import re
     25 import subprocess
     26 
     27 ANDROID_BUILD_TOP = os.environ["ANDROID_BUILD_TOP"]
     28 if not ANDROID_BUILD_TOP:
     29   ANDROID_BUILD_TOP = "."
     30 
     31 def FindSymbolsDir():
     32   saveddir = os.getcwd()
     33   os.chdir(ANDROID_BUILD_TOP)
     34   try:
     35     cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
     36            "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
     37            "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
     38     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True).stdout
     39     return os.path.join(ANDROID_BUILD_TOP, stream.read().strip())
     40   finally:
     41     os.chdir(saveddir)
     42 
# Directory holding unstripped binaries; resolved once, when this module is
# imported, by running FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture. FindToolchain() and StripPC() read this value.
ARCH = "arm"

# Toolchain directory cached by FindToolchain(); None until first resolved.
TOOLCHAIN = None
     48 
     49 def ToolPath(tool, toolchain=None):
     50   """Return a fully-qualified path to the specified tool"""
     51   if not toolchain:
     52     toolchain = FindToolchain()
     53   return glob.glob(os.path.join(toolchain, "*-" + tool))[0]
     54 
     55 def FindToolchain():
     56   """Returns the toolchain matching ARCH. Assumes that you're lunched
     57   such that the necessary toolchain is either your primary or secondary.
     58   TODO: we could make this 'just work' for most users by just globbing the
     59   newest toolchains for every architecture out of prebuilts/, but other
     60   parts of this tool assume you're lunched correctly anyway."""
     61   global TOOLCHAIN
     62   if TOOLCHAIN is not None:
     63     return TOOLCHAIN
     64 
     65   # We use slightly different names from GCC, and there's only one toolchain
     66   # for x86/x86_64.
     67   gcc_arch = ARCH
     68   if gcc_arch == "arm64":
     69     gcc_arch = "aarch64"
     70   elif gcc_arch == "mips":
     71     gcc_arch = "mipsel"
     72   elif gcc_arch == "x86":
     73     gcc_arch = "x86_64"
     74 
     75   tc1 = os.environ["ANDROID_TOOLCHAIN"]
     76   tc2 = os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"]
     77 
     78   if ("/" + gcc_arch + "-linux-") in tc1:
     79     toolchain = tc1
     80   elif ("/" + gcc_arch + "-linux-") in tc2:
     81     toolchain = tc2
     82   else:
     83     raise Exception("Could not find tool chain for %s" % (gcc_arch))
     84 
     85   if not os.path.exists(ToolPath("addr2line", toolchain)):
     86     raise Exception("No addr2line for %s" % (toolchain))
     87 
     88   TOOLCHAIN = toolchain
     89   print "Using toolchain from: %s" % TOOLCHAIN
     90   return TOOLCHAIN
     91 
     92 def SymbolInformation(lib, addr):
     93   """Look up symbol information about an address.
     94 
     95   Args:
     96     lib: library (or executable) pathname containing symbols
     97     addr: string hexidecimal address
     98 
     99   Returns:
    100     A list of the form [(source_symbol, source_location,
    101     object_symbol_with_offset)].
    102 
    103     If the function has been inlined then the list may contain
    104     more than one element with the symbols for the most deeply
    105     nested inlined location appearing first.  The list is
    106     always non-empty, even if no information is available.
    107 
    108     Usually you want to display the source_location and
    109     object_symbol_with_offset from the last element in the list.
    110   """
    111   info = SymbolInformationForSet(lib, set([addr]))
    112   return (info and info.get(addr)) or [(None, None, None)]
    113 
    114 
    115 def SymbolInformationForSet(lib, unique_addrs):
    116   """Look up symbol information for a set of addresses from the given library.
    117 
    118   Args:
    119     lib: library (or executable) pathname containing symbols
    120     unique_addrs: set of hexidecimal addresses
    121 
    122   Returns:
    123     A dictionary of the form {addr: [(source_symbol, source_location,
    124     object_symbol_with_offset)]} where each address has a list of
    125     associated symbols and locations.  The list is always non-empty.
    126 
    127     If the function has been inlined then the list may contain
    128     more than one element with the symbols for the most deeply
    129     nested inlined location appearing first.  The list is
    130     always non-empty, even if no information is available.
    131 
    132     Usually you want to display the source_location and
    133     object_symbol_with_offset from the last element in the list.
    134   """
    135   if not lib:
    136     return None
    137 
    138   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    139   if not addr_to_line:
    140     return None
    141 
    142   addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    143   if not addr_to_objdump:
    144     return None
    145 
    146   result = {}
    147   for addr in unique_addrs:
    148     source_info = addr_to_line.get(addr)
    149     if not source_info:
    150       source_info = [(None, None)]
    151     if addr in addr_to_objdump:
    152       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    153       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    154                                                          object_offset)
    155     else:
    156       object_symbol_with_offset = None
    157     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    158         for (source_symbol, source_location) in source_info]
    159 
    160   return result
    161 
    162 
    163 def CallAddr2LineForSet(lib, unique_addrs):
    164   """Look up line and symbol information for a set of addresses.
    165 
    166   Args:
    167     lib: library (or executable) pathname containing symbols
    168     unique_addrs: set of string hexidecimal addresses look up.
    169 
    170   Returns:
    171     A dictionary of the form {addr: [(symbol, file:line)]} where
    172     each address has a list of associated symbols and locations
    173     or an empty list if no symbol information was found.
    174 
    175     If the function has been inlined then the list may contain
    176     more than one element with the symbols for the most deeply
    177     nested inlined location appearing first.
    178   """
    179   if not lib:
    180     return None
    181 
    182 
    183   symbols = SYMBOLS_DIR + lib
    184   if not os.path.exists(symbols):
    185     return None
    186 
    187   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    188       "--demangle", "--exe=" + symbols]
    189   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    190 
    191   result = {}
    192   addrs = sorted(unique_addrs)
    193   for addr in addrs:
    194     child.stdin.write("0x%s\n" % addr)
    195     child.stdin.flush()
    196     records = []
    197     first = True
    198     while True:
    199       symbol = child.stdout.readline().strip()
    200       if symbol == "??":
    201         symbol = None
    202       location = child.stdout.readline().strip()
    203       if location == "??:0":
    204         location = None
    205       if symbol is None and location is None:
    206         break
    207       records.append((symbol, location))
    208       if first:
    209         # Write a blank line as a sentinel so we know when to stop
    210         # reading inlines from the output.
    211         # The blank line will cause addr2line to emit "??\n??:0\n".
    212         child.stdin.write("\n")
    213         first = False
    214     result[addr] = records
    215   child.stdin.close()
    216   child.stdout.close()
    217   return result
    218 
    219 
    220 def StripPC(addr):
    221   """Strips the Thumb bit a program counter address when appropriate.
    222 
    223   Args:
    224     addr: the program counter address
    225 
    226   Returns:
    227     The stripped program counter address.
    228   """
    229   global ARCH
    230 
    231   if ARCH == "arm":
    232     return addr & ~1
    233   return addr
    234 
    235 def CallObjdumpForSet(lib, unique_addrs):
    236   """Use objdump to find out the names of the containing functions.
    237 
    238   Args:
    239     lib: library (or executable) pathname containing symbols
    240     unique_addrs: set of string hexidecimal addresses to find the functions for.
    241 
    242   Returns:
    243     A dictionary of the form {addr: (string symbol, offset)}.
    244   """
    245   if not lib:
    246     return None
    247 
    248   symbols = SYMBOLS_DIR + lib
    249   if not os.path.exists(symbols):
    250     return None
    251 
    252   symbols = SYMBOLS_DIR + lib
    253   if not os.path.exists(symbols):
    254     return None
    255 
    256   addrs = sorted(unique_addrs)
    257   start_addr_dec = str(StripPC(int(addrs[0], 16)))
    258   stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    259   cmd = [ToolPath("objdump"),
    260          "--section=.text",
    261          "--demangle",
    262          "--disassemble",
    263          "--start-address=" + start_addr_dec,
    264          "--stop-address=" + stop_addr_dec,
    265          symbols]
    266 
    267   # Function lines look like:
    268   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    269   # We pull out the address and function first. Then we check for an optional
    270   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    271   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    272   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    273 
    274   # A disassembly line looks like:
    275   #   177b2:	b510      	push	{r4, lr}
    276   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    277 
    278   current_symbol = None    # The current function symbol in the disassembly.
    279   current_symbol_addr = 0  # The address of the current function.
    280   addr_index = 0  # The address that we are currently looking for.
    281 
    282   stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    283   result = {}
    284   for line in stream:
    285     # Is it a function line like:
    286     #   000177b0 <android::IBinder::~IBinder()>:
    287     components = func_regexp.match(line)
    288     if components:
    289       # This is a new function, so record the current function and its address.
    290       current_symbol_addr = int(components.group(1), 16)
    291       current_symbol = components.group(2)
    292 
    293       # Does it have an optional offset like: "foo(..)+0x2c"?
    294       components = offset_regexp.match(current_symbol)
    295       if components:
    296         current_symbol = components.group(1)
    297         offset = components.group(2)
    298         if offset:
    299           current_symbol_addr -= int(offset, 16)
    300 
    301     # Is it an disassembly line like:
    302     #   177b2:	b510      	push	{r4, lr}
    303     components = asm_regexp.match(line)
    304     if components:
    305       addr = components.group(1)
    306       target_addr = addrs[addr_index]
    307       i_addr = int(addr, 16)
    308       i_target = StripPC(int(target_addr, 16))
    309       if i_addr == i_target:
    310         result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    311         addr_index += 1
    312         if addr_index >= len(addrs):
    313           break
    314   stream.close()
    315 
    316   return result
    317 
    318 
    319 def CallCppFilt(mangled_symbol):
    320   cmd = [ToolPath("c++filt")]
    321   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    322   process.stdin.write(mangled_symbol)
    323   process.stdin.write("\n")
    324   process.stdin.close()
    325   demangled_symbol = process.stdout.readline().strip()
    326   process.stdout.close()
    327   return demangled_symbol
    328 
    329 def FormatSymbolWithOffset(symbol, offset):
    330   if offset == 0:
    331     return symbol
    332   return "%s+%d" % (symbol, offset)
    333