Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import os
     23 import re
     24 import subprocess
     25 
     26 ANDROID_BUILD_TOP = os.environ["ANDROID_BUILD_TOP"]
     27 if not ANDROID_BUILD_TOP:
     28   ANDROID_BUILD_TOP = "."
     29 
     30 def FindSymbolsDir():
     31   saveddir = os.getcwd()
     32   os.chdir(ANDROID_BUILD_TOP)
     33   try:
     34     cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
     35            "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
     36            "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
     37     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True).stdout
     38     return os.path.join(ANDROID_BUILD_TOP, stream.read().strip())
     39   finally:
     40     os.chdir(saveddir)
     41 
# Directory holding the unstripped binaries.  Computed once, at import time,
# by FindSymbolsDir(); library paths are appended to it to locate symbols.
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture.  FindToolchain() uses it to pick the toolchain and
# StripPC() uses it to decide whether to clear the low (Thumb) address bit.
ARCH = "arm"

# Cache for FindToolchain(): None until a toolchain has been located, then
# the (label, platform, target) tuple that was found.
TOOLCHAIN_INFO = None
     47 
     48 def Uname():
     49   """'uname' for constructing prebuilt/<...> and out/host/<...> paths."""
     50   uname = os.uname()[0]
     51   if uname == "Darwin":
     52     proc = os.uname()[-1]
     53     if proc == "i386" or proc == "x86_64":
     54       return "darwin-x86"
     55     return "darwin-ppc"
     56   if uname == "Linux":
     57     return "linux-x86"
     58   return uname
     59 
     60 def ToolPath(tool, toolchain_info=None):
     61   """Return a full qualified path to the specified tool"""
     62   if not toolchain_info:
     63     toolchain_info = FindToolchain()
     64   (label, platform, target) = toolchain_info
     65   return os.path.join(ANDROID_BUILD_TOP, "prebuilts/gcc", Uname(), platform, label, "bin",
     66                      target + "-" + tool)
     67 
     68 def FindToolchain():
     69   """Look for the latest available toolchain
     70 
     71   Args:
     72     None
     73 
     74   Returns:
     75     A pair of strings containing toolchain label and target prefix.
     76   """
     77   global TOOLCHAIN_INFO
     78   if TOOLCHAIN_INFO is not None:
     79     return TOOLCHAIN_INFO
     80 
     81   ## Known toolchains, newer ones in the front.
     82   if ARCH == "arm":
     83     gcc_version = os.environ["TARGET_GCC_VERSION"]
     84     known_toolchains = [
     85       ("arm-linux-androideabi-" + gcc_version, "arm", "arm-linux-androideabi"),
     86     ]
     87   elif ARCH =="x86":
     88     known_toolchains = [
     89       ("i686-android-linux-4.4.3", "x86", "i686-android-linux")
     90     ]
     91   else:
     92     known_toolchains = []
     93 
     94   # Look for addr2line to check for valid toolchain path.
     95   for (label, platform, target) in known_toolchains:
     96     toolchain_info = (label, platform, target);
     97     if os.path.exists(ToolPath("addr2line", toolchain_info)):
     98       TOOLCHAIN_INFO = toolchain_info
     99       return toolchain_info
    100 
    101   raise Exception("Could not find tool chain")
    102 
    103 def SymbolInformation(lib, addr):
    104   """Look up symbol information about an address.
    105 
    106   Args:
    107     lib: library (or executable) pathname containing symbols
    108     addr: string hexidecimal address
    109 
    110   Returns:
    111     A list of the form [(source_symbol, source_location,
    112     object_symbol_with_offset)].
    113 
    114     If the function has been inlined then the list may contain
    115     more than one element with the symbols for the most deeply
    116     nested inlined location appearing first.  The list is
    117     always non-empty, even if no information is available.
    118 
    119     Usually you want to display the source_location and
    120     object_symbol_with_offset from the last element in the list.
    121   """
    122   info = SymbolInformationForSet(lib, set([addr]))
    123   return (info and info.get(addr)) or [(None, None, None)]
    124 
    125 
    126 def SymbolInformationForSet(lib, unique_addrs):
    127   """Look up symbol information for a set of addresses from the given library.
    128 
    129   Args:
    130     lib: library (or executable) pathname containing symbols
    131     unique_addrs: set of hexidecimal addresses
    132 
    133   Returns:
    134     A dictionary of the form {addr: [(source_symbol, source_location,
    135     object_symbol_with_offset)]} where each address has a list of
    136     associated symbols and locations.  The list is always non-empty.
    137 
    138     If the function has been inlined then the list may contain
    139     more than one element with the symbols for the most deeply
    140     nested inlined location appearing first.  The list is
    141     always non-empty, even if no information is available.
    142 
    143     Usually you want to display the source_location and
    144     object_symbol_with_offset from the last element in the list.
    145   """
    146   if not lib:
    147     return None
    148 
    149   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    150   if not addr_to_line:
    151     return None
    152 
    153   addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    154   if not addr_to_objdump:
    155     return None
    156 
    157   result = {}
    158   for addr in unique_addrs:
    159     source_info = addr_to_line.get(addr)
    160     if not source_info:
    161       source_info = [(None, None)]
    162     if addr in addr_to_objdump:
    163       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    164       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    165                                                          object_offset)
    166     else:
    167       object_symbol_with_offset = None
    168     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    169         for (source_symbol, source_location) in source_info]
    170 
    171   return result
    172 
    173 
    174 def CallAddr2LineForSet(lib, unique_addrs):
    175   """Look up line and symbol information for a set of addresses.
    176 
    177   Args:
    178     lib: library (or executable) pathname containing symbols
    179     unique_addrs: set of string hexidecimal addresses look up.
    180 
    181   Returns:
    182     A dictionary of the form {addr: [(symbol, file:line)]} where
    183     each address has a list of associated symbols and locations
    184     or an empty list if no symbol information was found.
    185 
    186     If the function has been inlined then the list may contain
    187     more than one element with the symbols for the most deeply
    188     nested inlined location appearing first.
    189   """
    190   if not lib:
    191     return None
    192 
    193 
    194   symbols = SYMBOLS_DIR + lib
    195   if not os.path.exists(symbols):
    196     return None
    197 
    198   (label, platform, target) = FindToolchain()
    199   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    200       "--demangle", "--exe=" + symbols]
    201   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    202 
    203   result = {}
    204   addrs = sorted(unique_addrs)
    205   for addr in addrs:
    206     child.stdin.write("0x%s\n" % addr)
    207     child.stdin.flush()
    208     records = []
    209     first = True
    210     while True:
    211       symbol = child.stdout.readline().strip()
    212       if symbol == "??":
    213         symbol = None
    214       location = child.stdout.readline().strip()
    215       if location == "??:0":
    216         location = None
    217       if symbol is None and location is None:
    218         break
    219       records.append((symbol, location))
    220       if first:
    221         # Write a blank line as a sentinel so we know when to stop
    222         # reading inlines from the output.
    223         # The blank line will cause addr2line to emit "??\n??:0\n".
    224         child.stdin.write("\n")
    225         first = False
    226     result[addr] = records
    227   child.stdin.close()
    228   child.stdout.close()
    229   return result
    230 
    231 
    232 def StripPC(addr):
    233   """Strips the Thumb bit a program counter address when appropriate.
    234 
    235   Args:
    236     addr: the program counter address
    237 
    238   Returns:
    239     The stripped program counter address.
    240   """
    241   global ARCH
    242 
    243   if ARCH == "arm":
    244     return addr & ~1
    245   return addr
    246 
    247 def CallObjdumpForSet(lib, unique_addrs):
    248   """Use objdump to find out the names of the containing functions.
    249 
    250   Args:
    251     lib: library (or executable) pathname containing symbols
    252     unique_addrs: set of string hexidecimal addresses to find the functions for.
    253 
    254   Returns:
    255     A dictionary of the form {addr: (string symbol, offset)}.
    256   """
    257   if not lib:
    258     return None
    259 
    260   symbols = SYMBOLS_DIR + lib
    261   if not os.path.exists(symbols):
    262     return None
    263 
    264   symbols = SYMBOLS_DIR + lib
    265   if not os.path.exists(symbols):
    266     return None
    267 
    268   addrs = sorted(unique_addrs)
    269   start_addr_dec = str(StripPC(int(addrs[0], 16)))
    270   stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    271   cmd = [ToolPath("objdump"),
    272          "--section=.text",
    273          "--demangle",
    274          "--disassemble",
    275          "--start-address=" + start_addr_dec,
    276          "--stop-address=" + stop_addr_dec,
    277          symbols]
    278 
    279   # Function lines look like:
    280   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    281   # We pull out the address and function first. Then we check for an optional
    282   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    283   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    284   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    285 
    286   # A disassembly line looks like:
    287   #   177b2:	b510      	push	{r4, lr}
    288   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    289 
    290   current_symbol = None    # The current function symbol in the disassembly.
    291   current_symbol_addr = 0  # The address of the current function.
    292   addr_index = 0  # The address that we are currently looking for.
    293 
    294   stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    295   result = {}
    296   for line in stream:
    297     # Is it a function line like:
    298     #   000177b0 <android::IBinder::~IBinder()>:
    299     components = func_regexp.match(line)
    300     if components:
    301       # This is a new function, so record the current function and its address.
    302       current_symbol_addr = int(components.group(1), 16)
    303       current_symbol = components.group(2)
    304 
    305       # Does it have an optional offset like: "foo(..)+0x2c"?
    306       components = offset_regexp.match(current_symbol)
    307       if components:
    308         current_symbol = components.group(1)
    309         offset = components.group(2)
    310         if offset:
    311           current_symbol_addr -= int(offset, 16)
    312 
    313     # Is it an disassembly line like:
    314     #   177b2:	b510      	push	{r4, lr}
    315     components = asm_regexp.match(line)
    316     if components:
    317       addr = components.group(1)
    318       target_addr = addrs[addr_index]
    319       i_addr = int(addr, 16)
    320       i_target = StripPC(int(target_addr, 16))
    321       if i_addr == i_target:
    322         result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    323         addr_index += 1
    324         if addr_index >= len(addrs):
    325           break
    326   stream.close()
    327 
    328   return result
    329 
    330 
    331 def CallCppFilt(mangled_symbol):
    332   cmd = [ToolPath("c++filt")]
    333   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    334   process.stdin.write(mangled_symbol)
    335   process.stdin.write("\n")
    336   process.stdin.close()
    337   demangled_symbol = process.stdout.readline().strip()
    338   process.stdout.close()
    339   return demangled_symbol
    340 
    341 def FormatSymbolWithOffset(symbol, offset):
    342   if offset == 0:
    343     return symbol
    344   return "%s+%d" % (symbol, offset)
    345