Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import os
     23 import re
     24 import subprocess
     25 
# Root of the source checkout: four directory levels above the directory
# that contains this file.
CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
                          os.pardir, os.pardir, os.pardir, os.pardir)
# The remaining roots are all aliases of CHROME_SRC in this file.
ANDROID_BUILD_TOP = CHROME_SRC
# Prefix prepended to library paths before they are handed to the tools.
SYMBOLS_DIR = CHROME_SRC
# Root under which TranslateLibPath() searches for locally built libraries.
CHROME_SYMBOLS_DIR = CHROME_SRC

# Target architecture; selects the toolchain in ToolPath()/FindToolchain()
# and decides whether StripPC() clears the low bit of an address.
ARCH = "arm"

# Cache for the result of FindToolchain(); None until the first lookup.
TOOLCHAIN_INFO = None
     35 
     36 def Uname():
     37   """'uname' for constructing prebuilt/<...> and out/host/<...> paths."""
     38   uname = os.uname()[0]
     39   if uname == "Darwin":
     40     proc = os.uname()[-1]
     41     if proc == "i386" or proc == "x86_64":
     42       return "darwin-x86"
     43     return "darwin-ppc"
     44   if uname == "Linux":
     45     return "linux-x86"
     46   return uname
     47 
     48 def ToolPath(tool, toolchain_info=None):
     49   """Return a full qualified path to the specified tool"""
     50   # ToolPath looks for the tools in the completely incorrect directory.
     51   # This looks in the checked in android_tools.
     52   if ARCH == "arm":
     53     toolchain_source = "arm-linux-androideabi-4.6"
     54     toolchain_prefix = "arm-linux-androideabi"
     55     ndk = "ndk"
     56   elif ARCH == "arm64":
     57     toolchain_source = "aarch64-linux-android-4.9"
     58     toolchain_prefix = "aarch64-linux-android"
     59     ndk = "ndk"
     60   elif ARCH == "x86":
     61     toolchain_source = "x86-4.6"
     62     toolchain_prefix = "i686-android-linux"
     63     ndk = "ndk"
     64   elif ARCH == "x86_64":
     65     toolchain_source = "x86_64-4.9"
     66     toolchain_prefix = "x86_64-linux-android"
     67     ndk = "ndk"
     68   elif ARCH == "mips":
     69     toolchain_source = "mipsel-linux-android-4.6"
     70     toolchain_prefix = "mipsel-linux-android"
     71     ndk = "ndk"
     72   else:
     73     raise Exception("Could not find tool chain")
     74 
     75   toolchain_subdir = (
     76       "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
     77        (ndk, toolchain_source))
     78 
     79   return os.path.join(CHROME_SRC,
     80                       toolchain_subdir,
     81                       toolchain_prefix + "-" + tool)
     82 
     83 def FindToolchain():
     84   """Look for the latest available toolchain
     85 
     86   Args:
     87     None
     88 
     89   Returns:
     90     A pair of strings containing toolchain label and target prefix.
     91   """
     92   global TOOLCHAIN_INFO
     93   if TOOLCHAIN_INFO is not None:
     94     return TOOLCHAIN_INFO
     95 
     96   ## Known toolchains, newer ones in the front.
     97   if ARCH == "arm64":
     98     gcc_version = "4.9"
     99     known_toolchains = [
    100       ("aarch64-linux-android-" + gcc_version, "aarch64", "aarch64-linux-android")
    101     ]
    102   elif ARCH == "arm":
    103     gcc_version = "4.6"
    104     known_toolchains = [
    105       ("arm-linux-androideabi-" + gcc_version, "arm", "arm-linux-androideabi"),
    106     ]
    107   elif ARCH =="x86":
    108     known_toolchains = [
    109       ("i686-android-linux-4.4.3", "x86", "i686-android-linux")
    110     ]
    111   elif ARCH =="x86_64":
    112     known_toolchains = [
    113       ("x86_64-linux-android-4.9", "x86_64", "x86_64-linux-android")
    114     ]
    115   elif ARCH == "mips":
    116     gcc_version = "4.6"
    117     known_toolchains = [
    118       ("mipsel-linux-android-" + gcc_version, "mips", "mipsel-linux-android")
    119     ]
    120   else:
    121     known_toolchains = []
    122 
    123   # Look for addr2line to check for valid toolchain path.
    124   for (label, platform, target) in known_toolchains:
    125     toolchain_info = (label, platform, target);
    126     if os.path.exists(ToolPath("addr2line", toolchain_info)):
    127       TOOLCHAIN_INFO = toolchain_info
    128       print "Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO)
    129       return toolchain_info
    130 
    131   raise Exception("Could not find tool chain")
    132 
    133 def TranslateLibPath(lib):
    134   # SymbolInformation(lib, addr) receives lib as the path from symbols
    135   # root to the symbols file. This needs to be translated to point to the
    136   # correct .so path. If the user doesn't explicitly specify which directory to
    137   # use, then use the most recently updated one in one of the known directories.
    138   # If the .so is not found somewhere in CHROME_SYMBOLS_DIR, leave it
    139   # untranslated in case it is an Android symbol in SYMBOLS_DIR.
    140   library_name = os.path.basename(lib)
    141   out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')
    142   candidate_dirs = ['.',
    143                     os.path.join(out_dir, 'Debug', 'lib'),
    144                     os.path.join(out_dir, 'Debug', 'lib.target'),
    145                     os.path.join(out_dir, 'Release', 'lib'),
    146                     os.path.join(out_dir, 'Release', 'lib.target'),
    147                     ]
    148 
    149   candidate_libraries = map(
    150       lambda d: ('%s/%s/%s' % (CHROME_SYMBOLS_DIR, d, library_name)),
    151       candidate_dirs)
    152   candidate_libraries = filter(os.path.exists, candidate_libraries)
    153   candidate_libraries = sorted(candidate_libraries,
    154                                key=os.path.getmtime, reverse=True)
    155 
    156   if not candidate_libraries:
    157     return lib
    158 
    159   library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
    160   return '/' + library_path
    161 
    162 def SymbolInformation(lib, addr, get_detailed_info):
    163   """Look up symbol information about an address.
    164 
    165   Args:
    166     lib: library (or executable) pathname containing symbols
    167     addr: string hexidecimal address
    168 
    169   Returns:
    170     A list of the form [(source_symbol, source_location,
    171     object_symbol_with_offset)].
    172 
    173     If the function has been inlined then the list may contain
    174     more than one element with the symbols for the most deeply
    175     nested inlined location appearing first.  The list is
    176     always non-empty, even if no information is available.
    177 
    178     Usually you want to display the source_location and
    179     object_symbol_with_offset from the last element in the list.
    180   """
    181   lib = TranslateLibPath(lib)
    182   info = SymbolInformationForSet(lib, set([addr]), get_detailed_info)
    183   return (info and info.get(addr)) or [(None, None, None)]
    184 
    185 
    186 def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
    187   """Look up symbol information for a set of addresses from the given library.
    188 
    189   Args:
    190     lib: library (or executable) pathname containing symbols
    191     unique_addrs: set of hexidecimal addresses
    192 
    193   Returns:
    194     A dictionary of the form {addr: [(source_symbol, source_location,
    195     object_symbol_with_offset)]} where each address has a list of
    196     associated symbols and locations.  The list is always non-empty.
    197 
    198     If the function has been inlined then the list may contain
    199     more than one element with the symbols for the most deeply
    200     nested inlined location appearing first.  The list is
    201     always non-empty, even if no information is available.
    202 
    203     Usually you want to display the source_location and
    204     object_symbol_with_offset from the last element in the list.
    205   """
    206   if not lib:
    207     return None
    208 
    209   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    210   if not addr_to_line:
    211     return None
    212 
    213   if get_detailed_info:
    214     addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    215     if not addr_to_objdump:
    216       return None
    217   else:
    218     addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
    219 
    220   result = {}
    221   for addr in unique_addrs:
    222     source_info = addr_to_line.get(addr)
    223     if not source_info:
    224       source_info = [(None, None)]
    225     if addr in addr_to_objdump:
    226       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    227       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    228                                                          object_offset)
    229     else:
    230       object_symbol_with_offset = None
    231     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    232         for (source_symbol, source_location) in source_info]
    233 
    234   return result
    235 
    236 
    237 class MemoizedForSet(object):
    238   def __init__(self, fn):
    239     self.fn = fn
    240     self.cache = {}
    241 
    242   def __call__(self, lib, unique_addrs):
    243     lib_cache = self.cache.setdefault(lib, {})
    244 
    245     no_cache = filter(lambda x: x not in lib_cache, unique_addrs)
    246     if no_cache:
    247       lib_cache.update((k, None) for k in no_cache)
    248       result = self.fn(lib, no_cache)
    249       if result:
    250         lib_cache.update(result)
    251 
    252     return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
    253 
    254 
    255 @MemoizedForSet
    256 def CallAddr2LineForSet(lib, unique_addrs):
    257   """Look up line and symbol information for a set of addresses.
    258 
    259   Args:
    260     lib: library (or executable) pathname containing symbols
    261     unique_addrs: set of string hexidecimal addresses look up.
    262 
    263   Returns:
    264     A dictionary of the form {addr: [(symbol, file:line)]} where
    265     each address has a list of associated symbols and locations
    266     or an empty list if no symbol information was found.
    267 
    268     If the function has been inlined then the list may contain
    269     more than one element with the symbols for the most deeply
    270     nested inlined location appearing first.
    271   """
    272   if not lib:
    273     return None
    274 
    275 
    276   symbols = SYMBOLS_DIR + lib
    277   if not os.path.isfile(symbols):
    278     return None
    279 
    280   (label, platform, target) = FindToolchain()
    281   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    282       "--demangle", "--exe=" + symbols]
    283   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    284 
    285   result = {}
    286   addrs = sorted(unique_addrs)
    287   for addr in addrs:
    288     child.stdin.write("0x%s\n" % addr)
    289     child.stdin.flush()
    290     records = []
    291     first = True
    292     while True:
    293       symbol = child.stdout.readline().strip()
    294       if symbol == "??":
    295         symbol = None
    296       location = child.stdout.readline().strip()
    297       if location == "??:0":
    298         location = None
    299       if symbol is None and location is None:
    300         break
    301       records.append((symbol, location))
    302       if first:
    303         # Write a blank line as a sentinel so we know when to stop
    304         # reading inlines from the output.
    305         # The blank line will cause addr2line to emit "??\n??:0\n".
    306         child.stdin.write("\n")
    307         first = False
    308     result[addr] = records
    309   child.stdin.close()
    310   child.stdout.close()
    311   return result
    312 
    313 
    314 def StripPC(addr):
    315   """Strips the Thumb bit a program counter address when appropriate.
    316 
    317   Args:
    318     addr: the program counter address
    319 
    320   Returns:
    321     The stripped program counter address.
    322   """
    323   global ARCH
    324 
    325   if ARCH == "arm":
    326     return addr & ~1
    327   return addr
    328 
    329 @MemoizedForSet
    330 def CallObjdumpForSet(lib, unique_addrs):
    331   """Use objdump to find out the names of the containing functions.
    332 
    333   Args:
    334     lib: library (or executable) pathname containing symbols
    335     unique_addrs: set of string hexidecimal addresses to find the functions for.
    336 
    337   Returns:
    338     A dictionary of the form {addr: (string symbol, offset)}.
    339   """
    340   if not lib:
    341     return None
    342 
    343   symbols = SYMBOLS_DIR + lib
    344   if not os.path.exists(symbols):
    345     return None
    346 
    347   symbols = SYMBOLS_DIR + lib
    348   if not os.path.exists(symbols):
    349     return None
    350 
    351   result = {}
    352 
    353   # Function lines look like:
    354   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    355   # We pull out the address and function first. Then we check for an optional
    356   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    357   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    358   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    359 
    360   # A disassembly line looks like:
    361   #   177b2:  b510        push  {r4, lr}
    362   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    363 
    364   for target_addr in unique_addrs:
    365     start_addr_dec = str(StripPC(int(target_addr, 16)))
    366     stop_addr_dec = str(StripPC(int(target_addr, 16)) + 8)
    367     cmd = [ToolPath("objdump"),
    368            "--section=.text",
    369            "--demangle",
    370            "--disassemble",
    371            "--start-address=" + start_addr_dec,
    372            "--stop-address=" + stop_addr_dec,
    373            symbols]
    374 
    375     current_symbol = None    # The current function symbol in the disassembly.
    376     current_symbol_addr = 0  # The address of the current function.
    377 
    378     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    379     for line in stream:
    380       # Is it a function line like:
    381       #   000177b0 <android::IBinder::~IBinder()>:
    382       components = func_regexp.match(line)
    383       if components:
    384         # This is a new function, so record the current function and its address.
    385         current_symbol_addr = int(components.group(1), 16)
    386         current_symbol = components.group(2)
    387 
    388         # Does it have an optional offset like: "foo(..)+0x2c"?
    389         components = offset_regexp.match(current_symbol)
    390         if components:
    391           current_symbol = components.group(1)
    392           offset = components.group(2)
    393           if offset:
    394             current_symbol_addr -= int(offset, 16)
    395 
    396       # Is it an disassembly line like:
    397       #   177b2:  b510        push  {r4, lr}
    398       components = asm_regexp.match(line)
    399       if components:
    400         addr = components.group(1)
    401         i_addr = int(addr, 16)
    402         i_target = StripPC(int(target_addr, 16))
    403         if i_addr == i_target:
    404           result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    405     stream.close()
    406 
    407   return result
    408 
    409 
    410 def CallCppFilt(mangled_symbol):
    411   cmd = [ToolPath("c++filt")]
    412   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    413   process.stdin.write(mangled_symbol)
    414   process.stdin.write("\n")
    415   process.stdin.close()
    416   demangled_symbol = process.stdout.readline().strip()
    417   process.stdout.close()
    418   return demangled_symbol
    419 
    420 def FormatSymbolWithOffset(symbol, offset):
    421   if offset == 0:
    422     return symbol
    423   return "%s+%d" % (symbol, offset)
    424