Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Module for looking up symbolic debugging information.
     18 
     19 The information can include symbol names, offsets, and source locations.
     20 """
     21 
     22 import glob
     23 import itertools
     24 import os
     25 import re
     26 import subprocess
     27 import zipfile
     28 
     29 CHROME_SRC = os.path.join(os.path.realpath(os.path.dirname(__file__)),
     30                           os.pardir, os.pardir, os.pardir, os.pardir)
     31 ANDROID_BUILD_TOP = CHROME_SRC
     32 SYMBOLS_DIR = CHROME_SRC
     33 CHROME_SYMBOLS_DIR = CHROME_SRC
     34 
     35 ARCH = "arm"
     36 
     37 TOOLCHAIN_INFO = None
     38 
     39 def Uname():
     40   """'uname' for constructing prebuilt/<...> and out/host/<...> paths."""
     41   uname = os.uname()[0]
     42   if uname == "Darwin":
     43     proc = os.uname()[-1]
     44     if proc == "i386" or proc == "x86_64":
     45       return "darwin-x86"
     46     return "darwin-ppc"
     47   if uname == "Linux":
     48     return "linux-x86"
     49   return uname
     50 
     51 def ToolPath(tool, toolchain_info=None):
     52   """Return a full qualified path to the specified tool"""
     53   # ToolPath looks for the tools in the completely incorrect directory.
     54   # This looks in the checked in android_tools.
     55   if ARCH == "arm":
     56     toolchain_source = "arm-linux-androideabi-4.6"
     57     toolchain_prefix = "arm-linux-androideabi"
     58     ndk = "ndk"
     59   elif ARCH == "arm64":
     60     toolchain_source = "aarch64-linux-android-4.9"
     61     toolchain_prefix = "aarch64-linux-android"
     62     ndk = "ndk"
     63   elif ARCH == "x86":
     64     toolchain_source = "x86-4.6"
     65     toolchain_prefix = "i686-android-linux"
     66     ndk = "ndk"
     67   elif ARCH == "x86_64":
     68     toolchain_source = "x86_64-4.9"
     69     toolchain_prefix = "x86_64-linux-android"
     70     ndk = "ndk"
     71   elif ARCH == "mips":
     72     toolchain_source = "mipsel-linux-android-4.6"
     73     toolchain_prefix = "mipsel-linux-android"
     74     ndk = "ndk"
     75   else:
     76     raise Exception("Could not find tool chain")
     77 
     78   toolchain_subdir = (
     79       "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
     80        (ndk, toolchain_source))
     81 
     82   return os.path.join(CHROME_SRC,
     83                       toolchain_subdir,
     84                       toolchain_prefix + "-" + tool)
     85 
     86 def FindToolchain():
     87   """Look for the latest available toolchain
     88 
     89   Args:
     90     None
     91 
     92   Returns:
     93     A pair of strings containing toolchain label and target prefix.
     94   """
     95   global TOOLCHAIN_INFO
     96   if TOOLCHAIN_INFO is not None:
     97     return TOOLCHAIN_INFO
     98 
     99   ## Known toolchains, newer ones in the front.
    100   if ARCH == "arm64":
    101     gcc_version = "4.9"
    102     known_toolchains = [
    103       ("aarch64-linux-android-" + gcc_version, "aarch64", "aarch64-linux-android")
    104     ]
    105   elif ARCH == "arm":
    106     gcc_version = "4.6"
    107     known_toolchains = [
    108       ("arm-linux-androideabi-" + gcc_version, "arm", "arm-linux-androideabi"),
    109     ]
    110   elif ARCH =="x86":
    111     known_toolchains = [
    112       ("i686-android-linux-4.4.3", "x86", "i686-android-linux")
    113     ]
    114   elif ARCH =="x86_64":
    115     known_toolchains = [
    116       ("x86_64-linux-android-4.9", "x86_64", "x86_64-linux-android")
    117     ]
    118   elif ARCH == "mips":
    119     gcc_version = "4.6"
    120     known_toolchains = [
    121       ("mipsel-linux-android-" + gcc_version, "mips", "mipsel-linux-android")
    122     ]
    123   else:
    124     known_toolchains = []
    125 
    126   # Look for addr2line to check for valid toolchain path.
    127   for (label, platform, target) in known_toolchains:
    128     toolchain_info = (label, platform, target);
    129     if os.path.exists(ToolPath("addr2line", toolchain_info)):
    130       TOOLCHAIN_INFO = toolchain_info
    131       print "Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO)
    132       return toolchain_info
    133 
    134   raise Exception("Could not find tool chain")
    135 
    136 def GetAapt():
    137   """Returns the path to aapt.
    138 
    139   Args:
    140     None
    141 
    142   Returns:
    143     the pathname of the 'aapt' executable.
    144   """
    145   sdk_home = os.path.join('third_party', 'android_tools', 'sdk')
    146   sdk_home = os.environ.get('SDK_HOME', sdk_home)
    147   aapt_exe = glob.glob(os.path.join(sdk_home, 'build-tools', '*', 'aapt'))
    148   if not aapt_exe:
    149     return None
    150   return sorted(aapt_exe, key=os.path.getmtime, reverse=True)[0]
    151 
    152 def ApkMatchPackageName(aapt, apk_path, package_name):
    153   """Returns true the APK's package name matches package_name.
    154 
    155   Args:
    156     aapt: pathname for the 'aapt' executable.
    157     apk_path: pathname of the APK file.
    158     package_name: package name to match.
    159 
    160   Returns:
    161     True if the package name matches or aapt is None, False otherwise.
    162   """
    163   if not aapt:
    164     # Allow false positives
    165     return True
    166   aapt_output = subprocess.check_output(
    167       [aapt, 'dump', 'badging', apk_path]).split('\n')
    168   package_name_re = re.compile(r'package: .*name=\'(\S*)\'')
    169   for line in aapt_output:
    170     match = package_name_re.match(line)
    171     if match:
    172       return package_name == match.group(1)
    173   return False
    174 
    175 def PathListJoin(prefix_list, suffix_list):
    176    """Returns each prefix in prefix_list joined with each suffix in suffix list.
    177 
    178    Args:
    179      prefix_list: list of path prefixes.
    180      suffix_list: list of path suffixes.
    181 
    182    Returns:
    183      List of paths each of which joins a prefix with a suffix.
    184    """
    185    return [
    186        os.path.join(prefix, suffix)
    187        for prefix in prefix_list for suffix in suffix_list ]
    188 
    189 def GetCandidates(dirs, filepart, candidate_fun):
    190   """Returns a list of candidate filenames.
    191 
    192   Args:
    193     dirs: a list of the directory part of the pathname.
    194     filepart: the file part of the pathname.
    195     candidate_fun: a function to apply to each candidate, returns a list.
    196 
    197   Returns:
    198     A list of candidate files ordered by modification time, newest first.
    199   """
    200   out_dir = os.environ.get('CHROMIUM_OUT_DIR', 'out')
    201   out_dir = os.path.join(CHROME_SYMBOLS_DIR, out_dir)
    202   buildtype = os.environ.get('BUILDTYPE')
    203   if buildtype:
    204     buildtype_list = [ buildtype ]
    205   else:
    206     buildtype_list = [ 'Debug', 'Release' ]
    207 
    208   candidates = PathListJoin([out_dir], buildtype_list) + [CHROME_SYMBOLS_DIR]
    209   candidates = PathListJoin(candidates, dirs)
    210   candidates = PathListJoin(candidates, [filepart])
    211   candidates = list(
    212       itertools.chain.from_iterable(map(candidate_fun, candidates)))
    213   candidates = sorted(candidates, key=os.path.getmtime, reverse=True)
    214   return candidates
    215 
    216 def GetCandidateApks():
    217   """Returns a list of APKs which could contain the library.
    218 
    219   Args:
    220     None
    221 
    222   Returns:
    223     list of APK filename which could contain the library.
    224   """
    225   return GetCandidates(['apks'], '*.apk', glob.glob)
    226 
    227 def GetCrazyLib(apk_filename):
    228   """Returns the name of the first crazy library from this APK.
    229 
    230   Args:
    231     apk_filename: name of an APK file.
    232 
    233   Returns:
    234     Name of the first library which would be crazy loaded from this APK.
    235   """
    236   zip_file = zipfile.ZipFile(apk_filename, 'r')
    237   for filename in zip_file.namelist():
    238     match = re.match('lib/[^/]*/crazy.(lib.*[.]so)', filename)
    239     if match:
    240       return match.group(1)
    241 
    242 def GetMatchingApks(device_apk_name):
    243   """Find any APKs which match the package indicated by the device_apk_name.
    244 
    245   Args:
    246      device_apk_name: name of the APK on the device.
    247 
    248   Returns:
    249      A list of APK filenames which could contain the desired library.
    250   """
    251   match = re.match('(.*)-[0-9]+[.]apk$', device_apk_name)
    252   if not match:
    253     return None
    254   package_name = match.group(1)
    255   return filter(
    256       lambda candidate_apk:
    257           ApkMatchPackageName(GetAapt(), candidate_apk, package_name),
    258       GetCandidateApks())
    259 
    260 def MapDeviceApkToLibrary(device_apk_name):
    261   """Provide a library name which corresponds with device_apk_name.
    262 
    263   Args:
    264     device_apk_name: name of the APK on the device.
    265 
    266   Returns:
    267     Name of the library which corresponds to that APK.
    268   """
    269   matching_apks = GetMatchingApks(device_apk_name)
    270   for matching_apk in matching_apks:
    271     crazy_lib = GetCrazyLib(matching_apk)
    272     if crazy_lib:
    273       return crazy_lib
    274 
    275 def GetCandidateLibraries(library_name):
    276   """Returns a list of candidate library filenames.
    277 
    278   Args:
    279     library_name: basename of the library to match.
    280 
    281   Returns:
    282     A list of matching library filenames for library_name.
    283   """
    284   return GetCandidates(
    285       ['lib', 'lib.target'], library_name,
    286       lambda filename: filter(os.path.exists, [filename]))
    287 
    288 def TranslateLibPath(lib):
    289   # SymbolInformation(lib, addr) receives lib as the path from symbols
    290   # root to the symbols file. This needs to be translated to point to the
    291   # correct .so path. If the user doesn't explicitly specify which directory to
    292   # use, then use the most recently updated one in one of the known directories.
    293   # If the .so is not found somewhere in CHROME_SYMBOLS_DIR, leave it
    294   # untranslated in case it is an Android symbol in SYMBOLS_DIR.
    295   library_name = os.path.basename(lib)
    296 
    297   # The filename in the stack trace maybe an APK name rather than a library
    298   # name. This happens when the library was loaded directly from inside the
    299   # APK. If this is the case we try to figure out the library name by looking
    300   # for a matching APK file and finding the name of the library in contains.
    301   # The name of the APK file on the device is of the form
    302   # <package_name>-<number>.apk. The APK file on the host may have any name
    303   # so we look at the APK badging to see if the package name matches.
    304   if re.search('-[0-9]+[.]apk$', library_name):
    305     mapping = MapDeviceApkToLibrary(library_name)
    306     if mapping:
    307       library_name = mapping
    308 
    309   candidate_libraries = GetCandidateLibraries(library_name)
    310   if not candidate_libraries:
    311     return lib
    312 
    313   library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
    314   return '/' + library_path
    315 
    316 def SymbolInformation(lib, addr, get_detailed_info):
    317   """Look up symbol information about an address.
    318 
    319   Args:
    320     lib: library (or executable) pathname containing symbols
    321     addr: string hexidecimal address
    322 
    323   Returns:
    324     A list of the form [(source_symbol, source_location,
    325     object_symbol_with_offset)].
    326 
    327     If the function has been inlined then the list may contain
    328     more than one element with the symbols for the most deeply
    329     nested inlined location appearing first.  The list is
    330     always non-empty, even if no information is available.
    331 
    332     Usually you want to display the source_location and
    333     object_symbol_with_offset from the last element in the list.
    334   """
    335   lib = TranslateLibPath(lib)
    336   info = SymbolInformationForSet(lib, set([addr]), get_detailed_info)
    337   return (info and info.get(addr)) or [(None, None, None)]
    338 
    339 
    340 def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
    341   """Look up symbol information for a set of addresses from the given library.
    342 
    343   Args:
    344     lib: library (or executable) pathname containing symbols
    345     unique_addrs: set of hexidecimal addresses
    346 
    347   Returns:
    348     A dictionary of the form {addr: [(source_symbol, source_location,
    349     object_symbol_with_offset)]} where each address has a list of
    350     associated symbols and locations.  The list is always non-empty.
    351 
    352     If the function has been inlined then the list may contain
    353     more than one element with the symbols for the most deeply
    354     nested inlined location appearing first.  The list is
    355     always non-empty, even if no information is available.
    356 
    357     Usually you want to display the source_location and
    358     object_symbol_with_offset from the last element in the list.
    359   """
    360   if not lib:
    361     return None
    362 
    363   addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    364   if not addr_to_line:
    365     return None
    366 
    367   if get_detailed_info:
    368     addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    369     if not addr_to_objdump:
    370       return None
    371   else:
    372     addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
    373 
    374   result = {}
    375   for addr in unique_addrs:
    376     source_info = addr_to_line.get(addr)
    377     if not source_info:
    378       source_info = [(None, None)]
    379     if addr in addr_to_objdump:
    380       (object_symbol, object_offset) = addr_to_objdump.get(addr)
    381       object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
    382                                                          object_offset)
    383     else:
    384       object_symbol_with_offset = None
    385     result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
    386         for (source_symbol, source_location) in source_info]
    387 
    388   return result
    389 
    390 
    391 class MemoizedForSet(object):
    392   def __init__(self, fn):
    393     self.fn = fn
    394     self.cache = {}
    395 
    396   def __call__(self, lib, unique_addrs):
    397     lib_cache = self.cache.setdefault(lib, {})
    398 
    399     no_cache = filter(lambda x: x not in lib_cache, unique_addrs)
    400     if no_cache:
    401       lib_cache.update((k, None) for k in no_cache)
    402       result = self.fn(lib, no_cache)
    403       if result:
    404         lib_cache.update(result)
    405 
    406     return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
    407 
    408 
    409 @MemoizedForSet
    410 def CallAddr2LineForSet(lib, unique_addrs):
    411   """Look up line and symbol information for a set of addresses.
    412 
    413   Args:
    414     lib: library (or executable) pathname containing symbols
    415     unique_addrs: set of string hexidecimal addresses look up.
    416 
    417   Returns:
    418     A dictionary of the form {addr: [(symbol, file:line)]} where
    419     each address has a list of associated symbols and locations
    420     or an empty list if no symbol information was found.
    421 
    422     If the function has been inlined then the list may contain
    423     more than one element with the symbols for the most deeply
    424     nested inlined location appearing first.
    425   """
    426   if not lib:
    427     return None
    428 
    429 
    430   symbols = SYMBOLS_DIR + lib
    431   if not os.path.isfile(symbols):
    432     return None
    433 
    434   (label, platform, target) = FindToolchain()
    435   cmd = [ToolPath("addr2line"), "--functions", "--inlines",
    436       "--demangle", "--exe=" + symbols]
    437   child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    438 
    439   result = {}
    440   addrs = sorted(unique_addrs)
    441   for addr in addrs:
    442     child.stdin.write("0x%s\n" % addr)
    443     child.stdin.flush()
    444     records = []
    445     first = True
    446     while True:
    447       symbol = child.stdout.readline().strip()
    448       if symbol == "??":
    449         symbol = None
    450       location = child.stdout.readline().strip()
    451       if location == "??:0":
    452         location = None
    453       if symbol is None and location is None:
    454         break
    455       records.append((symbol, location))
    456       if first:
    457         # Write a blank line as a sentinel so we know when to stop
    458         # reading inlines from the output.
    459         # The blank line will cause addr2line to emit "??\n??:0\n".
    460         child.stdin.write("\n")
    461         first = False
    462     result[addr] = records
    463   child.stdin.close()
    464   child.stdout.close()
    465   return result
    466 
    467 
    468 def StripPC(addr):
    469   """Strips the Thumb bit a program counter address when appropriate.
    470 
    471   Args:
    472     addr: the program counter address
    473 
    474   Returns:
    475     The stripped program counter address.
    476   """
    477   global ARCH
    478 
    479   if ARCH == "arm":
    480     return addr & ~1
    481   return addr
    482 
    483 @MemoizedForSet
    484 def CallObjdumpForSet(lib, unique_addrs):
    485   """Use objdump to find out the names of the containing functions.
    486 
    487   Args:
    488     lib: library (or executable) pathname containing symbols
    489     unique_addrs: set of string hexidecimal addresses to find the functions for.
    490 
    491   Returns:
    492     A dictionary of the form {addr: (string symbol, offset)}.
    493   """
    494   if not lib:
    495     return None
    496 
    497   symbols = SYMBOLS_DIR + lib
    498   if not os.path.exists(symbols):
    499     return None
    500 
    501   symbols = SYMBOLS_DIR + lib
    502   if not os.path.exists(symbols):
    503     return None
    504 
    505   result = {}
    506 
    507   # Function lines look like:
    508   #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    509   # We pull out the address and function first. Then we check for an optional
    510   # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    511   func_regexp = re.compile("(^[a-f0-9]*) \<(.*)\>:$")
    512   offset_regexp = re.compile("(.*)\+0x([a-f0-9]*)")
    513 
    514   # A disassembly line looks like:
    515   #   177b2:  b510        push  {r4, lr}
    516   asm_regexp = re.compile("(^[ a-f0-9]*):[ a-f0-0]*.*$")
    517 
    518   for target_addr in unique_addrs:
    519     start_addr_dec = str(StripPC(int(target_addr, 16)))
    520     stop_addr_dec = str(StripPC(int(target_addr, 16)) + 8)
    521     cmd = [ToolPath("objdump"),
    522            "--section=.text",
    523            "--demangle",
    524            "--disassemble",
    525            "--start-address=" + start_addr_dec,
    526            "--stop-address=" + stop_addr_dec,
    527            symbols]
    528 
    529     current_symbol = None    # The current function symbol in the disassembly.
    530     current_symbol_addr = 0  # The address of the current function.
    531 
    532     stream = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout
    533     for line in stream:
    534       # Is it a function line like:
    535       #   000177b0 <android::IBinder::~IBinder()>:
    536       components = func_regexp.match(line)
    537       if components:
    538         # This is a new function, so record the current function and its address.
    539         current_symbol_addr = int(components.group(1), 16)
    540         current_symbol = components.group(2)
    541 
    542         # Does it have an optional offset like: "foo(..)+0x2c"?
    543         components = offset_regexp.match(current_symbol)
    544         if components:
    545           current_symbol = components.group(1)
    546           offset = components.group(2)
    547           if offset:
    548             current_symbol_addr -= int(offset, 16)
    549 
    550       # Is it an disassembly line like:
    551       #   177b2:  b510        push  {r4, lr}
    552       components = asm_regexp.match(line)
    553       if components:
    554         addr = components.group(1)
    555         i_addr = int(addr, 16)
    556         i_target = StripPC(int(target_addr, 16))
    557         if i_addr == i_target:
    558           result[target_addr] = (current_symbol, i_target - current_symbol_addr)
    559     stream.close()
    560 
    561   return result
    562 
    563 
    564 def CallCppFilt(mangled_symbol):
    565   cmd = [ToolPath("c++filt")]
    566   process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    567   process.stdin.write(mangled_symbol)
    568   process.stdin.write("\n")
    569   process.stdin.close()
    570   demangled_symbol = process.stdout.readline().strip()
    571   process.stdout.close()
    572   return demangled_symbol
    573 
    574 def FormatSymbolWithOffset(symbol, offset):
    575   if offset == 0:
    576     return symbol
    577   return "%s+%d" % (symbol, offset)
    578