Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2017 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Generates a human-interpretable view of a native heap dump from 'am dumpheap -n'."""
     18 
     19 import logging
     20 import os
     21 import os.path
     22 import re
     23 import subprocess
     24 import sys
     25 import zipfile
     26 
     class Args:
       """Command-line arguments for the native heap dump viewer.

       Constructing an instance parses sys.argv. The usage text is printed and
       the process exits with status 1 when an option that takes a value is the
       last argument, or when the number of positional arguments is not exactly
       one. Unknown options are reported and otherwise ignored.

       Attributes:
         verbose: True when --verbose was given.
         html_output: True when --html was given.
         reverse_frames: True when --reverse was given.
         symboldir: Directory prefix for .so files with symbols. Defaults to
           $ANDROID_PRODUCT_OUT/symbols, or ./symbols when that variable is unset.
         app_symboldir: Directory prefix for app files; "" unless --app-symbols
           was given.
         native_heap: The single positional argument, the heap dump path.
       """

       # Built line by line; the leading and trailing "" entries give the text
       # its opening and closing newline.
       _usage = "\n".join((
           "",
           "Usage:",
           "1. Collect a native heap dump from the device. For example:",
           "   $ adb shell stop",
           "   $ adb shell setprop libc.debug.malloc.program app_process",
           "   $ adb shell setprop libc.debug.malloc.options backtrace=64",
           "   $ adb shell start",
           "    (launch and use app)",
           "   $ adb shell am dumpheap -n <pid> /data/local/tmp/native_heap.txt",
           "   $ adb pull /data/local/tmp/native_heap.txt",
           "",
           "2. Run the viewer:",
           "   $ python native_heapdump_viewer.py [options] native_heap.txt",
           "      [--verbose]: verbose output",
           "      [--html]: interactive html output",
           "      [--reverse]: reverse the backtraces (start the tree from the leaves)",
           "      [--symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the .so files with symbols.",
           "                 Defaults to $ANDROID_PRODUCT_OUT/symbols",
           "      [--app-symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the app APK and so files.",
           "                 Defaults to the current directory.",
           "   This outputs a file with lines of the form:",
           "",
           "      5831776  29.09% 100.00%    10532     71b07bc0b0 /system/lib64/libandroid_runtime.so Typeface_createFromArray frameworks/base/core/jni/android/graphics/Typeface.cpp:68",
           "",
           "   5831776 is the total number of bytes allocated at this stack frame, which",
           "   is 29.09% of the total number of bytes allocated and 100.00% of the parent",
           "   frame's bytes allocated. 10532 is the total number of allocations at this",
           "   stack frame. 71b07bc0b0 is the address of the stack frame.",
           "",
       ))

       def __init__(self):
         self.verbose = False
         self.html_output = False
         self.reverse_frames = False
         product_out = os.getenv("ANDROID_PRODUCT_OUT")
         if product_out:
           self.symboldir = product_out + "/symbols"
         else:
           self.symboldir = "./symbols"
         self.app_symboldir = ""

         # Options that consume the following argument as a directory.
         dir_options = {
             "--symbols": "symboldir",
             "--app-symbols": "app_symboldir",
         }
         # Options that only switch a boolean attribute on.
         flag_options = {
             "--verbose": "verbose",
             "--html": "html_output",
             "--reverse": "reverse_frames",
         }

         argv = sys.argv[1:]
         extra_args = []
         i = 0
         while i < len(argv):
           arg = argv[i]
           if arg in dir_options:
             i += 1
             if i >= len(argv):
               # The option was the last argument, so there is no value to read.
               print("Missing value for option %s" % arg)
               print(self._usage)
               sys.exit(1)
             setattr(self, dir_options[arg], argv[i] + "/")
           elif arg in flag_options:
             setattr(self, flag_options[arg], True)
           elif arg.startswith("-"):
             # startswith() is also safe for an empty-string argument.
             print("Invalid option %s" % (arg))
           else:
             extra_args.append(arg)
           i += 1

         if len(extra_args) != 1:
           print(self._usage)
           sys.exit(1)

         self.native_heap = extra_args[0]
     95 
     class Backtrace:
       """One backtrace record: zygote flag, size, allocation count and frames."""

       def __init__(self, is_zygote, size, num_allocs, frames):
         # Plain value holder; the arguments are stored unchanged.
         self.is_zygote, self.size = is_zygote, size
         self.num_allocs, self.frames = num_allocs, frames
    102 
    class Mapping:
      """A memory map entry: address range, offset into the file, and file name."""

      def __init__(self, start, end, offset, name):
        # Plain value holder; the arguments are stored unchanged.
        self.start, self.end = start, end
        self.offset, self.name = offset, name
    109 
    class FrameDescription:
      """Symbol information for one frame: function, source location, library."""

      def __init__(self, function, location, library):
        # Plain value holder; the arguments are stored unchanged.
        self.function, self.location, self.library = function, location, library
    115 
    def GetVersion(native_heap):
      """Get the version of the native heap dump.

      Args:
        native_heap: Path of the native heap dump file.

      Returns:
        The version string (for example "v1.2") taken from the first line that
        starts with "Android Native Heap Dump vX.Y", or None when no line does.
      """

      # Raw string so the \s and \d escapes reach the regex engine unchanged.
      re_line = re.compile(r"Android\s+Native\s+Heap\s+Dump\s+(?P<version>v\d+\.\d+)\s*$")
      with open(native_heap, "r") as f:
        for line in f:
          m = re_line.match(line)
          if m:
            return m.group('version')
      return None
    127 
    def GetNumFieldValidByParsingLines(native_heap):
      """Determine if the num field is valid by parsing the backtrace lines.

      Malloc debug for N incorrectly set the num field to the number of
      backtraces instead of the number of allocations with the same size and
      backtrace. Read the file and if at least three lines all have the field
      set to the number of backtraces values, then consider this generated by
      the buggy malloc debug and indicate the num field is not valid.

      Args:
        native_heap: Path of the native heap dump file.

      Returns:
        True if the num field is valid.
        False if the num field is not valid and should be ignored.
      """

      # Raw strings so the \s and \d escapes reach the regex engine unchanged.
      re_backtrace = re.compile(r"Backtrace\s+size:\s+(?P<backtrace_size>\d+)")
      re_line = re.compile(r"z\s+(?P<zygote>\d+)\s+sz\s+(?P<size>\d+)\s+num\s+(?P<num_allocations>\d+)")

      matched = 0
      backtrace_size = 0
      with open(native_heap, "r") as f:
        for line in f:
          if backtrace_size == 0:
            # Keep looking for the "Backtrace size:" line until a non-zero value is read.
            m = re_backtrace.match(line)
            if m:
              backtrace_size = int(m.group('backtrace_size'))
          parts = line.split()
          # Same shape test the parser uses for backtrace lines.
          if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
            m = re_line.match(line)
            if m:
              num_allocations = int(m.group('num_allocations'))
              if num_allocations != backtrace_size:
                # A single differing line is enough to trust the field.
                return True
              # At least three lines must match this pattern before
              # considering this the old buggy version of malloc debug.
              matched += 1
              if matched == 3:
                return False
      # With fewer than three suspicious lines, the field is only trusted when
      # none were seen at all.
      return matched == 0
    167 
    def GetNumFieldValid(native_heap):
      """Report whether the num field of the backtrace lines can be trusted.

      Args:
        native_heap: Path of the native heap dump file.

      Returns:
        True when the dump declares a version other than v1.0; otherwise the
        result of inspecting the backtrace lines themselves.
      """
      version = GetVersion(native_heap)
      if version and version != "v1.0":
        return True
      # Version v1.0 was produced by a buggy version of malloc debug where the
      # num field was set incorrectly.
      # Unfortunately, Android P produced a v1.0 version that does set the
      # num field. Do one more check to see if this is the broken version.
      return GetNumFieldValidByParsingLines(native_heap)
    178 
    def GetMappingFromOffset(mapping, app_symboldir):
      """
      If the input mapping is a zip file, translate the contained uncompressed files and add mapping
      entries.

      This is done to handle symbols for the uncompressed .so files inside APKs. With the replaced
      mappings, the script looks up the .so files as separate files.

      Args:
        mapping: Object with name and offset attributes. It is modified in place
          when its offset falls inside a stored entry of the zip.
        app_symboldir: Prefix prepended to the basename of mapping.name to locate
          the zip file.

      Returns:
        The same mapping object, whether or not it was modified.
      """
      basename = os.path.basename(mapping.name)
      zip_name = app_symboldir + basename
      # A regular file with this basename is not necessarily a zip (a plain .so
      # may sit in the app symbols directory), so confirm the format first.
      if not (os.path.isfile(zip_name) and zipfile.is_zipfile(zip_name)):
        return mapping

      # Fixed-size part of a zip local file header, in bytes.
      zip_header_entry_size = 30
      so_file_alignment = 4096
      # The context manager closes the archive once the scan is done.
      with zipfile.ZipFile(zip_name) as opened_zip:
        # For all files in the zip, add mappings for the internal files.
        for file_info in opened_zip.infolist():
          # Only add stored files since it doesn't make sense to have PC into compressed ones.
          if file_info.compress_type != zipfile.ZIP_STORED:
            continue
          data_offset = (file_info.header_offset
              + zip_header_entry_size
              + len(file_info.filename)
              + len(file_info.extra)
              + len(file_info.comment))
          end_offset = data_offset + file_info.file_size
          if data_offset <= mapping.offset < end_offset:
            # Round up the data_offset to the nearest page since the .so must be aligned.
            data_offset += so_file_alignment - 1
            data_offset -= data_offset % so_file_alignment
            mapping.name = file_info.filename
            mapping.offset -= data_offset
            break
      return mapping
    212 
    def ParseNativeHeap(native_heap, reverse_frames, num_field_valid, app_symboldir):
      """Read the backtrace records and memory map entries of a heap dump.

      Args:
        native_heap: Path of the native heap dump file.
        reverse_frames: When true, each backtrace's frame list is reversed.
        num_field_valid: When false, the num field is ignored and every
          backtrace counts as a single allocation.
        app_symboldir: Passed to GetMappingFromOffset for each map entry.

      Returns:
        Two lists: the backtraces and the mappings, each in the order the
        corresponding lines appear in the file. No sorting is done here.
      """

      map_pattern = re.compile("(?P<start>[0-9a-f]+)-(?P<end>[0-9a-f]+) .... (?P<offset>[0-9a-f]+) [0-9a-f]+:[0-9a-f]+ [0-9]+ +(?P<name>.*)")
      backtraces, mappings = [], []

      with open(native_heap, "r") as heap_file:
        for line in heap_file:
          fields = line.split()
          # Backtrace line:
          #   z 0  sz       50  num    1  bt 000000000000a100 000000000000b200
          looks_like_backtrace = len(fields) > 7 and fields[0] == "z" and fields[2] == "sz"
          if looks_like_backtrace:
            is_zygote = fields[1] != "1"
            size = int(fields[3])
            num_allocs = int(fields[5]) if num_field_valid else 1
            frames = [int(token, 16) for token in fields[7:]]
            if reverse_frames:
              frames.reverse()
            backtraces.append(Backtrace(is_zygote, size, num_allocs, frames))
            continue

          # Map line:
          #   720de01000-720ded7000 r-xp 00000000 fd:00 495  /system/lib64/libc.so
          found = map_pattern.match(line)
          if found is None:
            continue
          entry = Mapping(
              int(found.group('start'), 16),   # address where the mapping starts
              int(found.group('end'), 16),     # address where the mapping ends
              int(found.group('offset'), 16),  # offset within the mapped file
              found.group('name'))
          mappings.append(GetMappingFromOffset(entry, app_symboldir))
      return backtraces, mappings
    255 
    def FindMapping(mappings, addr):
      """Find the mapping given addr.

      Binary search; mappings must be ordered by address with ranges that do not
      overlap. A mapping contains addr when start <= addr < end.

      Args:
        mappings: Sequence of objects with start and end attributes.
        addr: Address to look up.

      Returns the mapping that contains addr.
      Returns None if there is no such mapping.
      """

      # Named lo/hi rather than min/max so the builtins are not shadowed.
      lo = 0
      hi = len(mappings) - 1
      while lo <= hi:
        mid = (lo + hi) // 2
        candidate = mappings[mid]
        if candidate.end <= addr:
          lo = mid + 1
        elif candidate.start > addr:
          hi = mid - 1
        else:
          return candidate
      return None
    275 
    276 
    def ResolveAddrs(html_output, symboldir, app_symboldir, backtraces, mappings):
      """Resolve address libraries and offsets.

      addr_offsets maps addr to .so file offset
      addrs_by_lib maps library to list of addrs from that library
      Resolved addrs maps addr to FrameDescription

      Each library file is looked up under app_symboldir first and then under
      symboldir. For a library that is found, objdump supplies the .text
      section's file offset and addr2line supplies function and location.
      Addresses outside every mapping, and addresses in libraries whose file is
      not found, get "???" placeholders.

      Args:
        html_output: When true, progress messages are not printed.
        symboldir: Directory prefix for .so files with symbols.
        app_symboldir: Directory prefix for app .so files.
        backtraces: Objects with a frames list of integer addresses.
        mappings: Mappings searched with FindMapping.

      Returns the resolved_addrs hash.
      """

      addr_offsets = {}
      addrs_by_lib = {}
      resolved_addrs = {}
      empty_frame_description = FrameDescription("???", "???", "???")
      for backtrace in backtraces:
        for addr in backtrace.frames:
          if addr in addr_offsets:
            continue
          mapping = FindMapping(mappings, addr)
          if mapping:
            addr_offsets[addr] = addr - mapping.start + mapping.offset
            addrs_by_lib.setdefault(mapping.name, []).append(addr)
          else:
            resolved_addrs[addr] = empty_frame_description

      # Resolve functions and line numbers.
      if not html_output:
        print("Resolving symbols using directory %s..." % symboldir)

      for lib, lib_addrs in addrs_by_lib.items():
        sofile = app_symboldir + lib
        if not os.path.isfile(sofile):
          sofile = symboldir + lib
        if not os.path.isfile(sofile):
          if not html_output:
            print("%s not found for symbol resolution" % lib)

          fd = FrameDescription("???", "???", lib)
          for addr in lib_addrs:
            resolved_addrs[addr] = fd
          continue

        # universal_newlines=True makes the tool output text rather than bytes,
        # so the str-based splitting below also works on Python 3.
        file_offset = 0
        result = subprocess.check_output(["objdump", "-w", "-j", ".text", "-h", sofile],
                                         universal_newlines=True)
        for line in result.split("\n"):
          splitted = line.split()
          # Field 5 of the .text row is used as the section's file offset.
          if len(splitted) > 5 and splitted[1] == ".text":
            file_offset = int(splitted[5], 16)
            break

        # One address per line, relative to the start of .text.
        input_addrs = "".join(
            "%s\n" % hex(addr_offsets[addr] - file_offset) for addr in lib_addrs)

        p = subprocess.Popen(["addr2line", "-C", "-j", ".text", "-e", sofile, "-f"],
                             stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                             universal_newlines=True)
        result = p.communicate(input_addrs)[0]
        addr2line_rc = p.returncode
        # A negative return code means the child was killed by that signal.
        if addr2line_rc and (addr2line_rc < 0):
          logging.warning("addr2line on " + sofile + " terminated by signal " + str(-1 * addr2line_rc))
        # With -f, addr2line prints two lines per address: function, then location.
        splitted = result.split("\n")
        for x, addr in enumerate(lib_addrs):
          try:
            function = splitted[2*x]
            location = splitted[2*x+1]
          except IndexError:
            # Output was shorter than expected for this address.
            logging.warning("exception while resolving symbols", exc_info=True)
            resolved_addrs[addr] = FrameDescription("---", "---", lib)
          else:
            resolved_addrs[addr] = FrameDescription(function, location, lib)

      return resolved_addrs
    348 
    def Addr2Line(resolved_addrs, addr):
      """Look up the FrameDescription for a node address.

      Args:
        resolved_addrs: Dict keyed by integer address.
        addr: Hex string without prefix, or the root marker "ZYGOTE" or "APP".

      Returns:
        An all-empty FrameDescription for the root markers, otherwise the entry
        stored under int(addr, 16).
      """
      if addr in ("ZYGOTE", "APP"):
        return FrameDescription("", "", "")
      key = int(addr, 16)
      return resolved_addrs[key]
    354 
    class AddrInfo:
      """Node of the allocation tree, keyed by address.

      size accumulates bytes (size * num_allocs) and number accumulates the
      allocation count of every stack added through this node. children maps a
      child's addr to its AddrInfo node.
      """

      def __init__(self, addr):
        self.addr = addr
        self.size = self.number = self.num_allocs = 0
        self.children = {}

      def addStack(self, size, num_allocs, stack):
        """Add one backtrace's totals to this node and down the given stack.

        stack is a list of AddrInfo objects; its first element becomes (or is
        merged into) a child of this node and receives the rest of the list.
        """
        self.size += size * num_allocs
        self.number += num_allocs
        if not stack:
          return
        head = stack[0]
        # Reuse the node already registered for this address, if there is one.
        node = self.children.setdefault(head.addr, head)
        node.addStack(size, num_allocs, stack[1:])
    371 
    def Display(resolved_addrs, indent, total, parent_total, node):
      """Print node and its descendants as indented text rows.

      Each row shows the node's bytes, its percentage of total, its percentage
      of parent_total, its allocation count, then address, library, function
      and location. Children are printed largest first, indented two more
      spaces, with this node's size as their parent_total.
      """
      fd = Addr2Line(resolved_addrs, node.addr)
      # A zero denominator yields 0 rather than a division error.
      total_percent = 100 * node.size / float(total) if total != 0 else 0
      parent_percent = 100 * node.size / float(parent_total) if parent_total != 0 else 0
      row = (node.size, total_percent, parent_percent, node.number, indent,
             node.addr, fd.library, fd.function, fd.location)
      print("%9d %6.2f%% %6.2f%% %8d %s%s %s %s %s" % row)
      deeper = indent + "  "
      for child in sorted(node.children.values(), key=lambda x: x.size, reverse=True):
        Display(resolved_addrs, deeper, total, node.size, child)
    384 
    def DisplayHtml(verbose, resolved_addrs, total, node, extra, label_count):
      """Print node and its descendants as nested HTML list items.

      A node with children is printed as a label, a checkbox whose id is
      label_count, and an ordered list of the children (largest first). A node
      without children is printed as plain text.

      Args:
        verbose: When true the full library path is shown, otherwise its basename.
        resolved_addrs: Dict passed to Addr2Line.
        total: Byte total used for the percentage column.
        node: AddrInfo node to print.
        extra: Text placed before the library name in the label.
        label_count: Next unused checkbox id.

      Returns:
        The next unused checkbox id after this subtree.
      """
      fd = Addr2Line(resolved_addrs, node.addr)
      lib = fd.library if verbose else os.path.basename(fd.library)
      total_percent = 100 * node.size / float(total) if total != 0 else 0
      label = "%d %6.2f%% %6d %s%s %s %s" % (node.size, total_percent, node.number, extra, lib, fd.function, fd.location)
      # "&" must be escaped first so the entities added afterwards stay intact.
      for raw, entity in (("&", "&amp;"), ("'", "&apos;"), ('"', "&quot;"),
                          ("<", "&lt;"), (">", "&gt;")):
        label = label.replace(raw, entity)

      children = list(node.children.values())
      children.sort(key=lambda x: x.size, reverse=True)

      print('<li>')
      if not children:
        print(label)
      else:
        print('<label for="' + str(label_count) + '">' + label + '</label>')
        print('<input type="checkbox" id="' + str(label_count) + '"/>')
        print('<ol>')
        label_count += 1
        for child in children:
          label_count = DisplayHtml(verbose, resolved_addrs, total, child, "", label_count)
        print('</ol>')
      print('</li>')

      return label_count
    415 
    def CreateHtml(verbose, app, zygote, resolved_addrs):
      """Print the complete HTML page for the app and zygote allocation trees.

      The zygote tree is printed only when its size is greater than zero, and
      its checkbox ids continue from where the app tree stopped.
      """
      # Page header; the leading and trailing "" entries give the text its
      # opening and closing newline.
      page_header = "\n".join((
          "",
          "<!DOCTYPE html>",
          "<html><head><style>",
          "li input {",
          "    display: none;",
          "}",
          "li input:checked + ol > li {",
          "    display: block;",
          "}",
          "li input + ol > li {",
          "    display: none;",
          "}",
          "li {",
          "    font-family: Roboto Mono,monospace;",
          "}",
          "label {",
          "    font-family: Roboto Mono,monospace;",
          "    cursor: pointer",
          "}",
          "</style></head><body>Native allocation HTML viewer<br><br>",
          "Click on an individual line to expand/collapse to see the details of the",
          "allocation data<ol>",
          "",
      ))
      print(page_header)

      next_label = DisplayHtml(verbose, resolved_addrs, app.size, app, "app ", 0)
      if zygote.size > 0:
        DisplayHtml(verbose, resolved_addrs, zygote.size, zygote, "zygote ", next_label)
      print("</ol></body></html>")
    446 
    def main():
      """Parse the heap dump named on the command line and print the report."""
      args = Args()
      heap_path = args.native_heap

      num_field_valid = GetNumFieldValid(heap_path)
      backtraces, mappings = ParseNativeHeap(
          heap_path, args.reverse_frames, num_field_valid, args.app_symboldir)
      # Resolve functions and line numbers
      resolved_addrs = ResolveAddrs(
          args.html_output, args.symboldir, args.app_symboldir, backtraces, mappings)

      app = AddrInfo("APP")
      zygote = AddrInfo("ZYGOTE")

      for backtrace in backtraces:
        # The frame list is walked back to front when building the tree path.
        stack = [AddrInfo("%x" % addr) for addr in reversed(backtrace.frames)]
        root = zygote if backtrace.is_zygote else app
        root.addStack(backtrace.size, backtrace.num_allocs, stack)

      if args.html_output:
        CreateHtml(args.verbose, app, zygote, resolved_addrs)
        return

      combined_size = app.size + zygote.size
      print("")
      print("%9s %6s %6s %8s    %s %s %s %s" % ("BYTES", "%TOTAL", "%PARENT", "COUNT", "ADDR", "LIBRARY", "FUNCTION", "LOCATION"))
      Display(resolved_addrs, "", app.size, combined_size, app)
      print("")
      Display(resolved_addrs, "", zygote.size, combined_size, zygote)
      print("")

    # Run the viewer only when executed as a script.
    if __name__ == '__main__':
      main()
    483