#!/usr/bin/env python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generates a human-interpretable view of a native heap dump from 'am dumpheap -n'."""

import logging
import os
import os.path
import re
import subprocess
import sys
import zipfile


class Args:
    """Command-line options of the viewer, parsed from sys.argv.

    Attributes set by the constructor:
      verbose: True when --verbose was given.
      html_output: True when --html was given.
      reverse_frames: True when --reverse was given.
      symboldir: directory prefix used to look up .so files with symbols.
      app_symboldir: directory prefix used to look up the app APK/.so files.
      native_heap: path of the heap dump file (the single positional argument).
    """

    _usage = """
Usage:
1. Collect a native heap dump from the device. For example:
   $ adb shell stop
   $ adb shell setprop libc.debug.malloc.program app_process
   $ adb shell setprop libc.debug.malloc.options backtrace=64
   $ adb shell start
    (launch and use app)
   $ adb shell am dumpheap -n <pid> /data/local/tmp/native_heap.txt
   $ adb pull /data/local/tmp/native_heap.txt

2. Run the viewer:
   $ python native_heapdump_viewer.py [options] native_heap.txt
      [--verbose]: verbose output
      [--html]: interactive html output
      [--reverse]: reverse the backtraces (start the tree from the leaves)
      [--symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the .so files with symbols.
                 Defaults to $ANDROID_PRODUCT_OUT/symbols
      [--app-symbols SYMBOL_DIR] SYMBOL_DIR is the directory containing the app APK and so files.
                 Defaults to the current directory.
   This outputs a file with lines of the form:

      5831776 29.09% 100.00% 10532 71b07bc0b0 /system/lib64/libandroid_runtime.so Typeface_createFromArray frameworks/base/core/jni/android/graphics/Typeface.cpp:68

   5831776 is the total number of bytes allocated at this stack frame, which
   is 29.09% of the total number of bytes allocated and 100.00% of the parent
   frame's bytes allocated. 10532 is the total number of allocations at this
   stack frame. 71b07bc0b0 is the address of the stack frame.
"""

    def __init__(self):
        """Parse sys.argv[1:].

        Prints the usage text and exits with status 1 when an option that
        takes a directory is missing its value, or when the number of
        positional arguments is not exactly one. Unknown options are reported
        and otherwise ignored.
        """
        self.verbose = False
        self.html_output = False
        self.reverse_frames = False
        product_out = os.getenv("ANDROID_PRODUCT_OUT")
        if product_out:
            self.symboldir = product_out + "/symbols"
        else:
            self.symboldir = "./symbols"
        self.app_symboldir = ""

        argv = sys.argv
        extra_args = []
        i = 1
        while i < len(argv):
            arg = argv[i]
            if arg in ("--symbols", "--app-symbols"):
                # These options consume the following argument as a directory.
                i += 1
                if i >= len(argv):
                    print("Missing directory for option %s" % arg)
                    print(self._usage)
                    sys.exit(1)
                if arg == "--symbols":
                    self.symboldir = argv[i] + "/"
                else:
                    self.app_symboldir = argv[i] + "/"
            elif arg == "--verbose":
                self.verbose = True
            elif arg == "--html":
                self.html_output = True
            elif arg == "--reverse":
                self.reverse_frames = True
            elif arg.startswith("-"):
                print("Invalid option %s" % arg)
            else:
                extra_args.append(arg)
            i += 1

        if len(extra_args) != 1:
            print(self._usage)
            sys.exit(1)

        self.native_heap = extra_args[0]


class Backtrace:
    """One backtrace record of the dump: zygote flag, size, count and frames."""

    def __init__(self, is_zygote, size, num_allocs, frames):
        self.is_zygote = is_zygote
        self.size = size
        self.num_allocs = num_allocs
        self.frames = frames


class Mapping:
    """One memory map entry: [start, end) address range, file offset and name."""

    def __init__(self, start, end, offset, name):
        self.start = start
        self.end = end
        self.offset = offset
        self.name = name


class FrameDescription:
    """Symbolized frame: function name, source location and library."""

    def __init__(self, function, location, library):
        self.function = function
        self.location = location
        self.library = library


def GetVersion(native_heap):
    """Get the version of the native heap dump.

    Returns the version string (e.g. "v1.0") from the first line matching the
    "Android Native Heap Dump vX.Y" header, or None if no line matches.
    """
    re_line = re.compile(r"Android\s+Native\s+Heap\s+Dump\s+(?P<version>v\d+\.\d+)\s*$")
    with open(native_heap, "r") as f:
        for line in f:
            m = re_line.match(line)
            if m:
                return m.group('version')
    return None


def GetNumFieldValidByParsingLines(native_heap):
    """Determine if the num field is valid by parsing the backtrace lines.

    Malloc debug for N incorrectly set the num field to the number of
    backtraces instead of the number of allocations with the same size and
    backtrace. Read the file and if at least three lines all have the field
    set to the number of backtraces values, then consider this generated by
    the buggy malloc debug and indicate the num field is not valid.

    Returns:
      True if the num field is valid.
      False if the num field is not valid and should be ignored.
    """
    re_backtrace = re.compile(r"Backtrace\s+size:\s+(?P<backtrace_size>\d+)")
    re_line = re.compile(
        r"z\s+(?P<zygote>\d+)\s+sz\s+(?P<size>\d+)\s+num\s+(?P<num_allocations>\d+)")
    matched = 0
    backtrace_size = 0
    with open(native_heap, "r") as f:
        for line in f:
            if backtrace_size == 0:
                m = re_backtrace.match(line)
                if m:
                    backtrace_size = int(m.group('backtrace_size'))
            parts = line.split()
            if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
                m = re_line.match(line)
                if m:
                    num_allocations = int(m.group('num_allocations'))
                    if num_allocations == backtrace_size:
                        # At least three lines must match this pattern before
                        # considering this the old buggy version of malloc debug.
                        matched += 1
                        if matched == 3:
                            return False
                    else:
                        # A single differing value proves the field is real.
                        return True
    return matched == 0


def GetNumFieldValid(native_heap):
    """Return True if the num field of the backtrace lines can be trusted."""
    version = GetVersion(native_heap)
    if not version or version == "v1.0":
        # Version v1.0 was produced by a buggy version of malloc debug where the
        # num field was set incorrectly.
        # Unfortunately, Android P produced a v1.0 version that does set the
        # num field. Do one more check to see if this is the broken version.
        return GetNumFieldValidByParsingLines(native_heap)
    return True


def GetMappingFromOffset(mapping, app_symboldir):
    """
    If the input mapping is a zip file, translate the contained uncompressed files and add mapping
    entries.

    This is done to handle symbols for the uncompressed .so files inside APKs. With the replaced
    mappings, the script looks up the .so files as separate files.

    The zip is looked up as app_symboldir + basename(mapping.name). When a
    stored (uncompressed) entry contains mapping.offset, mapping.name and
    mapping.offset are rewritten in place to refer to that entry. Files that
    are missing or are not zip archives leave the mapping untouched.

    Returns the (possibly modified) mapping object that was passed in.
    """
    zip_name = app_symboldir + os.path.basename(mapping.name)
    # is_zipfile() keeps an ordinary file that merely shares the name (for
    # example a plain .so) from raising BadZipfile.
    if not (os.path.isfile(zip_name) and zipfile.is_zipfile(zip_name)):
        return mapping

    zip_header_entry_size = 30
    so_file_alignment = 4096
    with zipfile.ZipFile(zip_name) as opened_zip:
        # For all files in the zip, add mappings for the internal files.
        for file_info in opened_zip.infolist():
            # Only add stored files since it doesn't make sense to have PC into compressed ones.
            if file_info.compress_type != zipfile.ZIP_STORED:
                continue
            data_offset = (file_info.header_offset
                           + zip_header_entry_size
                           + len(file_info.filename)
                           + len(file_info.extra)
                           + len(file_info.comment))
            end_offset = data_offset + file_info.file_size
            if data_offset <= mapping.offset < end_offset:
                # Round up the data_offset to the nearest page since the .so must be aligned.
                data_offset += so_file_alignment - 1
                data_offset -= data_offset % so_file_alignment
                mapping.name = file_info.filename
                mapping.offset -= data_offset
                break
    return mapping


def ParseNativeHeap(native_heap, reverse_frames, num_field_valid, app_symboldir):
    """Parse the native heap into backtraces, maps.

    Args:
      native_heap: path of the heap dump file.
      reverse_frames: when true, the frames of every backtrace are reversed.
      num_field_valid: when false, the num field is ignored and every
        backtrace is counted as a single allocation.
      app_symboldir: directory prefix passed on to GetMappingFromOffset.

    Returns two lists, the first is a list of all of the backtraces, the
    second is the list of maps sorted by start address.
    """
    backtraces = []
    mappings = []

    re_map = re.compile(
        "(?P<start>[0-9a-f]+)-(?P<end>[0-9a-f]+) .... (?P<offset>[0-9a-f]+) "
        "[0-9a-f]+:[0-9a-f]+ [0-9]+ +(?P<name>.*)")

    with open(native_heap, "r") as f:
        for line in f:
            # Format of line:
            #   z 0 sz 50 num 1 bt 000000000000a100 000000000000b200
            parts = line.split()
            if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
                # Any z value other than "1" is accounted to the zygote.
                is_zygote = parts[1] != "1"
                size = int(parts[3])
                num_allocs = int(parts[5]) if num_field_valid else 1
                frames = [int(frame, 16) for frame in parts[7:]]
                if reverse_frames:
                    frames.reverse()
                backtraces.append(Backtrace(is_zygote, size, num_allocs, frames))
            else:
                # Parse map line:
                #   720de01000-720ded7000 r-xp 00000000 fd:00 495 /system/lib64/libc.so
                m = re_map.match(line)
                if m:
                    # Offset of mapping start
                    start = int(m.group('start'), 16)
                    # Offset of mapping end
                    end = int(m.group('end'), 16)
                    # Offset within file that is mapped
                    offset = int(m.group('offset'), 16)
                    name = m.group('name')
                    mappings.append(
                        GetMappingFromOffset(Mapping(start, end, offset, name), app_symboldir))

    # FindMapping() binary-searches this list, so guarantee the ordering
    # instead of relying on the order of the lines in the dump.
    mappings.sort(key=lambda mapping: mapping.start)
    return backtraces, mappings


def FindMapping(mappings, addr):
    """Find the mapping given addr.

    mappings must be sorted by address; the lookup is a binary search over
    the half-open ranges [start, end).

    Returns the mapping that contains addr.
    Returns None if there is no such mapping.
    """
    low = 0
    high = len(mappings) - 1
    while low <= high:
        mid = (low + high) // 2
        candidate = mappings[mid]
        if candidate.end <= addr:
            low = mid + 1
        elif candidate.start > addr:
            high = mid - 1
        else:
            return candidate
    return None


def _GetTextSectionFileOffset(sofile):
    """Return the file offset of .text reported by objdump, or 0 if not found."""
    # universal_newlines=True makes the pipe return text on Python 3 as well.
    result = subprocess.check_output(
        ["objdump", "-w", "-j", ".text", "-h", sofile], universal_newlines=True)
    for line in result.split("\n"):
        splitted = line.split()
        if len(splitted) > 5 and splitted[1] == ".text":
            return int(splitted[5], 16)
    return 0


def _RunAddr2Line(sofile, input_addrs):
    """Feed input_addrs to addr2line for sofile and return its output lines."""
    p = subprocess.Popen(
        ["addr2line", "-C", "-j", ".text", "-e", sofile, "-f"],
        stdout=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
    result = p.communicate(input_addrs)[0]
    addr2line_rc = p.returncode
    # A negative return code means the child was terminated by a signal.
    if addr2line_rc and (addr2line_rc < 0):
        logging.warning(
            "addr2line on " + sofile + " terminated by signal " + str(-1 * addr2line_rc))
    return result.split("\n")


def ResolveAddrs(html_output, symboldir, app_symboldir, backtraces, mappings):
    """Resolve address libraries and offsets.

    addr_offsets maps addr to .so file offset
    addrs_by_lib maps library to list of addrs from that library
    Resolved addrs maps addr to FrameDescription

    Libraries are looked up as app_symboldir + name first and symboldir + name
    second. Addresses outside every mapping, and addresses of libraries that
    cannot be found, resolve to "???" descriptions; addresses for which
    addr2line produced no output resolve to "---". Progress messages are
    printed only when html_output is false.

    Returns the resolved_addrs hash.
    """
    addr_offsets = {}
    addrs_by_lib = {}
    resolved_addrs = {}
    empty_frame_description = FrameDescription("???", "???", "???")
    for backtrace in backtraces:
        for addr in backtrace.frames:
            if addr in addr_offsets or addr in resolved_addrs:
                continue
            mapping = FindMapping(mappings, addr)
            if mapping:
                addr_offsets[addr] = addr - mapping.start + mapping.offset
                addrs_by_lib.setdefault(mapping.name, []).append(addr)
            else:
                resolved_addrs[addr] = empty_frame_description

    # Resolve functions and line numbers.
    if not html_output:
        print("Resolving symbols using directory %s..." % symboldir)

    for lib, lib_addrs in addrs_by_lib.items():
        sofile = app_symboldir + lib
        if not os.path.isfile(sofile):
            sofile = symboldir + lib

        if not os.path.isfile(sofile):
            if not html_output:
                print("%s not found for symbol resolution" % lib)
            fd = FrameDescription("???", "???", lib)
            for addr in lib_addrs:
                resolved_addrs[addr] = fd
            continue

        # addr2line is run with "-j .text", so addresses are made relative to
        # the start of that section.
        file_offset = _GetTextSectionFileOffset(sofile)
        input_addrs = "".join(
            "%s\n" % hex(addr_offsets[addr] - file_offset) for addr in lib_addrs)
        lines = _RunAddr2Line(sofile, input_addrs)

        # With -f, addr2line prints two lines per address: function, location.
        for index, addr in enumerate(lib_addrs):
            if 2 * index + 1 < len(lines):
                resolved_addrs[addr] = FrameDescription(
                    lines[2 * index], lines[2 * index + 1], lib)
            else:
                logging.warning("addr2line output for %s is missing address %x", sofile, addr)
                resolved_addrs[addr] = FrameDescription("---", "---", lib)

    return resolved_addrs


def Addr2Line(resolved_addrs, addr):
    """Return the FrameDescription for a tree node address.

    addr is either one of the root labels "ZYGOTE"/"APP", which yield an empty
    description, or a hexadecimal address string looked up in resolved_addrs.
    """
    if addr == "ZYGOTE" or addr == "APP":
        return FrameDescription("", "", "")

    return resolved_addrs[int(addr, 16)]


class AddrInfo:
    """Node of the allocation tree, keyed by frame address.

    size is the number of bytes and number the count of allocations
    accumulated through this node; children maps address to child node.
    """

    def __init__(self, addr):
        self.addr = addr
        self.size = 0
        self.number = 0
        self.num_allocs = 0
        self.children = {}

    def addStack(self, size, num_allocs, stack):
        """Account num_allocs allocations of size bytes along stack.

        stack is a list of AddrInfo nodes ordered from this node's child
        downwards. A node from stack is adopted as a child only when no child
        with the same addr exists yet; otherwise the existing child is updated.
        """
        total_bytes = size * num_allocs
        node = self
        node.size += total_bytes
        node.number += num_allocs
        for frame in stack:
            node = node.children.setdefault(frame.addr, frame)
            node.size += total_bytes
            node.number += num_allocs


def Display(resolved_addrs, indent, total, parent_total, node):
    """Print node and, recursively, its children as indented text lines.

    Each line shows the node's bytes, its percentage of total and of
    parent_total, the allocation count, the address and the resolved symbol.
    Children are printed largest first.
    """
    fd = Addr2Line(resolved_addrs, node.addr)
    total_percent = 0
    if total != 0:
        total_percent = 100 * node.size / float(total)
    parent_percent = 0
    if parent_total != 0:
        parent_percent = 100 * node.size / float(parent_total)
    print("%9d %6.2f%% %6.2f%% %8d %s%s %s %s %s" % (
        node.size, total_percent, parent_percent, node.number, indent, node.addr,
        fd.library, fd.function, fd.location))
    children = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
    for child in children:
        Display(resolved_addrs, indent + " ", total, node.size, child)


def DisplayHtml(verbose, resolved_addrs, total, node, extra, label_count):
    """Print node and, recursively, its children as nested HTML list items.

    Nodes with children get a <label>/<input> pair whose id is label_count so
    the subtree can be expanded; leaf nodes are printed as plain text. The
    library is shown with its full path only when verbose is true.

    Returns the next unused label id.
    """
    fd = Addr2Line(resolved_addrs, node.addr)
    if verbose:
        lib = fd.library
    else:
        lib = os.path.basename(fd.library)
    total_percent = 0
    if total != 0:
        total_percent = 100 * node.size / float(total)
    label = "%d %6.2f%% %6d %s%s %s %s" % (
        node.size, total_percent, node.number, extra, lib, fd.function, fd.location)
    # Escape the text for HTML; "&" must be first so the entities produced by
    # the later replacements are not escaped again.
    label = label.replace("&", "&amp;")
    label = label.replace("'", "&#39;")
    label = label.replace('"', "&quot;")
    label = label.replace("<", "&lt;")
    label = label.replace(">", "&gt;")
    children = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
    print('<li>')
    if len(children) > 0:
        print('<label for="' + str(label_count) + '">' + label + '</label>')
        print('<input type="checkbox" id="' + str(label_count) + '"/>')
        print('<ol>')
        label_count += 1
        for child in children:
            label_count = DisplayHtml(verbose, resolved_addrs, total, child, "", label_count)
        print('</ol>')
    else:
        print(label)
    print('</li>')

    return label_count


def CreateHtml(verbose, app, zygote, resolved_addrs):
    """Print the complete HTML page for the app tree and, if non-empty, the zygote tree."""
    print("""
<!DOCTYPE html>
<html><head><style>
li input {
    display: none;
}
li input:checked + ol > li {
    display: block;
}
li input + ol > li {
    display: none;
}
li {
    font-family: Roboto Mono,monospace;
}
label {
    font-family: Roboto Mono,monospace;
    cursor: pointer
}
</style></head><body>Native allocation HTML viewer<br><br>
Click on an individual line to expand/collapse to see the details of the
allocation data<ol>
""")

    label_count = 0
    label_count = DisplayHtml(verbose, resolved_addrs, app.size, app, "app ", label_count)
    if zygote.size > 0:
        DisplayHtml(verbose, resolved_addrs, zygote.size, zygote, "zygote ", label_count)
    print("</ol></body></html>")


def main():
    """Parse the dump named on the command line and print the allocation trees."""
    args = Args()

    num_field_valid = GetNumFieldValid(args.native_heap)

    backtraces, mappings = ParseNativeHeap(args.native_heap, args.reverse_frames, num_field_valid,
                                           args.app_symboldir)
    # Resolve functions and line numbers
    resolved_addrs = ResolveAddrs(args.html_output, args.symboldir, args.app_symboldir, backtraces,
                                  mappings)

    app = AddrInfo("APP")
    zygote = AddrInfo("ZYGOTE")

    for backtrace in backtraces:
        stack = [AddrInfo("%x" % addr) for addr in backtrace.frames]
        # The tree is built starting from the last frame of each backtrace.
        stack.reverse()
        if backtrace.is_zygote:
            zygote.addStack(backtrace.size, backtrace.num_allocs, stack)
        else:
            app.addStack(backtrace.size, backtrace.num_allocs, stack)

    if args.html_output:
        CreateHtml(args.verbose, app, zygote, resolved_addrs)
    else:
        print("")
        print("%9s %6s %6s %8s %s %s %s %s" % (
            "BYTES", "%TOTAL", "%PARENT", "COUNT", "ADDR", "LIBRARY", "FUNCTION", "LOCATION"))
        Display(resolved_addrs, "", app.size, app.size + zygote.size, app)
        print("")
        Display(resolved_addrs, "", zygote.size, app.size + zygote.size, zygote)
        print("")


if __name__ == '__main__':
    main()