1 #!/usr/bin/env python 2 # 3 # Copyright (C) 2013 The Android Open Source Project 4 # 5 # Licensed under the Apache License, Version 2.0 (the "License"); 6 # you may not use this file except in compliance with the License. 7 # You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 17 """stack symbolizes native crash dumps.""" 18 19 import os 20 import re 21 import subprocess 22 import symbol 23 import tempfile 24 import unittest 25 26 import example_crashes 27 28 def ConvertTrace(lines): 29 tracer = TraceConverter() 30 print "Reading symbols from", symbol.SYMBOLS_DIR 31 tracer.ConvertTrace(lines) 32 33 class TraceConverter: 34 process_info_line = re.compile("(pid: [0-9]+, tid: [0-9]+.*)") 35 revision_line = re.compile("(Revision: \'(.*)\')") 36 signal_line = re.compile("(signal [0-9]+ \(.*\).*)") 37 abort_message_line = re.compile("(Abort message: '.*')") 38 thread_line = re.compile("(.*)(\-\-\- ){15}\-\-\-") 39 dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)") 40 dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)") 41 register_line = re.compile("$a") 42 trace_line = re.compile("$a") 43 sanitizer_trace_line = re.compile("$a") 44 value_line = re.compile("$a") 45 code_line = re.compile("$a") 46 zipinfo_central_directory_line = re.compile("Central\s+directory\s+entry") 47 zipinfo_central_info_match = re.compile( 48 "^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)" 49 ".*^\s*compressed size:\s+(\d+)", re.M | re.S) 50 unreachable_line = re.compile("((\d+ bytes in \d+ unreachable allocations)|"+\ 51 "(\d+ bytes unreachable at [0-9a-f]+)|"+\ 52 "(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"+\ 53 "(and \d+ similar unreachable bytes in \d+ allocation(s)?))") 54 trace_lines = [] 55 value_lines = [] 56 last_frame = -1 57 width = "{8}" 58 spacing = "" 59 apk_info = dict() 60 61 register_names = { 62 "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr", 63 "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate", 64 "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc", 65 "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc", 66 "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags", 67 "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags", 68 } 69 70 def UpdateAbiRegexes(self): 71 if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64": 72 self.width = "{16}" 73 self.spacing = " " 74 else: 75 self.width = "{8}" 76 self.spacing = "" 77 78 self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){2,5})") 79 80 # Note that both trace and value line matching allow for variable amounts of 81 # whitespace (e.g. \t). This is because the we want to allow for the stack 82 # tool to operate on AndroidFeedback provided system logs. AndroidFeedback 83 # strips out double spaces that are found in tombsone files and logcat output. 84 # 85 # Examples of matched trace lines include lines from tombstone files like: 86 # #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so 87 # 88 # Or lines from AndroidFeedback crash report system logs like: 89 # 03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so 90 # Please note the spacing differences. 91 self.trace_line = re.compile( 92 ".*" # Random start stuff. 93 "\#(?P<frame>[0-9]+)" # Frame number. 94 "[ \t]+..[ \t]+" # (space)pc(space). 95 "(?P<offset>[0-9a-f]" + self.width + ")[ \t]+" # Offset (hex number given without 96 # 0x prefix). 97 "(?P<dso>\[[^\]]+\]|[^\r\n \t]*)" # Library name. 98 "( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?" # Offset into the file to find the start of the shared so. 99 "(?P<symbolpresent> \((?P<symbol>.*)\))?") # Is the symbol there? 100 # pylint: disable-msg=C6310 101 # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as 102 # its own regex. Example: 103 # 08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237) 104 self.sanitizer_trace_line = re.compile( 105 ".*" # Random start stuff. 106 "\#(?P<frame>[0-9]+)" # Frame number. 107 "[ \t]+0x[0-9a-f]+[ \t]+" # PC, not interesting to us. 108 "\(" # Opening paren. 109 "(?P<dso>[^+]+)" # Library name. 110 "\+" # '+' 111 "0x(?P<offset>[0-9a-f]+)" # Offset (hex number given with 112 # 0x prefix). 113 "\)") # Closin paren. 114 # pylint: disable-msg=C6310 115 # Examples of matched value lines include: 116 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so 117 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol) 118 # 03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so 119 # Again, note the spacing differences. 120 self.value_line = re.compile("(.*)([0-9a-f]" + self.width + ")[ \t]+([0-9a-f]" + self.width + ")[ \t]+([^\r\n \t]*)( \((.*)\))?") 121 # Lines from 'code around' sections of the output will be matched before 122 # value lines because otheriwse the 'code around' sections will be confused as 123 # value lines. 124 # 125 # Examples include: 126 # 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8 127 # 03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8 128 self.code_line = re.compile("(.*)[ \t]*[a-f0-9]" + self.width + 129 "[ \t]*[a-f0-9]" + self.width + 130 "[ \t]*[a-f0-9]" + self.width + 131 "[ \t]*[a-f0-9]" + self.width + 132 "[ \t]*[a-f0-9]" + self.width + 133 "[ \t]*[ \r\n]") # pylint: disable-msg=C6310 134 135 def CleanLine(self, ln): 136 # AndroidFeedback adds zero width spaces into its crash reports. These 137 # should be removed or the regular expresssions will fail to match. 138 return unicode(ln, errors='ignore') 139 140 def PrintTraceLines(self, trace_lines): 141 """Print back trace.""" 142 maxlen = max(map(lambda tl: len(tl[1]), trace_lines)) 143 print 144 print "Stack Trace:" 145 print " RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE" 146 for tl in self.trace_lines: 147 (addr, symbol_with_offset, location) = tl 148 print " %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location) 149 return 150 151 def PrintValueLines(self, value_lines): 152 """Print stack data values.""" 153 maxlen = max(map(lambda tl: len(tl[2]), self.value_lines)) 154 print 155 print "Stack Data:" 156 print " ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE" 157 for vl in self.value_lines: 158 (addr, value, symbol_with_offset, location) = vl 159 print " %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location) 160 return 161 162 def PrintOutput(self, trace_lines, value_lines): 163 if self.trace_lines: 164 self.PrintTraceLines(self.trace_lines) 165 if self.value_lines: 166 self.PrintValueLines(self.value_lines) 167 168 def PrintDivider(self): 169 print 170 print "-----------------------------------------------------\n" 171 172 def DeleteApkTmpFiles(self): 173 for _, _, tmp_files in self.apk_info.values(): 174 for tmp_file in tmp_files.values(): 175 os.unlink(tmp_file) 176 177 def ConvertTrace(self, lines): 178 lines = map(self.CleanLine, lines) 179 try: 180 if not symbol.ARCH: 181 symbol.SetAbi(lines) 182 self.UpdateAbiRegexes() 183 for line in lines: 184 self.ProcessLine(line) 185 self.PrintOutput(self.trace_lines, self.value_lines) 186 finally: 187 # Delete any temporary files created while processing the lines. 188 self.DeleteApkTmpFiles() 189 190 def MatchTraceLine(self, line): 191 if self.trace_line.match(line): 192 match = self.trace_line.match(line) 193 return {"frame": match.group("frame"), 194 "offset": match.group("offset"), 195 "so_offset": match.group("so_offset"), 196 "dso": match.group("dso"), 197 "symbol_present": bool(match.group("symbolpresent")), 198 "symbol_name": match.group("symbol")} 199 if self.sanitizer_trace_line.match(line): 200 match = self.sanitizer_trace_line.match(line) 201 return {"frame": match.group("frame"), 202 "offset": match.group("offset"), 203 "so_offset": None, 204 "dso": match.group("dso"), 205 "symbol_present": False, 206 "symbol_name": None} 207 return None 208 209 def ExtractLibFromApk(self, apk, shared_lib_name): 210 # Create a temporary file containing the shared library from the apk. 211 tmp_file = None 212 try: 213 tmp_fd, tmp_file = tempfile.mkstemp() 214 if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0: 215 os.close(tmp_fd) 216 shared_file = tmp_file 217 tmp_file = None 218 return shared_file 219 finally: 220 if tmp_file: 221 os.close(tmp_fd) 222 os.unlink(tmp_file) 223 return None 224 225 def ProcessCentralInfo(self, offset_list, central_info): 226 match = self.zipinfo_central_info_match.search(central_info) 227 if not match: 228 raise Exception("Cannot find all info from zipinfo\n" + central_info) 229 name = match.group(1) 230 start = int(match.group(2)) 231 end = start + int(match.group(3)) 232 233 offset_list.append([name, start, end]) 234 return name, start, end 235 236 def GetLibFromApk(self, apk, offset): 237 # Convert the string to hex. 238 offset = int(offset, 16) 239 240 # Check if we already have information about this offset. 241 if apk in self.apk_info: 242 apk_full_path, offset_list, tmp_files = self.apk_info[apk] 243 for file_name, start, end in offset_list: 244 if offset >= start and offset < end: 245 if file_name in tmp_files: 246 return file_name, tmp_files[file_name] 247 tmp_file = self.ExtractLibFromApk(apk_full_path, file_name) 248 if tmp_file: 249 tmp_files[file_name] = tmp_file 250 return file_name, tmp_file 251 break 252 return None, None 253 254 if not "ANDROID_PRODUCT_OUT" in os.environ: 255 print "ANDROID_PRODUCT_OUT environment variable not set." 256 return None, None 257 out_dir = os.environ["ANDROID_PRODUCT_OUT"] 258 if not os.path.exists(out_dir): 259 print "ANDROID_PRODUCT_OUT " + out_dir + " does not exist." 260 return None, None 261 if apk.startswith("/"): 262 apk_full_path = out_dir + apk 263 else: 264 apk_full_path = os.path.join(out_dir, apk) 265 if not os.path.exists(apk_full_path): 266 print "Cannot find apk " + apk; 267 return None, None 268 269 cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE) 270 # Find the first central info marker. 271 for line in cmd.stdout: 272 if self.zipinfo_central_directory_line.search(line): 273 break 274 275 central_info = "" 276 file_name = None 277 offset_list = [] 278 for line in cmd.stdout: 279 match = self.zipinfo_central_directory_line.search(line) 280 if match: 281 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info) 282 if not file_name and offset >= start and offset < end: 283 file_name = cur_name 284 central_info = "" 285 else: 286 central_info += line 287 if central_info: 288 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info) 289 if not file_name and offset >= start and offset < end: 290 file_name = cur_name 291 292 # Save the information from the zip. 293 tmp_files = dict() 294 self.apk_info[apk] = [apk_full_path, offset_list, tmp_files] 295 if not file_name: 296 return None, None 297 tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name) 298 if tmp_shared_lib: 299 tmp_files[file_name] = tmp_shared_lib 300 return file_name, tmp_shared_lib 301 return None, None 302 303 def ProcessLine(self, line): 304 ret = False 305 process_header = self.process_info_line.search(line) 306 signal_header = self.signal_line.search(line) 307 abort_message_header = self.abort_message_line.search(line) 308 thread_header = self.thread_line.search(line) 309 register_header = self.register_line.search(line) 310 revision_header = self.revision_line.search(line) 311 dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line) 312 dalvik_native_thread_header = self.dalvik_native_thread_line.search(line) 313 unreachable_header = self.unreachable_line.search(line) 314 if process_header or signal_header or abort_message_header or thread_header or \ 315 register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \ 316 revision_header or unreachable_header: 317 ret = True 318 if self.trace_lines or self.value_lines: 319 self.PrintOutput(self.trace_lines, self.value_lines) 320 self.PrintDivider() 321 self.trace_lines = [] 322 self.value_lines = [] 323 self.last_frame = -1 324 if process_header: 325 print process_header.group(1) 326 if signal_header: 327 print signal_header.group(1) 328 if abort_message_header: 329 print abort_message_header.group(1) 330 if register_header: 331 print register_header.group(1) 332 if thread_header: 333 print thread_header.group(1) 334 if dalvik_jni_thread_header: 335 print dalvik_jni_thread_header.group(1) 336 if dalvik_native_thread_header: 337 print dalvik_native_thread_header.group(1) 338 if revision_header: 339 print revision_header.group(1) 340 if unreachable_header: 341 print unreachable_header.group(1) 342 return True 343 trace_line_dict = self.MatchTraceLine(line) 344 if trace_line_dict is not None: 345 ret = True 346 frame = int(trace_line_dict["frame"]) 347 code_addr = trace_line_dict["offset"] 348 area = trace_line_dict["dso"] 349 so_offset = trace_line_dict["so_offset"] 350 symbol_present = trace_line_dict["symbol_present"] 351 symbol_name = trace_line_dict["symbol_name"] 352 353 if frame <= self.last_frame and (self.trace_lines or self.value_lines): 354 self.PrintOutput(self.trace_lines, self.value_lines) 355 self.PrintDivider() 356 self.trace_lines = [] 357 self.value_lines = [] 358 self.last_frame = frame 359 360 if area == "<unknown>" or area == "[heap]" or area == "[stack]": 361 self.trace_lines.append((code_addr, "", area)) 362 else: 363 # If this is an apk, it usually means that there is actually 364 # a shared so that was loaded directly out of it. In that case, 365 # extract the shared library and the name of the shared library. 366 lib = None 367 if area.endswith(".apk") and so_offset: 368 lib_name, lib = self.GetLibFromApk(area, so_offset) 369 if not lib: 370 lib = area 371 lib_name = None 372 373 # If a calls b which further calls c and c is inlined to b, we want to 374 # display "a -> b -> c" in the stack trace instead of just "a -> c" 375 info = symbol.SymbolInformation(lib, code_addr) 376 nest_count = len(info) - 1 377 for (source_symbol, source_location, object_symbol_with_offset) in info: 378 if not source_symbol: 379 if symbol_present: 380 source_symbol = symbol.CallCppFilt(symbol_name) 381 else: 382 source_symbol = "<unknown>" 383 if not source_location: 384 source_location = area 385 if lib_name: 386 source_location += "(" + lib_name + ")" 387 if nest_count > 0: 388 nest_count = nest_count - 1 389 arrow = "v------>" 390 if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64": 391 arrow = "v-------------->" 392 self.trace_lines.append((arrow, source_symbol, source_location)) 393 else: 394 if not object_symbol_with_offset: 395 object_symbol_with_offset = source_symbol 396 self.trace_lines.append((code_addr, 397 object_symbol_with_offset, 398 source_location)) 399 if self.code_line.match(line): 400 # Code lines should be ignored. If this were exluded the 'code around' 401 # sections would trigger value_line matches. 402 return ret 403 if self.value_line.match(line): 404 ret = True 405 match = self.value_line.match(line) 406 (unused_, addr, value, area, symbol_present, symbol_name) = match.groups() 407 if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area: 408 self.value_lines.append((addr, value, "", area)) 409 else: 410 info = symbol.SymbolInformation(area, value) 411 (source_symbol, source_location, object_symbol_with_offset) = info.pop() 412 # If there is no information, skip this. 413 if source_symbol or source_location or object_symbol_with_offset: 414 if not source_symbol: 415 if symbol_present: 416 source_symbol = symbol.CallCppFilt(symbol_name) 417 else: 418 source_symbol = "<unknown>" 419 if not source_location: 420 source_location = area 421 if not object_symbol_with_offset: 422 object_symbol_with_offset = source_symbol 423 self.value_lines.append((addr, 424 value, 425 object_symbol_with_offset, 426 source_location)) 427 428 return ret 429 430 431 class RegisterPatternTests(unittest.TestCase): 432 def assert_register_matches(self, abi, example_crash, stupid_pattern): 433 tc = TraceConverter() 434 lines = example_crash.split('\n') 435 symbol.SetAbi(lines) 436 tc.UpdateAbiRegexes() 437 for line in lines: 438 tc.ProcessLine(line) 439 is_register = (re.search(stupid_pattern, line) is not None) 440 matched = (tc.register_line.search(line) is not None) 441 self.assertEquals(matched, is_register, line) 442 tc.PrintOutput(tc.trace_lines, tc.value_lines) 443 444 def test_arm_registers(self): 445 self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b') 446 447 def test_arm64_registers(self): 448 self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b') 449 450 def test_mips_registers(self): 451 self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b') 452 453 def test_mips64_registers(self): 454 self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b') 455 456 def test_x86_registers(self): 457 self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b') 458 459 def test_x86_64_registers(self): 460 self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b') 461 462 class LibmemunreachablePatternTests(unittest.TestCase): 463 def test_libmemunreachable(self): 464 tc = TraceConverter() 465 lines = example_crashes.libmemunreachable.split('\n') 466 467 symbol.SetAbi(lines) 468 self.assertEquals(symbol.ARCH, "arm") 469 470 tc.UpdateAbiRegexes() 471 header_lines = 0 472 trace_lines = 0 473 for line in lines: 474 tc.ProcessLine(line) 475 if re.search(tc.unreachable_line, line) is not None: 476 header_lines += 1 477 if tc.MatchTraceLine(line) is not None: 478 trace_lines += 1 479 self.assertEquals(header_lines, 3) 480 self.assertEquals(trace_lines, 2) 481 tc.PrintOutput(tc.trace_lines, tc.value_lines) 482 483 class LongASANStackTests(unittest.TestCase): 484 # Test that a long ASAN-style (non-padded frame numbers) stack trace is not split into two 485 # when the frame number becomes two digits. This happened before as the frame number was 486 # handled as a string and not converted to an integral. 487 def test_long_asan_crash(self): 488 tc = TraceConverter() 489 lines = example_crashes.long_asan_crash.splitlines() 490 symbol.SetAbi(lines) 491 tc.UpdateAbiRegexes() 492 # Test by making sure trace_line_count is monotonically non-decreasing. If the stack trace 493 # is split, a separator is printed and trace_lines is flushed. 494 trace_line_count = 0 495 for line in lines: 496 tc.ProcessLine(line) 497 self.assertLessEqual(trace_line_count, len(tc.trace_lines)) 498 trace_line_count = len(tc.trace_lines) 499 # The split happened at transition of frame #9 -> #10. Make sure we have parsed (and stored) 500 # more than ten frames. 501 self.assertGreater(trace_line_count, 10) 502 tc.PrintOutput(tc.trace_lines, tc.value_lines) 503 504 class ValueLinesTest(unittest.TestCase): 505 def test_value_line_skipped(self): 506 tc = TraceConverter() 507 symbol.SetAbi(["ABI: 'arm'"]) 508 tc.UpdateAbiRegexes() 509 tc.ProcessLine(" 12345678 00001000 .") 510 self.assertEqual([], tc.value_lines) 511 512 if __name__ == '__main__': 513 unittest.main() 514