#!/usr/bin/env python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""stack symbolizes native crash dumps."""

import os
import re
import subprocess
import symbol
import tempfile
import unittest

import example_crashes


def ConvertTrace(lines):
    """Symbolize `lines` with a fresh TraceConverter and print the result."""
    tracer = TraceConverter()
    print("Reading symbols from " + str(symbol.SYMBOLS_DIR))
    tracer.ConvertTrace(lines)


class TraceConverter:
    """Parses crash-dump lines and prints symbolized stack traces/stack data.

    The ABI-dependent patterns (register, trace, value and code lines) start
    out as "$a", which can never match; UpdateAbiRegexes() replaces them once
    symbol.ARCH is known.
    """

    process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
    revision_line = re.compile(r"(Revision: '(.*)')")
    signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
    abort_message_line = re.compile(r"(Abort message: '.*')")
    thread_line = re.compile(r"(.*)(\-\-\- ){15}\-\-\-")
    dalvik_jni_thread_line = re.compile(r'(".*" prio=[0-9]+ tid=[0-9]+ NATIVE.*)')
    dalvik_native_thread_line = re.compile(r'(".*" sysTid=[0-9]+ nice=[0-9]+.*)')
    register_line = re.compile("$a")
    trace_line = re.compile("$a")
    sanitizer_trace_line = re.compile("$a")
    value_line = re.compile("$a")
    code_line = re.compile("$a")
    zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
    zipinfo_central_info_match = re.compile(
        r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
        r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
    last_frame = -1
    width = "{8}"
    # NOTE(review): whitespace inside the string literals of this file may have
    # been collapsed when it was pasted (e.g. the 64-bit `spacing` below and
    # the column headers); they are reproduced as found -- confirm upstream.
    spacing = ""

    # Architectures whose addresses are printed as 16 hex digits.
    _WIDE_ARCHS = ("arm64", "mips64", "x86_64")

    # Areas that have no backing file to symbolize against.
    _UNSYMBOLIZABLE_AREAS = ("<unknown>", "[heap]", "[stack]")

    register_names = {
        "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
        "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
        "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
        "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
    }

    def __init__(self):
        # Per-instance state. These used to be class-level lists/dicts, which
        # made every TraceConverter share (and mutate) the same objects.
        self.trace_lines = []
        self.value_lines = []
        self.last_frame = -1
        # Maps apk name -> [apk_full_path, offset_list, tmp_files], where
        # offset_list holds [name, start, end] entries and tmp_files maps an
        # entry name to the temporary file it was extracted to.
        self.apk_info = dict()

    def UpdateAbiRegexes(self):
        """Rebuild the ABI-dependent regexes for the current symbol.ARCH."""
        if symbol.ARCH in self._WIDE_ARCHS:
            self.width = "{16}"
            self.spacing = " "
        else:
            self.width = "{8}"
            self.spacing = ""
        hex_word = "[0-9a-f]" + self.width

        self.register_line = re.compile(
            r"(([ ]*\b(" + self.register_names[symbol.ARCH] + r")\b +" +
            hex_word + r"){2,5})")

        # Note that both trace and value line matching allow for variable
        # amounts of whitespace (e.g. \t). This is because we want to allow for
        # the stack tool to operate on AndroidFeedback provided system logs.
        # AndroidFeedback strips out double spaces that are found in tombstone
        # files and logcat output.
        #
        # Examples of matched trace lines include lines from tombstone files
        # like:
        #   #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        #
        # Or lines from AndroidFeedback crash report system logs like:
        #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        # Please note the spacing differences.
        self.trace_line = re.compile(
            r".*"                                   # Random start stuff.
            r"\#(?P<frame>[0-9]+)"                  # Frame number.
            r"[ \t]+..[ \t]+"                       # (space)pc(space).
            r"(?P<offset>" + hex_word + r")[ \t]+"  # Offset (hex number given
                                                    # without 0x prefix).
            r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"      # Library name.
            # Offset into the file to find the start of the shared so.
            r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"
            r"(?P<symbolpresent> \((?P<symbol>.*)\))?")  # Is the symbol there?
        # pylint: disable-msg=C6310
        # Sanitizer output. This is different from debuggerd output, and it is
        # easier to handle this as its own regex. Example:
        #   08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
        self.sanitizer_trace_line = re.compile(
            r".*"                        # Random start stuff.
            r"\#(?P<frame>[0-9]+)"       # Frame number.
            r"[ \t]+0x[0-9a-f]+[ \t]+"   # PC, not interesting to us.
            r"\("                        # Opening paren.
            r"(?P<dso>[^+]+)"            # Library name.
            r"\+"                        # '+'
            r"0x(?P<offset>[0-9a-f]+)"   # Offset (hex number given with
                                         # 0x prefix).
            r"\)")                       # Closing paren.
        # pylint: disable-msg=C6310
        # Examples of matched value lines include:
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol)
        #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        # Again, note the spacing differences.
        self.value_line = re.compile(
            r"(.*)(" + hex_word + r")[ \t]+(" + hex_word +
            r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
        # Lines from 'code around' sections of the output will be matched
        # before value lines because otherwise the 'code around' sections will
        # be confused as value lines.
        #
        # Examples include:
        #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        self.code_line = re.compile(
            r"(.*)" +
            (r"[ \t]*[a-f0-9]" + self.width) * 5 +  # Five hex words.
            r"[ \t]*[ \r\n]")  # pylint: disable-msg=C6310

    def CleanLine(self, ln):
        """Return `ln` as text with every non-ASCII character dropped.

        AndroidFeedback adds zero width spaces into its crash reports. These
        should be removed or the regular expressions will fail to match.
        Accepts both byte strings and already-decoded text.
        """
        if isinstance(ln, bytes):
            return ln.decode("ascii", "ignore")
        return ln.encode("ascii", "ignore").decode("ascii")

    def PrintTraceLines(self, trace_lines):
        """Print back trace.

        Args:
          trace_lines: list of (addr, symbol_with_offset, location) tuples.
        """
        # `or [0]` keeps max() from raising on an empty list.
        maxlen = max([len(tl[1]) for tl in trace_lines] or [0])
        print("")
        print("Stack Trace:")
        print(" RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        for (addr, symbol_with_offset, location) in trace_lines:
            print(" %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location))
        return

    def PrintValueLines(self, value_lines):
        """Print stack data values.

        Args:
          value_lines: list of (addr, value, symbol_with_offset, location)
            tuples.
        """
        maxlen = max([len(vl[2]) for vl in value_lines] or [0])
        print("")
        print("Stack Data:")
        print(" ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        for (addr, value, symbol_with_offset, location) in value_lines:
            print(" %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))
        return

    def PrintOutput(self, trace_lines, value_lines):
        """Print whichever of the trace and value sections is non-empty."""
        if trace_lines:
            self.PrintTraceLines(trace_lines)
        if value_lines:
            self.PrintValueLines(value_lines)

    def PrintDivider(self):
        """Print the separator emitted between two consecutive dumps."""
        print("")
        print("-----------------------------------------------------\n")

    def DeleteApkTmpFiles(self):
        """Delete every library extracted from an apk and forget its path."""
        for _, _, tmp_files in self.apk_info.values():
            for tmp_file in tmp_files.values():
                os.unlink(tmp_file)
            # Drop the stale paths so a later lookup re-extracts the library
            # instead of returning (and later unlinking) a deleted file.
            tmp_files.clear()

    def ConvertTrace(self, lines):
        """Symbolize and print every dump found in `lines`.

        Detects the ABI through symbol.SetAbi() when symbol.ARCH is unset.
        Temporary files created along the way are always removed.
        """
        # A real list: the lines are walked by SetAbi() and again below.
        lines = [self.CleanLine(line) for line in lines]
        try:
            if not symbol.ARCH:
                symbol.SetAbi(lines)
            self.UpdateAbiRegexes()
            for line in lines:
                self.ProcessLine(line)
            self.PrintOutput(self.trace_lines, self.value_lines)
        finally:
            # Delete any temporary files created while processing the lines.
            self.DeleteApkTmpFiles()

    def MatchTraceLine(self, line):
        """Parse a debuggerd or sanitizer stack-frame line.

        Returns:
          A dict with the keys "frame", "offset", "so_offset", "dso",
          "symbol_present" and "symbol_name", or None if `line` is not a
          frame line.
        """
        match = self.trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "so_offset": match.group("so_offset"),
                    "dso": match.group("dso"),
                    "symbol_present": bool(match.group("symbolpresent")),
                    "symbol_name": match.group("symbol")}
        match = self.sanitizer_trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "so_offset": None,
                    "dso": match.group("dso"),
                    "symbol_present": False,
                    "symbol_name": None}
        return None

    def ExtractLibFromApk(self, apk, shared_lib_name):
        """Extract `shared_lib_name` from `apk` into a temporary file.

        Returns:
          The path of the temporary file, or None if `unzip` exited with a
          non-zero status (the temporary file is removed in that case).
        """
        tmp_fd, tmp_file = tempfile.mkstemp()
        extracted = False
        try:
            extracted = subprocess.call(
                ["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0
        finally:
            os.close(tmp_fd)
            if not extracted:
                os.unlink(tmp_file)
        return tmp_file if extracted else None

    def ProcessCentralInfo(self, offset_list, central_info):
        """Parse one zipinfo central directory entry.

        Appends [name, start, end] to `offset_list`, where `start` is the
        offset of the local header and `end` is `start` plus the compressed
        size.

        Returns:
          The (name, start, end) tuple.

        Raises:
          Exception: if `central_info` lacks any of the expected fields.
        """
        match = self.zipinfo_central_info_match.search(central_info)
        if not match:
            raise Exception("Cannot find all info from zipinfo\n" + central_info)
        name = match.group(1)
        start = int(match.group(2))
        end = start + int(match.group(3))

        offset_list.append([name, start, end])
        return name, start, end

    def _LocateApk(self, apk):
        """Return the path of `apk` under ANDROID_PRODUCT_OUT, or None."""
        if "ANDROID_PRODUCT_OUT" not in os.environ:
            print("ANDROID_PRODUCT_OUT environment variable not set.")
            return None
        out_dir = os.environ["ANDROID_PRODUCT_OUT"]
        if not os.path.exists(out_dir):
            print("ANDROID_PRODUCT_OUT " + out_dir + " does not exist.")
            return None
        if apk.startswith("/"):
            # os.path.join() would discard out_dir for an absolute path.
            apk_full_path = out_dir + apk
        else:
            apk_full_path = os.path.join(out_dir, apk)
        if not os.path.exists(apk_full_path):
            print("Cannot find apk " + apk)
            return None
        return apk_full_path

    def _ReadApkOffsets(self, apk_full_path):
        """Return the [name, start, end] entries listed by `zipinfo -v`."""
        offset_list = []
        # universal_newlines makes the pipe yield text, which is what the
        # (text) zipinfo patterns need.
        cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path],
                               stdout=subprocess.PIPE, universal_newlines=True)
        try:
            # Find the first central info marker.
            for line in cmd.stdout:
                if self.zipinfo_central_directory_line.search(line):
                    break

            # Every following marker terminates the entry collected so far.
            central_info = ""
            for line in cmd.stdout:
                if self.zipinfo_central_directory_line.search(line):
                    self.ProcessCentralInfo(offset_list, central_info)
                    central_info = ""
                else:
                    central_info += line
            if central_info:
                self.ProcessCentralInfo(offset_list, central_info)
        finally:
            # Close the pipe and reap the child even when parsing fails.
            cmd.stdout.close()
            cmd.wait()
        return offset_list

    def GetLibFromApk(self, apk, offset):
        """Find and extract the apk entry that contains `offset`.

        Args:
          apk: apk path as it appears in the trace.
          offset: hex string giving an offset into the apk.

        Returns:
          (entry_name, tmp_file_path), or (None, None) if the apk cannot be
          found, no entry covers the offset, or the extraction fails.
        """
        # Convert the hex string to a number.
        offset = int(offset, 16)

        # Read the zip layout only once per apk.
        if apk not in self.apk_info:
            apk_full_path = self._LocateApk(apk)
            if not apk_full_path:
                return None, None
            offset_list = self._ReadApkOffsets(apk_full_path)
            self.apk_info[apk] = [apk_full_path, offset_list, dict()]

        apk_full_path, offset_list, tmp_files = self.apk_info[apk]
        for file_name, start, end in offset_list:
            if start <= offset < end:
                if file_name not in tmp_files:
                    tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
                    if not tmp_file:
                        break
                    tmp_files[file_name] = tmp_file
                return file_name, tmp_files[file_name]
        return None, None

    def _FlushPendingOutput(self):
        """Print the collected lines followed by a divider, then clear them."""
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []

    def _ProcessTraceLine(self, trace_line_dict):
        """Symbolize one parsed frame line and append it to trace_lines."""
        # Compare frame numbers numerically: as strings, "10" <= "9" holds and
        # would split one trace in two.
        frame = int(trace_line_dict["frame"])
        code_addr = trace_line_dict["offset"]
        area = trace_line_dict["dso"]
        so_offset = trace_line_dict["so_offset"]
        symbol_present = trace_line_dict["symbol_present"]
        symbol_name = trace_line_dict["symbol_name"]

        # A frame number that does not increase starts a new trace.
        if frame <= self.last_frame and (self.trace_lines or self.value_lines):
            self._FlushPendingOutput()
        self.last_frame = frame

        if area in self._UNSYMBOLIZABLE_AREAS:
            self.trace_lines.append((code_addr, "", area))
            return

        # If this is an apk, it usually means that there is actually
        # a shared so that was loaded directly out of it. In that case,
        # extract the shared library and the name of the shared library.
        lib = None
        lib_name = None
        if area.endswith(".apk") and so_offset:
            lib_name, lib = self.GetLibFromApk(area, so_offset)
        if not lib:
            lib = area
            lib_name = None

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c"
        info = symbol.SymbolInformation(lib, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, object_symbol_with_offset) in info:
            if not source_symbol:
                if symbol_present:
                    source_symbol = symbol.CallCppFilt(symbol_name)
                else:
                    source_symbol = "<unknown>"
            if not source_location:
                source_location = area
                if lib_name:
                    source_location += "(" + lib_name + ")"
            if nest_count > 0:
                # Every entry but the last gets an arrow in place of the
                # address.
                nest_count = nest_count - 1
                arrow = "v------>"
                if symbol.ARCH in self._WIDE_ARCHS:
                    arrow = "v-------------->"
                self.trace_lines.append((arrow, source_symbol, source_location))
            else:
                if not object_symbol_with_offset:
                    object_symbol_with_offset = source_symbol
                self.trace_lines.append((code_addr,
                                         object_symbol_with_offset,
                                         source_location))

    def _ProcessValueLine(self, match):
        """Symbolize one matched stack-data line and append it to value_lines."""
        (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
        if area in self._UNSYMBOLIZABLE_AREAS or not area:
            self.value_lines.append((addr, value, "", area))
            return
        info = symbol.SymbolInformation(area, value)
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        if not source_symbol:
            if symbol_present:
                source_symbol = symbol.CallCppFilt(symbol_name)
            else:
                source_symbol = "<unknown>"
        if not source_location:
            source_location = area
        if not object_symbol_with_offset:
            object_symbol_with_offset = source_symbol
        self.value_lines.append((addr,
                                 value,
                                 object_symbol_with_offset,
                                 source_location))

    def ProcessLine(self, line):
        """Consume one input line.

        Header lines are echoed (after flushing any pending output); frame
        and stack-data lines are symbolized and collected for printing.

        Returns:
          True if the line was a header, frame or stack-data line, else False.
        """
        process_header = self.process_info_line.search(line)
        signal_header = self.signal_line.search(line)
        abort_message_header = self.abort_message_line.search(line)
        thread_header = self.thread_line.search(line)
        register_header = self.register_line.search(line)
        revision_header = self.revision_line.search(line)
        dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
        dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)

        # Listed in the order in which they are echoed.
        headers = (process_header, signal_header, abort_message_header,
                   register_header, thread_header, dalvik_jni_thread_header,
                   dalvik_native_thread_header, revision_header)
        if any(headers):
            if self.trace_lines or self.value_lines:
                self._FlushPendingOutput()
                self.last_frame = -1
            for header in headers:
                if header:
                    print(header.group(1))
            return True

        ret = False
        trace_line_dict = self.MatchTraceLine(line)
        if trace_line_dict is not None:
            ret = True
            self._ProcessTraceLine(trace_line_dict)
        if self.code_line.match(line):
            # Code lines should be ignored. If this were excluded the
            # 'code around' sections would trigger value_line matches.
            return ret
        value_match = self.value_line.match(line)
        if value_match:
            ret = True
            self._ProcessValueLine(value_match)

        return ret


class RegisterPatternTests(unittest.TestCase):
    """Checks register_line against the example crash of every ABI."""

    def assert_register_matches(self, abi, example_crash, stupid_pattern):
        """Assert register_line matches exactly the lines `stupid_pattern` does.

        `abi` is unused here; the ABI is detected from the crash text through
        symbol.SetAbi().
        """
        tc = TraceConverter()
        lines = example_crash.split('\n')
        symbol.SetAbi(lines)
        tc.UpdateAbiRegexes()
        for line in lines:
            tc.ProcessLine(line)
            is_register = (re.search(stupid_pattern, line) is not None)
            matched = (tc.register_line.search(line) is not None)
            self.assertEqual(matched, is_register, line)
        tc.PrintOutput(tc.trace_lines, tc.value_lines)

    def test_arm_registers(self):
        self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b')

    def test_arm64_registers(self):
        self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b')

    def test_mips_registers(self):
        self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b')

    def test_mips64_registers(self):
        self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b')

    def test_x86_registers(self):
        self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b')

    def test_x86_64_registers(self):
        self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b')


if __name__ == '__main__':
    unittest.main()