Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2013 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """stack symbolizes native crash dumps."""
     18 
     19 import os
     20 import re
     21 import subprocess
     22 import symbol
     23 import tempfile
     24 import unittest
     25 
     26 import example_crashes
     27 
     28 def ConvertTrace(lines):
     29   tracer = TraceConverter()
     30   print "Reading symbols from", symbol.SYMBOLS_DIR
     31   tracer.ConvertTrace(lines)
     32 
     33 class TraceConverter:
     34   process_info_line = re.compile("(pid: [0-9]+, tid: [0-9]+.*)")
     35   revision_line = re.compile("(Revision: \'(.*)\')")
     36   signal_line = re.compile("(signal [0-9]+ \(.*\).*)")
     37   abort_message_line = re.compile("(Abort message: '.*')")
     38   thread_line = re.compile("(.*)(\-\-\- ){15}\-\-\-")
     39   dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
     40   dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")
     41   register_line = re.compile("$a")
     42   trace_line = re.compile("$a")
     43   sanitizer_trace_line = re.compile("$a")
     44   value_line = re.compile("$a")
     45   code_line = re.compile("$a")
     46   zipinfo_central_directory_line = re.compile("Central\s+directory\s+entry")
     47   zipinfo_central_info_match = re.compile(
     48       "^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
     49       ".*^\s*compressed size:\s+(\d+)", re.M | re.S)
     50   trace_lines = []
     51   value_lines = []
     52   last_frame = -1
     53   width = "{8}"
     54   spacing = ""
     55   apk_info = dict()
     56 
     57   register_names = {
     58     "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
     59     "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
     60     "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
     61     "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
     62     "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
     63     "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
     64   }
     65 
     66   def UpdateAbiRegexes(self):
     67     if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64":
     68       self.width = "{16}"
     69       self.spacing = "        "
     70     else:
     71       self.width = "{8}"
     72       self.spacing = ""
     73 
     74     self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){2,5})")
     75 
     76     # Note that both trace and value line matching allow for variable amounts of
     77     # whitespace (e.g. \t). This is because the we want to allow for the stack
     78     # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
     79     # strips out double spaces that are found in tombsone files and logcat output.
     80     #
     81     # Examples of matched trace lines include lines from tombstone files like:
     82     #   #00  pc 001cf42e  /data/data/com.my.project/lib/libmyproject.so
     83     #
     84     # Or lines from AndroidFeedback crash report system logs like:
     85     #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
     86     # Please note the spacing differences.
     87     self.trace_line = re.compile(
     88         ".*"                                                 # Random start stuff.
     89         "\#(?P<frame>[0-9]+)"                                # Frame number.
     90         "[ \t]+..[ \t]+"                                     # (space)pc(space).
     91         "(?P<offset>[0-9a-f]" + self.width + ")[ \t]+"       # Offset (hex number given without
     92                                                              #         0x prefix).
     93         "(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"                    # Library name.
     94         "( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"       # Offset into the file to find the start of the shared so.
     95         "(?P<symbolpresent> \((?P<symbol>.*)\))?")           # Is the symbol there?
     96                                                              # pylint: disable-msg=C6310
     97     # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
     98     # its own regex. Example:
     99     # 08-19 05:29:26.283   397   403 I         :     #0 0xb6a15237  (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
    100     self.sanitizer_trace_line = re.compile(
    101         ".*"                                                 # Random start stuff.
    102         "\#(?P<frame>[0-9]+)"                                # Frame number.
    103         "[ \t]+0x[0-9a-f]+[ \t]+"                            # PC, not interesting to us.
    104         "\("                                                 # Opening paren.
    105         "(?P<dso>[^+]+)"                                     # Library name.
    106         "\+"                                                 # '+'
    107         "0x(?P<offset>[0-9a-f]+)"                            # Offset (hex number given with
    108                                                              #         0x prefix).
    109         "\)")                                                # Closin paren.
    110                                                              # pylint: disable-msg=C6310
    111     # Examples of matched value lines include:
    112     #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so
    113     #   bea4170c  8018e4e9  /data/data/com.my.project/lib/libmyproject.so (symbol)
    114     #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
    115     # Again, note the spacing differences.
    116     self.value_line = re.compile("(.*)([0-9a-f]" + self.width + ")[ \t]+([0-9a-f]" + self.width + ")[ \t]+([^\r\n \t]*)( \((.*)\))?")
    117     # Lines from 'code around' sections of the output will be matched before
    118     # value lines because otheriwse the 'code around' sections will be confused as
    119     # value lines.
    120     #
    121     # Examples include:
    122     #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    123     #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
    124     self.code_line = re.compile("(.*)[ \t]*[a-f0-9]" + self.width +
    125                                 "[ \t]*[a-f0-9]" + self.width +
    126                                 "[ \t]*[a-f0-9]" + self.width +
    127                                 "[ \t]*[a-f0-9]" + self.width +
    128                                 "[ \t]*[a-f0-9]" + self.width +
    129                                 "[ \t]*[ \r\n]")  # pylint: disable-msg=C6310
    130 
    131   def CleanLine(self, ln):
    132     # AndroidFeedback adds zero width spaces into its crash reports. These
    133     # should be removed or the regular expresssions will fail to match.
    134     return unicode(ln, errors='ignore')
    135 
    136   def PrintTraceLines(self, trace_lines):
    137     """Print back trace."""
    138     maxlen = max(map(lambda tl: len(tl[1]), trace_lines))
    139     print
    140     print "Stack Trace:"
    141     print "  RELADDR   " + self.spacing + "FUNCTION".ljust(maxlen) + "  FILE:LINE"
    142     for tl in self.trace_lines:
    143       (addr, symbol_with_offset, location) = tl
    144       print "  %8s  %s  %s" % (addr, symbol_with_offset.ljust(maxlen), location)
    145     return
    146 
    147   def PrintValueLines(self, value_lines):
    148     """Print stack data values."""
    149     maxlen = max(map(lambda tl: len(tl[2]), self.value_lines))
    150     print
    151     print "Stack Data:"
    152     print "  ADDR      " + self.spacing + "VALUE     " + "FUNCTION".ljust(maxlen) + "  FILE:LINE"
    153     for vl in self.value_lines:
    154       (addr, value, symbol_with_offset, location) = vl
    155       print "  %8s  %8s  %s  %s" % (addr, value, symbol_with_offset.ljust(maxlen), location)
    156     return
    157 
    158   def PrintOutput(self, trace_lines, value_lines):
    159     if self.trace_lines:
    160       self.PrintTraceLines(self.trace_lines)
    161     if self.value_lines:
    162       self.PrintValueLines(self.value_lines)
    163 
    164   def PrintDivider(self):
    165     print
    166     print "-----------------------------------------------------\n"
    167 
    168   def DeleteApkTmpFiles(self):
    169     for _, _, tmp_files in self.apk_info.values():
    170       for tmp_file in tmp_files.values():
    171         os.unlink(tmp_file)
    172 
    173   def ConvertTrace(self, lines):
    174     lines = map(self.CleanLine, lines)
    175     try:
    176       if not symbol.ARCH:
    177         symbol.SetAbi(lines)
    178       self.UpdateAbiRegexes()
    179       for line in lines:
    180         self.ProcessLine(line)
    181       self.PrintOutput(self.trace_lines, self.value_lines)
    182     finally:
    183       # Delete any temporary files created while processing the lines.
    184       self.DeleteApkTmpFiles()
    185 
    186   def MatchTraceLine(self, line):
    187     if self.trace_line.match(line):
    188       match = self.trace_line.match(line)
    189       return {"frame": match.group("frame"),
    190               "offset": match.group("offset"),
    191               "so_offset": match.group("so_offset"),
    192               "dso": match.group("dso"),
    193               "symbol_present": bool(match.group("symbolpresent")),
    194               "symbol_name": match.group("symbol")}
    195     if self.sanitizer_trace_line.match(line):
    196       match = self.sanitizer_trace_line.match(line)
    197       return {"frame": match.group("frame"),
    198               "offset": match.group("offset"),
    199               "so_offset": None,
    200               "dso": match.group("dso"),
    201               "symbol_present": False,
    202               "symbol_name": None}
    203     return None
    204 
    205   def ExtractLibFromApk(self, apk, shared_lib_name):
    206     # Create a temporary file containing the shared library from the apk.
    207     tmp_file = None
    208     try:
    209       tmp_fd, tmp_file = tempfile.mkstemp()
    210       if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0:
    211         os.close(tmp_fd)
    212         shared_file = tmp_file
    213         tmp_file = None
    214         return shared_file
    215     finally:
    216       if tmp_file:
    217         os.close(tmp_fd)
    218         os.unlink(tmp_file)
    219     return None
    220 
    221   def ProcessCentralInfo(self, offset_list, central_info):
    222     match = self.zipinfo_central_info_match.search(central_info)
    223     if not match:
    224       raise Exception("Cannot find all info from zipinfo\n" + central_info)
    225     name = match.group(1)
    226     start = int(match.group(2))
    227     end = start + int(match.group(3))
    228 
    229     offset_list.append([name, start, end])
    230     return name, start, end
    231 
    232   def GetLibFromApk(self, apk, offset):
    233     # Convert the string to hex.
    234     offset = int(offset, 16)
    235 
    236     # Check if we already have information about this offset.
    237     if apk in self.apk_info:
    238       apk_full_path, offset_list, tmp_files = self.apk_info[apk]
    239       for file_name, start, end in offset_list:
    240         if offset >= start and offset < end:
    241           if file_name in tmp_files:
    242             return file_name, tmp_files[file_name]
    243           tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
    244           if tmp_file:
    245             tmp_files[file_name] = tmp_file
    246             return file_name, tmp_file
    247           break
    248       return None, None
    249 
    250     if not "ANDROID_PRODUCT_OUT" in os.environ:
    251       print "ANDROID_PRODUCT_OUT environment variable not set."
    252       return None, None
    253     out_dir = os.environ["ANDROID_PRODUCT_OUT"]
    254     if not os.path.exists(out_dir):
    255       print "ANDROID_PRODUCT_OUT " + out_dir + " does not exist."
    256       return None, None
    257     if apk.startswith("/"):
    258       apk_full_path = out_dir + apk
    259     else:
    260       apk_full_path = os.path.join(out_dir, apk)
    261     if not os.path.exists(apk_full_path):
    262       print "Cannot find apk " + apk;
    263       return None, None
    264 
    265     cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE)
    266     # Find the first central info marker.
    267     for line in cmd.stdout:
    268       if self.zipinfo_central_directory_line.search(line):
    269         break
    270 
    271     central_info = ""
    272     file_name = None
    273     offset_list = []
    274     for line in cmd.stdout:
    275       match = self.zipinfo_central_directory_line.search(line)
    276       if match:
    277         cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
    278         if not file_name and offset >= start and offset < end:
    279           file_name = cur_name
    280         central_info = ""
    281       else:
    282         central_info += line
    283     if central_info:
    284       cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
    285       if not file_name and offset >= start and offset < end:
    286         file_name = cur_name
    287 
    288     # Save the information from the zip.
    289     tmp_files = dict()
    290     self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
    291     if not file_name:
    292       return None, None
    293     tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
    294     if tmp_shared_lib:
    295       tmp_files[file_name] = tmp_shared_lib
    296       return file_name, tmp_shared_lib
    297     return None, None
    298 
    299   def ProcessLine(self, line):
    300     ret = False
    301     process_header = self.process_info_line.search(line)
    302     signal_header = self.signal_line.search(line)
    303     abort_message_header = self.abort_message_line.search(line)
    304     thread_header = self.thread_line.search(line)
    305     register_header = self.register_line.search(line)
    306     revision_header = self.revision_line.search(line)
    307     dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
    308     dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)
    309     if process_header or signal_header or abort_message_header or thread_header or \
    310         register_header or dalvik_jni_thread_header or dalvik_native_thread_header or revision_header:
    311       if self.trace_lines or self.value_lines:
    312         self.PrintOutput(self.trace_lines, self.value_lines)
    313         self.PrintDivider()
    314         self.trace_lines = []
    315         self.value_lines = []
    316         self.last_frame = -1
    317       if process_header:
    318         print process_header.group(1)
    319       if signal_header:
    320         print signal_header.group(1)
    321       if abort_message_header:
    322         print abort_message_header.group(1)
    323       if register_header:
    324         print register_header.group(1)
    325       if thread_header:
    326         print thread_header.group(1)
    327       if dalvik_jni_thread_header:
    328         print dalvik_jni_thread_header.group(1)
    329       if dalvik_native_thread_header:
    330         print dalvik_native_thread_header.group(1)
    331       if revision_header:
    332         print revision_header.group(1)
    333       return True
    334     trace_line_dict = self.MatchTraceLine(line)
    335     if trace_line_dict is not None:
    336       ret = True
    337       frame = trace_line_dict["frame"]
    338       code_addr = trace_line_dict["offset"]
    339       area = trace_line_dict["dso"]
    340       so_offset = trace_line_dict["so_offset"]
    341       symbol_present = trace_line_dict["symbol_present"]
    342       symbol_name = trace_line_dict["symbol_name"]
    343 
    344       if frame <= self.last_frame and (self.trace_lines or self.value_lines):
    345         self.PrintOutput(self.trace_lines, self.value_lines)
    346         self.PrintDivider()
    347         self.trace_lines = []
    348         self.value_lines = []
    349       self.last_frame = frame
    350 
    351       if area == "<unknown>" or area == "[heap]" or area == "[stack]":
    352         self.trace_lines.append((code_addr, "", area))
    353       else:
    354         # If this is an apk, it usually means that there is actually
    355         # a shared so that was loaded directly out of it. In that case,
    356         # extract the shared library and the name of the shared library.
    357         lib = None
    358         if area.endswith(".apk") and so_offset:
    359           lib_name, lib = self.GetLibFromApk(area, so_offset)
    360         if not lib:
    361           lib = area
    362           lib_name = None
    363 
    364         # If a calls b which further calls c and c is inlined to b, we want to
    365         # display "a -> b -> c" in the stack trace instead of just "a -> c"
    366         info = symbol.SymbolInformation(lib, code_addr)
    367         nest_count = len(info) - 1
    368         for (source_symbol, source_location, object_symbol_with_offset) in info:
    369           if not source_symbol:
    370             if symbol_present:
    371               source_symbol = symbol.CallCppFilt(symbol_name)
    372             else:
    373               source_symbol = "<unknown>"
    374           if not source_location:
    375             source_location = area
    376             if lib_name:
    377               source_location += "(" + lib_name + ")"
    378           if nest_count > 0:
    379             nest_count = nest_count - 1
    380             arrow = "v------>"
    381             if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64":
    382               arrow = "v-------------->"
    383             self.trace_lines.append((arrow, source_symbol, source_location))
    384           else:
    385             if not object_symbol_with_offset:
    386               object_symbol_with_offset = source_symbol
    387             self.trace_lines.append((code_addr,
    388                                 object_symbol_with_offset,
    389                                 source_location))
    390     if self.code_line.match(line):
    391       # Code lines should be ignored. If this were exluded the 'code around'
    392       # sections would trigger value_line matches.
    393       return ret
    394     if self.value_line.match(line):
    395       ret = True
    396       match = self.value_line.match(line)
    397       (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
    398       if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
    399         self.value_lines.append((addr, value, "", area))
    400       else:
    401         info = symbol.SymbolInformation(area, value)
    402         (source_symbol, source_location, object_symbol_with_offset) = info.pop()
    403         if not source_symbol:
    404           if symbol_present:
    405             source_symbol = symbol.CallCppFilt(symbol_name)
    406           else:
    407             source_symbol = "<unknown>"
    408         if not source_location:
    409           source_location = area
    410         if not object_symbol_with_offset:
    411           object_symbol_with_offset = source_symbol
    412         self.value_lines.append((addr,
    413                             value,
    414                             object_symbol_with_offset,
    415                             source_location))
    416 
    417     return ret
    418 
    419 
    420 class RegisterPatternTests(unittest.TestCase):
    421   def assert_register_matches(self, abi, example_crash, stupid_pattern):
    422     tc = TraceConverter()
    423     lines = example_crash.split('\n')
    424     symbol.SetAbi(lines)
    425     tc.UpdateAbiRegexes()
    426     for line in lines:
    427       tc.ProcessLine(line)
    428       is_register = (re.search(stupid_pattern, line) is not None)
    429       matched = (tc.register_line.search(line) is not None)
    430       self.assertEquals(matched, is_register, line)
    431     tc.PrintOutput(tc.trace_lines, tc.value_lines)
    432 
    433   def test_arm_registers(self):
    434     self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b')
    435 
    436   def test_arm64_registers(self):
    437     self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b')
    438 
    439   def test_mips_registers(self):
    440     self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b')
    441 
    442   def test_mips64_registers(self):
    443     self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b')
    444 
    445   def test_x86_registers(self):
    446     self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b')
    447 
    448   def test_x86_64_registers(self):
    449     self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b')
    450 
    451 
    452 if __name__ == '__main__':
    453     unittest.main()
    454