Home | History | Annotate | Download | only in findit
      1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import re
      6 
      7 import crash_utils
      8 
      9 
     10 SYZYASAN_STACK_FRAME_PATTERN = re.compile(
     11     r'(CF: )?(.*?)( \(FPO: .*\) )?( \(CONV: .*\) )?\[(.*) @ (\d+)\]')
     12 FILE_PATH_AND_LINE_PATTERN = re.compile(r'(.*?):(\d+)(:\d+)?')
     13 
     14 
     15 class StackFrame(object):
     16   """Represents a frame in stacktrace.
     17 
     18   Attributes:
     19     index: An index of the stack frame.
     20     component_path: The path of the component this frame represents.
     21     component_name: The name of the component this frame represents.
     22     file_name: The name of the file that crashed.
     23     function: The function that caused the crash.
     24     file_path: The path of the crashed file.
     25     crashed_line_range: The line of the file that caused the crash.
     26   """
     27 
     28   def __init__(self, stack_frame_index, component_path, component_name,
     29                file_name, function, file_path, crashed_line_range):
     30     self.index = stack_frame_index
     31     self.component_path = component_path
     32     self.component_name = component_name
     33     self.file_name = file_name
     34     self.function = function
     35     self.file_path = file_path
     36     self.crashed_line_range = crashed_line_range
     37 
     38 
     39 class CallStack(object):
     40   """Represents a call stack within a stacktrace.
     41 
     42   It is a list of StackFrame object, and the object keeps track of whether
     43   the stack is crash stack, freed or previously-allocated.
     44   """
     45 
     46   def __init__(self, stack_priority):
     47     self.frame_list = []
     48     self.priority = stack_priority
     49 
     50   def Add(self, stacktrace_line):
     51     self.frame_list.append(stacktrace_line)
     52 
     53   def GetTopNFrames(self, n):
     54     return self.frame_list[:n]
     55 
     56 
     57 class Stacktrace(object):
     58   """Represents Stacktrace object.
     59 
     60   Contains a list of callstacks, because one stacktrace might have more than
     61   one callstacks.
     62   """
     63 
     64   def __init__(self, stacktrace, build_type, parsed_deps):
     65     self.stack_list = None
     66     self.ParseStacktrace(stacktrace, build_type, parsed_deps)
     67 
     68   def ParseStacktrace(self, stacktrace, build_type, parsed_deps):
     69     """Parses stacktrace and normalizes it.
     70 
     71     If there are multiple callstacks within the stacktrace,
     72     it will parse each of them separately, and store them in the stack_list
     73     variable.
     74 
     75     Args:
     76       stacktrace: A string containing stacktrace.
     77       build_type: A string containing the build type of the crash.
     78       parsed_deps: A parsed DEPS file to normalize path with.
     79     """
     80     # If the passed in string is empty, the object does not represent anything.
     81     if not stacktrace:
     82       return
     83     # Reset the stack list.
     84     self.stack_list = []
     85     reached_new_callstack = False
     86     # Note that we do not need exact stack frame index, we only need relative
     87     # position of a frame within a callstack. The reason for not extracting
     88     # index from a line is that some stack frames do not have index.
     89     stack_frame_index = 0
     90     current_stack = CallStack(-1)
     91 
     92     for line in stacktrace:
     93       line = line.strip()
     94       (is_new_callstack, stack_priority) = self.__IsStartOfNewCallStack(
     95           line, build_type)
     96       if is_new_callstack:
     97         # If this callstack is crash stack, update the boolean.
     98         if not reached_new_callstack:
     99           reached_new_callstack = True
    100           current_stack = CallStack(stack_priority)
    101 
    102         # If this is from freed or allocation, add the callstack we have
    103         # to the list of callstacks, and increment the stack priority.
    104         else:
    105           stack_frame_index = 0
    106           if current_stack and current_stack.frame_list:
    107             self.stack_list.append(current_stack)
    108           current_stack = CallStack(stack_priority)
    109 
    110       # Generate stack frame object from the line.
    111       parsed_stack_frame = self.__GenerateStackFrame(
    112           stack_frame_index, line, build_type, parsed_deps)
    113 
    114       # If the line does not represent the stack frame, ignore this line.
    115       if not parsed_stack_frame:
    116         continue
    117 
    118       # Add the parsed stack frame object to the current stack.
    119       current_stack.Add(parsed_stack_frame)
    120       stack_frame_index += 1
    121 
    122     # Add the current callstack only if there are frames in it.
    123     if current_stack and current_stack.frame_list:
    124       self.stack_list.append(current_stack)
    125 
    126   def __IsStartOfNewCallStack(self, line, build_type):
    127     """Check if this line is the start of the new callstack.
    128 
    129     Since each builds have different format of stacktrace, the logic for
    130     checking the line for all builds is handled in here.
    131 
    132     Args:
    133       line: Line to check for.
    134       build_type: The name of the build.
    135 
    136     Returns:
    137       True if the line is the start of new callstack, False otherwise. If True,
    138       it also returns the priority of the line.
    139     """
    140     if 'syzyasan' in build_type:
    141       # In syzyasan build, new stack starts with 'crash stack:',
    142       # 'freed stack:', etc.
    143       callstack_start_pattern = re.compile(r'^(.*) stack:$')
    144       match = callstack_start_pattern.match(line)
    145 
    146       # If the line matches the callstack start pattern.
    147       if match:
    148         # Check the type of the new match.
    149         stack_type = match.group(1)
    150 
    151         # Crash stack gets priority 0.
    152         if stack_type == 'Crash':
    153           return (True, 0)
    154 
    155         # Other callstacks all get priority 1.
    156         else:
    157           return (True, 1)
    158 
    159     elif 'tsan' in build_type:
    160       # Create patterns for each callstack type.
    161       crash_callstack_start_pattern1 = re.compile(
    162           r'^(Read|Write) of size \d+')
    163 
    164       crash_callstack_start_pattern2 = re.compile(
    165           r'^[A-Z]+: ThreadSanitizer')
    166 
    167       allocation_callstack_start_pattern = re.compile(
    168           r'^Previous (write|read) of size \d+')
    169 
    170       location_callstack_start_pattern = re.compile(
    171           r'^Location is heap block of size \d+')
    172 
    173       # Crash stack gets priority 0.
    174       if (crash_callstack_start_pattern1.match(line) or
    175           crash_callstack_start_pattern2.match(line)):
    176         return (True, 0)
    177 
    178       # All other stacks get priority 1.
    179       if allocation_callstack_start_pattern.match(line):
    180         return (True, 1)
    181 
    182       if location_callstack_start_pattern.match(line):
    183         return (True, 1)
    184 
    185     else:
    186       # In asan and other build types, crash stack can start
    187       # in two different ways.
    188       crash_callstack_start_pattern1 = re.compile(r'^==\d+== ?[A-Z]+:')
    189       crash_callstack_start_pattern2 = re.compile(
    190           r'^(READ|WRITE) of size \d+ at')
    191       crash_callstack_start_pattern3 = re.compile(r'^backtrace:')
    192 
    193       freed_callstack_start_pattern = re.compile(
    194           r'^freed by thread T\d+ (.* )?here:')
    195 
    196       allocation_callstack_start_pattern = re.compile(
    197           r'^previously allocated by thread T\d+ (.* )?here:')
    198 
    199       other_callstack_start_pattern = re.compile(
    200           r'^Thread T\d+ (.* )?created by')
    201 
    202       # Crash stack gets priority 0.
    203       if (crash_callstack_start_pattern1.match(line) or
    204           crash_callstack_start_pattern2.match(line) or
    205           crash_callstack_start_pattern3.match(line)):
    206         return (True, 0)
    207 
    208       # All other callstack gets priority 1.
    209       if freed_callstack_start_pattern.match(line):
    210         return (True, 1)
    211 
    212       if allocation_callstack_start_pattern.match(line):
    213         return (True, 1)
    214 
    215       if other_callstack_start_pattern.match(line):
    216         return (True, 1)
    217 
    218     # If the line does not match any pattern, return false and a dummy for
    219     # stack priority.
    220     return (False, -1)
    221 
    222   def __GenerateStackFrame(self, stack_frame_index, line, build_type,
    223                            parsed_deps):
    224     """Extracts information from a line in stacktrace.
    225 
    226     Args:
    227       stack_frame_index: A stack frame index of this line.
    228       line: A stacktrace string to extract data from.
    229       build_type: A string containing the build type
    230                     of this crash (e.g. linux_asan_chrome_mp).
    231       parsed_deps: A parsed DEPS file to normalize path with.
    232 
    233     Returns:
    234       A triple containing the name of the function, the path of the file and
    235       the crashed line number.
    236     """
    237     line_parts = line.split()
    238     try:
    239 
    240       if 'syzyasan' in build_type:
    241         stack_frame_match = SYZYASAN_STACK_FRAME_PATTERN.match(line)
    242 
    243         if not stack_frame_match:
    244           return None
    245         file_path = stack_frame_match.group(5)
    246         crashed_line_range = [int(stack_frame_match.group(6))]
    247         function = stack_frame_match.group(2)
    248 
    249       else:
    250         if not line_parts[0].startswith('#'):
    251           return None
    252 
    253         if 'tsan' in build_type:
    254           file_path_and_line = line_parts[-2]
    255           function = ' '.join(line_parts[1:-2])
    256         else:
    257           file_path_and_line = line_parts[-1]
    258           function = ' '.join(line_parts[3:-1])
    259 
    260         # Get file path and line info from the line.
    261         file_path_and_line_match = FILE_PATH_AND_LINE_PATTERN.match(
    262             file_path_and_line)
    263 
    264         # Return None if the file path information is not available
    265         if not file_path_and_line_match:
    266           return None
    267 
    268         file_path = file_path_and_line_match.group(1)
    269 
    270         # Get the crashed line range. For example, file_path:line_number:range.
    271         crashed_line_range_num = file_path_and_line_match.group(3)
    272 
    273         if crashed_line_range_num:
    274           # Strip ':' prefix.
    275           crashed_line_range_num = int(crashed_line_range_num[1:])
    276         else:
    277           crashed_line_range_num = 0
    278 
    279         crashed_line_number = int(file_path_and_line_match.group(2))
    280         # For example, 655:1 has crashed lines 655 and 656.
    281         crashed_line_range = \
    282             range(crashed_line_number,
    283                   crashed_line_number + crashed_line_range_num + 1)
    284 
    285     # Return None if the line is malformed.
    286     except IndexError:
    287       return None
    288     except ValueError:
    289       return None
    290 
    291     # Normalize the file path so that it can be compared to repository path.
    292     (component_path, component_name, file_path) = (
    293         crash_utils.NormalizePath(file_path, parsed_deps))
    294 
    295     # Return a new stack frame object with the parsed information.
    296     file_name = file_path.split('/')[-1]
    297 
    298     # If we have the common stack frame index pattern, then use it
    299     # since it is more reliable.
    300     index_match = re.match('\s*#(\d+)\s.*', line)
    301     if index_match:
    302       stack_frame_index = int(index_match.group(1))
    303 
    304     return StackFrame(stack_frame_index, component_path, component_name,
    305                       file_name, function, file_path, crashed_line_range)
    306 
    307   def __getitem__(self, index):
    308     return self.stack_list[index]
    309 
    310   def GetCrashStack(self):
    311     """Returns the callstack with the highest priority.
    312 
    313     Crash stack has priority 0, and allocation/freed/other thread stacks
    314     get priority 1.
    315 
    316     Returns:
    317       The highest priority callstack in the stacktrace.
    318     """
    319     sorted_stacklist = sorted(self.stack_list,
    320                               key=lambda callstack: callstack.priority)
    321     return sorted_stacklist[0]
    322