Home | History | Annotate | Download | only in findit
      1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import atexit
      6 import cgi
      7 import ConfigParser
      8 import json
      9 import os
     10 import Queue
     11 import threading
     12 import time
     13 
     14 from common import utils
     15 from result import Result
     16 
     17 
     18 INFINITY = float('inf')
     19 
     20 MAX_THREAD_NUMBER = 10
     21 TASK_QUEUE = None
     22 
     23 
     24 def SignalWorkerThreads():
     25   global TASK_QUEUE
     26   if not TASK_QUEUE:
     27     return
     28 
     29   for i in range(MAX_THREAD_NUMBER):
     30     TASK_QUEUE.put(None)
     31 
     32   # Give worker threads a chance to exit.
     33   # Workaround the harmless bug in python 2.7 below.
     34   time.sleep(1)
     35 
     36 
     37 atexit.register(SignalWorkerThreads)
     38 
     39 
     40 def Worker():
     41   global TASK_QUEUE
     42   while True:
     43     try:
     44       task = TASK_QUEUE.get()
     45       if not task:
     46         return
     47     except TypeError:
     48       # According to http://bugs.python.org/issue14623, this is a harmless bug
     49       # in python 2.7 which won't be fixed.
     50       # The exception is raised on daemon threads when python interpreter is
     51       # shutting down.
     52       return
     53 
     54     function, args, kwargs, result_semaphore = task
     55     try:
     56       function(*args, **kwargs)
     57     except:
     58       pass
     59     finally:
     60       # Signal one task is done in case of exception.
     61       result_semaphore.release()
     62 
     63 
     64 def RunTasks(tasks):
     65   """Run given tasks. Not thread-safe: no concurrent calls of this function.
     66 
     67   Return after all tasks were completed. A task is a dict as below:
     68     {
     69       'function': the function to call,
     70       'args': the positional argument to pass to the function,
     71       'kwargs': the key-value arguments to pass to the function,
     72     }
     73   """
     74   if not tasks:
     75     return
     76 
     77   global TASK_QUEUE
     78   if not TASK_QUEUE:
     79     TASK_QUEUE = Queue.Queue()
     80     for index in range(MAX_THREAD_NUMBER):
     81       thread = threading.Thread(target=Worker, name='worker_%s' % index)
     82       # Set as daemon, so no join is needed.
     83       thread.daemon = True
     84       thread.start()
     85 
     86   result_semaphore = threading.Semaphore(0)
     87   # Push task to task queue for execution.
     88   for task in tasks:
     89     TASK_QUEUE.put(
     90         (task['function'], task.get('args', []),
     91          task.get('kwargs', {}), result_semaphore))
     92 
     93   # Wait until all tasks to be executed.
     94   for _ in tasks:
     95     result_semaphore.acquire()
     96 
     97 
     98 def GetRepositoryType(revision_number):
     99   """Returns the repository type of this revision number.
    100 
    101   Args:
    102     revision_number: A revision number or git hash.
    103 
    104   Returns:
    105     'git' or 'svn', depending on the revision_number.
    106   """
    107   if utils.IsGitHash(revision_number):
    108     return 'git'
    109   else:
    110     return 'svn'
    111 
    112 
    113 def ParseURLsFromConfig(file_name):
    114   """Parses URLS from the config file.
    115 
    116   The file should be in python config format, where svn section is in the
    117   format "svn:component_path".
    118   Each of the section for svn should contain changelog_url, revision_url,
    119   diff_url and blame_url.
    120 
    121   Args:
    122     file_name: The name of the file that contains URL information.
    123 
    124   Returns:
    125     A dictionary that maps repository type to list of URLs. For svn, it maps
    126     key 'svn' to another dictionary, which maps component path to the URLs
    127     as explained above. For git, it maps to the URLs as explained above.
    128   """
    129   config = ConfigParser.ConfigParser()
    130 
    131   # Get the absolute path of the config file, and read the file. If it fails,
    132   # return none.
    133   config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
    134                                   file_name)
    135   config.read(config_file_path)
    136   if not config:
    137     return None
    138 
    139   # Iterate through the config file, check for sections.
    140   config_dict = {}
    141   for section in config.sections():
    142     # These two do not need another layer of dictionary, so add it and go
    143     # to next section.
    144     if ':' not in section:
    145       for option in config.options(section):
    146         if section not in config_dict:
    147           config_dict[section] = {}
    148 
    149         url = config.get(section, option)
    150         config_dict[section][option] = url
    151 
    152       continue
    153 
    154     # Get repository type and component name from the section name.
    155     repository_type_and_component = section.split(':')
    156     repository_type = repository_type_and_component[0]
    157     component_path = repository_type_and_component[1]
    158 
    159     # Add 'svn' as the key, if it is not already there.
    160     if repository_type not in config_dict:
    161       config_dict[repository_type] = {}
    162     url_map_for_repository = config_dict[repository_type]
    163 
    164     # Add the path to the 'svn', if it is not already there.
    165     if component_path not in url_map_for_repository:
    166       url_map_for_repository[component_path] = {}
    167     type_to_url = url_map_for_repository[component_path]
    168 
    169     # Add all URLs to this map.
    170     for option in config.options(section):
    171       url = config.get(section, option)
    172       type_to_url[option] = url
    173 
    174   return config_dict
    175 
    176 
    177 def NormalizePath(path, parsed_deps):
    178   """Normalizes the path.
    179 
    180   Args:
    181     path: A string representing a path.
    182     parsed_deps: A map from component path to its component name, repository,
    183                  etc.
    184 
    185   Returns:
    186     A tuple containing a component this path is in (e.g blink, skia, etc)
    187     and a path in that component's repository. Returns None if the component
    188     repository is not supported, i.e from googlecode.
    189   """
    190   # First normalize the path by retreiving the normalized path.
    191   normalized_path = os.path.normpath(path).replace('\\', '/')
    192 
    193   # Iterate through all component paths in the parsed DEPS, in the decreasing
    194   # order of the length of the file path.
    195   for component_path in sorted(parsed_deps,
    196                                key=(lambda path: -len(path))):
    197     # new_component_path is the component path with 'src/' removed.
    198     new_component_path = component_path
    199     if new_component_path.startswith('src/') and new_component_path != 'src/':
    200       new_component_path = new_component_path[len('src/'):]
    201 
    202     # We need to consider when the lowercased component path is in the path,
    203     # because syzyasan build returns lowercased file path.
    204     lower_component_path = new_component_path.lower()
    205 
    206     # If this path is the part of file path, this file must be from this
    207     # component.
    208     if new_component_path in normalized_path or \
    209         lower_component_path in normalized_path:
    210 
    211       # Case when the retreived path is in lowercase.
    212       if lower_component_path in normalized_path:
    213         current_component_path = lower_component_path
    214       else:
    215         current_component_path = new_component_path
    216 
    217       # Normalize the path by stripping everything off the component's relative
    218       # path.
    219       normalized_path = normalized_path.split(current_component_path, 1)[1]
    220       lower_normalized_path = normalized_path.lower()
    221 
    222       # Add 'src/' or 'Source/' at the front of the normalized path, depending
    223       # on what prefix the component path uses. For example, blink uses
    224       # 'Source' but chromium uses 'src/', and blink component path is
    225       # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
    226       # normalized path.
    227       if not (lower_normalized_path.startswith('src/') or
    228               lower_normalized_path.startswith('source/')):
    229 
    230         if (lower_component_path.endswith('src/') or
    231             lower_component_path.endswith('source/')):
    232           normalized_path = (current_component_path.split('/')[-2] + '/' +
    233                              normalized_path)
    234 
    235         else:
    236           normalized_path = 'src/' + normalized_path
    237 
    238       component_name = parsed_deps[component_path]['name']
    239 
    240       return (component_path, component_name, normalized_path)
    241 
    242   # If the path does not match any component, default to chromium.
    243   return ('src/', 'chromium', normalized_path)
    244 
    245 
    246 def SplitRange(regression):
    247   """Splits a range as retrieved from clusterfuzz.
    248 
    249   Args:
    250     regression: A string in format 'r1234:r5678'.
    251 
    252   Returns:
    253     A list containing two numbers represented in string, for example
    254     ['1234','5678'].
    255   """
    256   if not regression:
    257     return None
    258 
    259   revisions = regression.split(':')
    260 
    261   # If regression information is not available, return none.
    262   if len(revisions) != 2:
    263     return None
    264 
    265   range_start = revisions[0]
    266   range_end = revisions[1]
    267 
    268   # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
    269   # the range is in git hash and it starts with 'r'.
    270   if range_start.startswith('r'):
    271     range_start = range_start[1:]
    272 
    273   if range_end.startswith('r'):
    274     range_end = range_end[1:]
    275 
    276   return [range_start, range_end]
    277 
    278 
    279 def LoadJSON(json_string):
    280   """Loads json object from string, or None.
    281 
    282   Args:
    283     json_string: A string to get object from.
    284 
    285   Returns:
    286     JSON object if the string represents a JSON object, None otherwise.
    287   """
    288   try:
    289     data = json.loads(json_string)
    290   except ValueError:
    291     data = None
    292 
    293   return data
    294 
    295 
    296 def GetDataFromURL(url):
    297   """Retrieves raw data from URL, tries 10 times.
    298 
    299   Args:
    300     url: URL to get data from.
    301     retries: Number of times to retry connection.
    302 
    303   Returns:
    304     None if the data retrieval fails, or the raw data.
    305   """
    306   status_code, data = utils.GetHttpClient().Get(url, retries=10)
    307   if status_code == 200:
    308     return data
    309   else:
    310     # Return None if it fails to read data.
    311     return None
    312 
    313 
    314 def FindMinLineDistance(crashed_line_list, changed_line_numbers,
    315                         line_range=3):
    316   """Calculates how far the changed line is from one of the crashes.
    317 
    318   Finds the minimum distance between the lines that the file crashed on
    319   and the lines that the file changed. For example, if the file crashed on
    320   line 200 and the CL changes line 203,204 and 205, the function returns 3.
    321 
    322   Args:
    323     crashed_line_list: A list of lines that the file crashed on.
    324     changed_line_numbers: A list of lines that the file changed.
    325     line_range: Number of lines to look back for.
    326 
    327   Returns:
    328     The minimum distance. If either of the input lists is empty,
    329     it returns inf.
    330 
    331   """
    332   min_distance = INFINITY
    333   crashed_line = -1
    334   changed_line = -1
    335 
    336   crashed_line_numbers = set()
    337   for crashed_line_range in crashed_line_list:
    338     for crashed_line in crashed_line_range:
    339       for line in range(crashed_line - line_range, crashed_line + 1):
    340         crashed_line_numbers.add(line)
    341 
    342   for line in crashed_line_numbers:
    343     for distance in changed_line_numbers:
    344       # Find the current distance and update the min if current distance is
    345       # less than current min.
    346       current_distance = abs(line - distance)
    347       if current_distance < min_distance:
    348         min_distance = current_distance
    349         crashed_line = line
    350         changed_line = distance
    351 
    352   return (min_distance, crashed_line, changed_line)
    353 
    354 
    355 def GuessIfSameSubPath(path1, path2):
    356   """Guesses if two paths represent same path.
    357 
    358   Compares the name of the folders in the path (by split('/')), and checks
    359   if they match either more than 3 or min of path lengths.
    360 
    361   Args:
    362     path1: First path.
    363     path2: Second path to compare.
    364 
    365   Returns:
    366     True if it they are thought to be a same path, False otherwise.
    367   """
    368   path1 = path1.split('/')
    369   path2 = path2.split('/')
    370 
    371   intersection = set(path1).intersection(set(path2))
    372   return len(intersection) >= (min(3, min(len(path1), len(path2))))
    373 
    374 
    375 def FindMinStackFrameNumber(stack_frame_indices, priorities):
    376   """Finds the minimum stack number, from the list of stack numbers.
    377 
    378   Args:
    379     stack_frame_indices: A list of lists containing stack position.
    380     priorities: A list of of priority for each file.
    381 
    382   Returns:
    383     Inf if stack_frame_indices is empty, minimum stack number otherwise.
    384   """
    385   # Get the indexes of the highest priority (or low priority number).
    386   highest_priority = min(priorities)
    387   highest_priority_indices = []
    388   for i in range(len(priorities)):
    389     if priorities[i] == highest_priority:
    390       highest_priority_indices.append(i)
    391 
    392   # Gather the list of stack frame numbers for the files that change the
    393   # crash lines.
    394   flattened = []
    395   for i in highest_priority_indices:
    396     flattened += stack_frame_indices[i]
    397 
    398   # If no stack frame information is available, return inf. Else, return min.
    399   if not flattened:
    400     return INFINITY
    401   else:
    402     return min(flattened)
    403 
    404 
    405 def AddHyperlink(text, link):
    406   """Returns a string with HTML link tag.
    407 
    408   Args:
    409     text: A string to add link.
    410     link: A link to add to the string.
    411 
    412   Returns:
    413     A string with hyperlink added.
    414   """
    415   sanitized_link = cgi.escape(link, quote=True)
    416   sanitized_text = cgi.escape(str(text))
    417   return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
    418 
    419 
    420 def PrettifyList(items):
    421   """Returns a string representation of a list.
    422 
    423   It adds comma in between the elements and removes the brackets.
    424   Args:
    425     items: A list to prettify.
    426   Returns:
    427     A string representation of the list.
    428   """
    429   return ', '.join(map(str, items))
    430 
    431 
    432 def PrettifyFrameInfo(frame_indices, functions):
    433   """Return a string to represent the frames with functions."""
    434   frames = []
    435   for frame_index, function in zip(frame_indices, functions):
    436     frames.append('frame #%s, "%s"' % (frame_index, function.split('(')[0]))
    437   return '; '.join(frames)
    438 
    439 
    440 def PrettifyFiles(file_list):
    441   """Returns a string representation of a list of file names.
    442 
    443   Args:
    444     file_list: A list of tuple, (file_name, file_url).
    445   Returns:
    446     A string representation of file names with their urls.
    447   """
    448   ret = ['\n']
    449   for file_name, file_url in file_list:
    450     ret.append('      %s\n' % AddHyperlink(file_name, file_url))
    451   return ''.join(ret)
    452 
    453 
    454 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
    455                  function, line_range=3):
    456   """Finds the overlap betwee changed lines and crashed lines.
    457 
    458   Finds the intersection of the lines that caused the crash and
    459   lines that the file changes. The intersection looks within 3 lines
    460   of the line that caused the crash.
    461 
    462   Args:
    463     crashed_line_list: A list of lines that the file crashed on.
    464     stack_frame_index: A list of positions in stack for each of the lines.
    465     changed_line_numbers: A list of lines that the file changed.
    466     function: A list of functions that the file crashed on.
    467     line_range: Number of lines to look backwards from crashed lines.
    468 
    469   Returns:
    470     line_number_intersection: Intersection between crashed_line_list and
    471                        changed_line_numbers.
    472     stack_frame_index_intersection: Stack number for each of the intersections.
    473   """
    474   line_number_intersection = []
    475   stack_frame_index_intersection = []
    476   function_intersection = []
    477 
    478   # Iterate through the crashed lines, and its occurence in stack.
    479   for (lines, stack_frame_index, function_name) in zip(
    480       crashed_line_list, stack_frame_index, function):
    481     # Also check previous 'line_range' lines. Create a set of all changed lines
    482     # and lines within 3 lines range before the crashed line.
    483     line_minus_n = set()
    484     for line in lines:
    485       for line_in_range in range(line - line_range, line + 1):
    486         line_minus_n.add(line_in_range)
    487 
    488     for changed_line in changed_line_numbers:
    489       # If a CL does not change crahsed line, check next line.
    490       if changed_line not in line_minus_n:
    491         continue
    492 
    493       intersected_line = set()
    494       # If the changed line is exactly the crashed line, add that line.
    495       for line in lines:
    496         if line in changed_line_numbers:
    497           intersected_line.add(line)
    498 
    499         # If the changed line is in 3 lines of the crashed line, add the line.
    500         else:
    501           intersected_line.add(changed_line)
    502 
    503       # Avoid adding the same line twice.
    504       if intersected_line not in line_number_intersection:
    505         line_number_intersection.append(list(intersected_line))
    506         stack_frame_index_intersection.append(stack_frame_index)
    507         function_intersection.append(function_name)
    508       break
    509 
    510   return (line_number_intersection, stack_frame_index_intersection,
    511           function_intersection)
    512 
    513 
    514 def MatchListToResultList(matches):
    515   """Convert list of matches to the list of result objects.
    516 
    517   Args:
    518     matches: A list of match objects along with its stack priority and revision
    519              number/git hash
    520   Returns:
    521     A list of result object.
    522 
    523   """
    524   result_list = []
    525 
    526   for _, cl, match in matches:
    527     suspected_cl = cl
    528     revision_url = match.revision_url
    529     component_name = match.component_name
    530     author = match.author
    531     reason = match.reason
    532     review_url = match.review_url
    533     reviewers = match.reviewers
    534     # For matches, line content do not exist.
    535     line_content = None
    536     message = match.message
    537 
    538     result = Result(suspected_cl, revision_url, component_name, author, reason,
    539                     review_url, reviewers, line_content, message)
    540     result_list.append(result)
    541 
    542   return result_list
    543 
    544 
    545 def BlameListToResultList(blame_list):
    546   """Convert blame list to the list of result objects.
    547 
    548   Args:
    549     blame_list: A list of blame objects.
    550 
    551   Returns:
    552     A list of result objects.
    553   """
    554   result_list = []
    555 
    556   for blame in blame_list:
    557     suspected_cl = blame.revision
    558     revision_url = blame.url
    559     component_name = blame.component_name
    560     author = blame.author
    561     reason = (
    562         'The CL last changed line %s of file %s, which is stack frame %d.' %
    563         (blame.line_number, blame.file, blame.stack_frame_index))
    564     # Blame object does not have review url and reviewers.
    565     review_url = None
    566     reviewers = None
    567     line_content = blame.line_content
    568     message = blame.message
    569 
    570     result = Result(suspected_cl, revision_url, component_name, author, reason,
    571                     review_url, reviewers, line_content, message)
    572     result_list.append(result)
    573 
    574   return result_list
    575