# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import operator
import re
import urllib
import urllib2

import webkitpy.common.config.urls as config_urls
from webkitpy.common.memoized import memoized
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.net.networktransaction import NetworkTransaction
from webkitpy.common.system.logutils import get_logger
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


_log = get_logger(__file__)


class Builder(object):
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None

    def name(self):
        return self._name

    def results_url(self):
        return "%s/results/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # In addition to per-build results, the build.chromium.org builders also
    # keep a directory that accumulates test results over many runs.
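    # A builder subclass for such a bot could override this along these lines
    # (a hypothetical URL layout, not verified against the real chromium.org one):
    #   def accumulated_results_url(self):
    #       return "%s/results/layout-test-results" % self.url()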
    def accumulated_results_url(self):
        return None

    def latest_layout_test_results_url(self):
        return self.accumulated_results_url() or self.latest_cached_build().results_url()

    @memoized
    def latest_layout_test_results(self):
        return self.fetch_layout_test_results(self.latest_layout_test_results_url())

    def _fetch_file_from_results(self, results_url, file_name):
        # It seems this can return None if the url redirects and then returns a 404.
        result = urllib2.urlopen("%s/%s" % (results_url, file_name))
        if not result:
            return None
        # urlopen returns a file-like object which sometimes works fine with str()
        # but sometimes is an addinfourl object.  In either case calling read() is correct.
        return result.read()

    def fetch_layout_test_results(self, results_url):
        # FIXME: This should cache that the result was a 404 and stop hitting the network.
        results_file = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results(results_url, "failing_results.json"))
        return LayoutTestResults.results_from_string(results_file)

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "%s/builders/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # This provides a single place to mock.
    def _fetch_build(self, build_number):
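        # Illustrative shape of the dictionary buildbot's json API returns
        # (trimmed; the exact fields vary by buildbot version):
        #   {"number": 1234, "results": 2, "sourceStamp": {"revision": "47483"}}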
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        revision_string = build_dictionary['sourceStamp']['revision']
        return Build(self,
            build_number=int(build_dictionary['number']),
            # 'revision' may be None if a trunk build was started by the force-build button on the web page.
            revision=(int(revision_string) if revision_string else None),
            # Buildbot uses any number other than 0 to mean fail.  Since we fetch with
            # filter=1, passing builds may contain no 'results' value.
            is_green=(not build_dictionary.get('results')),
        )

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        revision_build_pairs = self.revision_build_pairs_with_results()
        revision_build_pairs.sort(key=operator.itemgetter(1))
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
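        # Both parse to (47483, 1); names that don't match yield None.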
        match = self.file_name_regexp.match(filename)
        if not match:
            return None
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking.
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            _log.debug("Revision/build list failed to load.")
            result_files = []
        return dict(self._file_info_list_to_revision_to_build_list(result_files))

    def _file_info_list_to_revision_to_build_list(self, file_info_list):
        # This assumes there was only one build per revision, which is false, but we don't care for now.
        revisions_and_builds = []
        for file_info in file_info_list:
            revision_and_build = self._revision_and_build_for_filename(file_info["filename"])
            if revision_and_build:
                revisions_and_builds.append(revision_and_build)
        return revisions_and_builds

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's json api.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build


class Build(object):
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
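        # e.g. "r47483 (1)"; Builder.file_name_regexp parses directory names of this form.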
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))

    def results_zip_url(self):
        return "%s.zip" % self.results_url()

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across all master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)


class BuildBot(object):
    _builder_factory = Builder
    _default_url = config_urls.buildbot_url

    def __init__(self, url=None):
        self.buildbot_url = url if url else self._default_url
        self._builder_by_name = {}

    def _parse_last_build_cell(self, builder, cell):
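        # Illustrative (hypothetical) markup for the "last build" cell this parses:
        #   <td class="LastBuild box success">
        #     <a href="builders/Leopard%20Release/builds/123">47483</a><br />build successful</td>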
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number.
            revision_string = status_link.string
            # If revision_string starts with a non-digit, assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                                        if not re.match(r'\D', revision_string) \
                                        else None

            # FIXME: We treat a lost slave as green even though it is not, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                                  bool(re.search('lost', cell.renderContents()))

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, so just give up.  This
            # can happen when a builder was just added and its first cell simply
            # reads "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
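        # Illustrative cell contents (hypothetical): "building<br />~ 3 mins<br />1 pending"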
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0]  # normally "building" or "idle"
        # The middle lines document how long is left for any current builds.
        match = re.match(r"(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name.
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        # Note: filter=1 removes None, {} and '' values, which cuts noise but can
        # leave out keys you might otherwise expect to be present.
        # FIXME: The bot sends a *huge* amount of data for each request; we should
        # find a way to reduce the response size further.
        json_url = "%s/json/builders/%s/builds/%s?filter=1" % (self.buildbot_url, urllib.quote(builder.name()), build_number)
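        # e.g. (illustrative): http://build.webkit.org/json/builders/Leopard%20Release/builds/1234?filter=1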
        try:
            return json.load(urllib2.urlopen(json_url))
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s, json: %s): %s" % (builder.name(), build_number, build_url, json_url, err))
            return None
        except ValueError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        build_status_url = "%s/one_box_per_builder" % self.buildbot_url
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string."""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
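        # Illustrative return value for a typical row (hypothetical values):
        #   {"filename": u"r47483 (1).zip", "size": u"10M", "type": u"[application/zip]", "encoding": u""}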
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get
    # status information about every builder in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = self._builder_factory(name, self)
            self._builder_by_name[name] = builder
        return builder

    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self):
        builder_statuses = self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
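        # Walks backward through the build history; implicitly returns None if
        # no build at or before |revision| exists.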
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()

    def _fetch_builder_page(self, builder):
        builder_page_url = "%s/builders/%s?numbuilds=100" % (self.buildbot_url, urllib.quote(builder.name()))
        return urllib2.urlopen(builder_page_url)

    def _revisions_for_builder(self, builder):
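        # Scrapes (revision, succeeded) pairs off the builder's status page,
        # e.g. (illustrative): [(47484, True), (47483, False)]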
        soup = BeautifulSoup(self._fetch_builder_page(builder))
        revisions = []
        for status_row in soup.find('table').findAll('tr'):
            revision_anchor = status_row.find('a')
            table_cells = status_row.findAll('td')
            if not table_cells or len(table_cells) < 3 or not table_cells[2].string:
                continue
            if revision_anchor and revision_anchor.string and re.match(r'^\d+$', revision_anchor.string):
                revisions.append((int(revision_anchor.string), 'success' in table_cells[2].string))
        return revisions

    def _find_green_revision(self, builder_revisions):
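        # builder_revisions maps builder name -> [(revision, succeeded), ...].
        # Illustrative (hypothetical builders): given
        #   {"Leopard": [(100, True), (101, True)],
        #    "Windows": [(100, True), (101, False)]}
        # this returns 100: the newest revision at which every builder passed,
        # scanning contiguous green results both forward and backward from it.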
        revision_statuses = {}
        for builder in builder_revisions:
            for revision, succeeded in builder_revisions[builder]:
                revision_statuses.setdefault(revision, set())
                if succeeded and revision_statuses[revision] is not None:
                    revision_statuses[revision].add(builder)
                else:
                    revision_statuses[revision] = None

        # In descending order, look for a revision X with successful builds.
        # Once we find X, check whether the remaining builders succeeded in the neighborhood of X.
        revisions_in_order = sorted(revision_statuses.keys(), reverse=True)
        for i, revision in enumerate(revisions_in_order):
            if not revision_statuses[revision]:
                continue

            builders_succeeded_in_future = set()
            for future_revision in sorted(revisions_in_order[:i + 1]):
                if not revision_statuses[future_revision]:
                    break
                builders_succeeded_in_future = builders_succeeded_in_future.union(revision_statuses[future_revision])

            builders_succeeded_in_past = set()
            for past_revision in revisions_in_order[i:]:
                if not revision_statuses[past_revision]:
                    break
                builders_succeeded_in_past = builders_succeeded_in_past.union(revision_statuses[past_revision])

            if len(builders_succeeded_in_future) == len(builder_revisions) and len(builders_succeeded_in_past) == len(builder_revisions):
                return revision
        return None
    407