# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import operator
import re
import urllib
import urllib2

import webkitpy.common.config.urls as config_urls
from webkitpy.common.memoized import memoized
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.net.networktransaction import NetworkTransaction
from webkitpy.common.system.logutils import get_logger
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


_log = get_logger(__file__)


class Builder(object):
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None

    def name(self):
        return self._name

    def results_url(self):
        return "%s/results/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # In addition to per-build results, the build.chromium.org builders also
    # keep a directory that accumulates test results over many runs.
    def accumulated_results_url(self):
        return None

    def latest_layout_test_results_url(self):
        return self.accumulated_results_url() or self.latest_cached_build().results_url()

    @memoized
    def latest_layout_test_results(self):
        return self.fetch_layout_test_results(self.latest_layout_test_results_url())

    def _fetch_file_from_results(self, results_url, file_name):
        # It seems this can return None if the URL redirects and then returns 404.
        result = urllib2.urlopen("%s/%s" % (results_url, file_name))
        if not result:
            return None
        # urlopen returns a file-like object which sometimes works fine with str()
        # but sometimes is an addinfourl object. In either case calling read() is correct.
        return result.read()

    def fetch_layout_test_results(self, results_url):
        # FIXME: This should cache that the result was a 404 and stop hitting the network.
        results_file = NetworkTransaction(convert_404_to_None=True).run(lambda: self._fetch_file_from_results(results_url, "failing_results.json"))
        return LayoutTestResults.results_from_string(results_file)
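
    # A hedged usage sketch, not part of the API (the builder name below is
    # hypothetical and latest_layout_test_results() hits the network):
    #
    #   builder = BuildBot().builder_with_name("Example Builder")
    #   results = builder.latest_layout_test_results()
    #
    # latest_layout_test_results() is @memoized, so repeated calls on the
    # same Builder object do not refetch.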

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "%s/builders/%s" % (self._buildbot.buildbot_url, self.url_encoded_name())

    # This provides a single place to mock.
    def _fetch_build(self, build_number):
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        revision_string = build_dictionary['sourceStamp']['revision']
        return Build(self,
            build_number=int(build_dictionary['number']),
            # 'revision' may be None if a trunk build was started by the force-build button on the web page.
            revision=(int(revision_string) if revision_string else None),
            # Buildbot uses any number other than 0 to mean fail. Since we fetch with
            # filter=1, passing builds may contain no 'results' value.
            is_green=(not build_dictionary.get('results')),
        )

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        revision_build_pairs = self.revision_build_pairs_with_results()
        revision_build_pairs.sort(key=operator.itemgetter(1))
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")

    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        if not match:
            return None
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking.
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            _log.debug("Revision/build list failed to load.")
            result_files = []
        return dict(self._file_info_list_to_revision_to_build_list(result_files))

    def _file_info_list_to_revision_to_build_list(self, file_info_list):
        # This assumes there was only one build per revision, which is false, but we don't care for now.
        revisions_and_builds = []
        for file_info in file_info_list:
            revision_and_build = self._revision_and_build_for_filename(file_info["filename"])
            if revision_and_build:
                revisions_and_builds.append(revision_and_build)
        return revisions_and_builds

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's JSON API.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build
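

# A minimal, self-contained sketch of the "r<revision> (<build number>)"
# naming convention that Builder parses out of results directory listings.
# The sample file names are made up; passing None for the buildbot suffices
# because _revision_and_build_for_filename never touches the network.
def _example_parse_results_directory_names():
    builder = Builder("Example Builder", None)
    sample_names = ["r47483 (1).zip", "r47484 (2)/", "not-a-results-dir"]
    pairs = [builder._revision_and_build_for_filename(name) for name in sample_names]
    return [pair for pair in pairs if pair]  # [(47483, 1), (47484, 2)]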


class Build(object):
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))

    def results_zip_url(self):
        return "%s.zip" % self.results_url()

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across all master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)
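

# A hedged sketch of how Build composes its URLs. _ExampleBuilder is a
# hypothetical stand-in for a real Builder so the example stays off the
# network; the host and builder name are made up.
class _ExampleBuilder(object):
    def url(self):
        return "http://build.webkit.org/builders/Example%20Builder"

    def results_url(self):
        return "http://build.webkit.org/results/Example%20Builder"


def _example_build_urls():
    build = Build(_ExampleBuilder(), build_number=1, revision=47483, is_green=True)
    # url()             -> ".../builders/Example%20Builder/builds/1"
    # results_url()     -> ".../results/Example%20Builder/r47483%20%281%29"
    # results_zip_url() -> results_url() plus ".zip"
    return build.url(), build.results_url(), build.results_zip_url()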


class BuildBot(object):
    _builder_factory = Builder
    _default_url = config_urls.buildbot_url

    def __init__(self, url=None):
        self.buildbot_url = url if url else self._default_url
        self._builder_by_name = {}

    def _parse_last_build_cell(self, builder, cell):
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number.
            revision_string = status_link.string
            # If revision_string has non-digits assume it's not a revision number.
            builder['built_revision'] = int(revision_string) if not re.match(r'\D', revision_string) else None

            # FIXME: We treat slave lost as green even though it is not, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            cell_contents = cell.renderContents()
            builder['is_green'] = not re.search('fail', cell_contents) or bool(re.search('lost', cell_contents))

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell; just give up. This
            # can happen when a builder was just added and its first cell
            # simply reads "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0]  # normally "building" or "idle"
        # The middle lines document how long is left for any current builds.
        match = re.match(r"(?P<pending_builds>\d+) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name.
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        # Note: filter=1 will remove None and {} and '', which cuts noise but can
        # cause keys to be missing that you might otherwise expect.
        # FIXME: The bot sends a *huge* amount of data for each request; we should
        # find a way to reduce the response size further.
        json_url = "%s/json/builders/%s/builds/%s?filter=1" % (self.buildbot_url, urllib.quote(builder.name()), build_number)
        try:
            return json.load(urllib2.urlopen(json_url))
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s, json: %s): %s" % (builder.name(), build_number, build_url, json_url, err))
            return None
        except ValueError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None
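
    # For reference, an abridged and hypothetical example of the filtered
    # JSON _fetch_build_dictionary returns -- only the keys that _fetch_build
    # actually reads are shown, and the values below are made up:
    #
    #   {
    #       "number": 1234,
    #       "sourceStamp": {"revision": "47483"},
    #       "results": 2,   # non-zero means failure; absent for passing builds
    #   }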

    def _fetch_one_box_per_builder(self):
        build_status_url = "%s/one_box_per_builder" % self.buildbot_url
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string."""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get
    # information about every builder in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = self._builder_factory(name, self)
            self._builder_by_name[name] = builder
        return builder
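
    # A hedged usage sketch of the status dictionaries built above (requires
    # network access; the keys shown are the ones the parsers populate):
    #
    #   buildbot = BuildBot()
    #   for status in buildbot.builder_statuses():
    #       print status["name"], status["is_green"], status["pending_builds"]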

    # This makes fewer requests than calling Builder.latest_build would. It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self):
        builder_statuses = self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()
        return None

    def _fetch_builder_page(self, builder):
        builder_page_url = "%s/builders/%s?numbuilds=100" % (self.buildbot_url, urllib.quote(builder.name()))
        return urllib2.urlopen(builder_page_url)

    def _revisions_for_builder(self, builder):
        soup = BeautifulSoup(self._fetch_builder_page(builder))
        revisions = []
        for status_row in soup.find('table').findAll('tr'):
            revision_anchor = status_row.find('a')
            table_cells = status_row.findAll('td')
            if not table_cells or len(table_cells) < 3 or not table_cells[2].string:
                continue
            if revision_anchor and revision_anchor.string and re.match(r'^\d+$', revision_anchor.string):
                revisions.append((int(revision_anchor.string), 'success' in table_cells[2].string))
        return revisions

    def _find_green_revision(self, builder_revisions):
        revision_statuses = {}
        for builder in builder_revisions:
            for revision, succeeded in builder_revisions[builder]:
                revision_statuses.setdefault(revision, set())
                if succeeded and revision_statuses[revision] is not None:
                    revision_statuses[revision].add(builder)
                else:
                    # A failure poisons the revision for all builders.
                    revision_statuses[revision] = None

        # In descending order, look for a revision X with successful builds.
        # Once we find X, check whether the remaining builders succeeded in the neighborhood of X.
        revisions_in_order = sorted(revision_statuses.keys(), reverse=True)
        for i, revision in enumerate(revisions_in_order):
            if not revision_statuses[revision]:
                continue

            builders_succeeded_in_future = set()
            for future_revision in sorted(revisions_in_order[:i + 1]):
                if not revision_statuses[future_revision]:
                    break
                builders_succeeded_in_future = builders_succeeded_in_future.union(revision_statuses[future_revision])

            builders_succeeded_in_past = set()
            for past_revision in revisions_in_order[i:]:
                if not revision_statuses[past_revision]:
                    break
                builders_succeeded_in_past = builders_succeeded_in_past.union(revision_statuses[past_revision])

            if len(builders_succeeded_in_future) == len(builder_revisions) and len(builders_succeeded_in_past) == len(builder_revisions):
                return revision
        return None
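

# A self-contained sketch exercising _find_green_revision with synthetic
# data instead of live builder pages; the builder names and the
# (revision, succeeded) pairs below are made up.
def _example_find_green_revision():
    builder_revisions = {
        "Builder A": [(104, True), (102, True), (101, True)],
        "Builder B": [(103, True), (101, True)],
    }
    # Returns 103: Builder B succeeded at 103 itself, and Builder A succeeded
    # on both sides of it (at 104 and at 102) with no failures in between.
    return BuildBot()._find_green_revision(builder_revisions)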