#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Caches build logs from a buildbot waterfall and scans them for a string.

Usage:
  --update          Refresh the builder list and pre-fetch recent build logs.
  --find <term>     Report which builds of each builder contain <term>.

Runs under both Python 2.7 and Python 3.
"""

import argparse
import collections
import contextlib
import errno
import os
import re
import sys

try:  # Python 3
    from urllib.parse import quote as _UrlQuote
    from urllib.request import urlopen as _UrlOpen
except ImportError:  # Python 2
    from urllib import quote as _UrlQuote
    from urllib2 import urlopen as _UrlOpen

# Where all the data lives.
ROOT_URL = "http://build.chromium.org/p/chromium.memory.fyi/builders"

# TODO(groby) - support multi-line search from the command line. Useful when
# scanning for classes of failures, see below.
SEARCH_STRING = """<p class=\"failure result\">
Failed memory test: content
</p>"""

# Location of the log cache.
CACHE_DIR = "buildlogs.tmp"

# If we don't find anything after searching |CUTOFF| logs, we're probably done.
CUTOFF = 100

# Matches the link text of a builder cell:
#   <td class="box"><a href="builders/<builder-name>">NAME</a>
_BOX_MATCHER = re.compile(r'.*a href[^>]*>([^<]*)<')

# Matches the link to a builder's latest build:
#   <a href="builders/<builder-name>/builds/<build-num>">
_BUILD_MATCHER = re.compile(r'.*a href="builders/(.*)/builds/([0-9]+)".*')


def _ToText(blob):
    """Returns |blob| as a native str, decoding bytes as UTF-8 when needed.

    On Python 2 the data read from files and sockets already is a str and is
    returned untouched; on Python 3 it is bytes and gets decoded (undecodable
    bytes are replaced rather than raising).
    """
    if isinstance(blob, str):
        return blob
    return blob.decode("utf-8", "replace")


def EnsurePath(path):
    """Makes sure directory |path| exists, creating it if it doesn't.

    Raises:
      OSError: if the directory cannot be created, or if |path| already
          exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is only benign when the existing entry really is a directory.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise


class Cache(object):
    """Directory-backed cache mapping local file names to fetched URLs."""

    def __init__(self, root_dir):
        """Creates a cache rooted at |root_dir| (stored as an absolute path)."""
        self._root_dir = os.path.abspath(root_dir)

    def _LocalName(self, name):
        """Resolves |name| to a path inside the cache root.

        If name is a relative path, treat it as relative to cache root.
        If it is absolute and under cache root, pass it through.

        Raises:
          ValueError: if the resulting path lies outside the cache root.
        """
        if os.path.isabs(name):
            full_name = name
        else:
            full_name = os.path.join(self._root_dir, name)

        # Compare whole path components on the normalized path: a plain
        # character prefix test would accept "/cache-other/x" for root
        # "/cache", and ".." segments could climb out of the root.
        normalized = os.path.normpath(full_name)
        root_prefix = self._root_dir.rstrip(os.sep) + os.sep
        if (normalized != self._root_dir and
                not normalized.startswith(root_prefix)):
            raise ValueError(
                "%r is outside of the cache root %r" % (name, self._root_dir))
        return full_name

    def _FetchLocal(self, local_name):
        """Returns the cached lines for |local_name|, or None if not cached.

        Lines are returned without their line terminators, matching what
        Update() returns.
        """
        local_name = self._LocalName(local_name)
        EnsurePath(os.path.dirname(local_name))
        try:
            with open(local_name, "rb") as cached:
                blob = cached.read()
        except IOError as exception:
            if exception.errno == errno.ENOENT:
                return None
            raise
        return _ToText(blob).splitlines()

    def _FetchRemote(self, remote_name):
        """Downloads |remote_name| and returns the raw response body.

        Prints a message and re-raises if the URL cannot be opened.
        """
        try:
            response = _UrlOpen(remote_name)
        except Exception:
            print("Could not fetch %s" % remote_name)
            raise
        with contextlib.closing(response):
            return response.read()

    def Update(self, local_name, remote_name):
        """Fetches |remote_name|, stores it as |local_name|, returns its lines.

        The body is written byte-for-byte so the cache mirrors the server's
        response; the returned lines carry no line terminators.
        """
        local_name = self._LocalName(local_name)
        EnsurePath(os.path.dirname(local_name))
        blob = self._FetchRemote(remote_name)
        with open(local_name, "wb") as cached:
            cached.write(blob)
        return _ToText(blob).splitlines()

    def FetchData(self, local_name, remote_name):
        """Returns the lines of |local_name|, fetching |remote_name| on a miss.

        A missing or empty cache file both count as a miss.
        """
        result = self._FetchLocal(local_name)
        if result:
            return result
        # If we get here, the local cache does not exist yet. Fetch, and store.
        return self.Update(local_name, remote_name)


class Builder(object):
    """A single builder on a Waterfall, identified by its display name."""

    def __init__(self, waterfall, name):
        self._name = name
        self._waterfall = waterfall

    def Name(self):
        """Returns the builder's name."""
        return self._name

    def LatestBuild(self):
        """Returns the latest build number the waterfall lists for us."""
        return self._waterfall.GetLatestBuild(self._name)

    def GetBuildPath(self, build_num):
        """Returns the URL of build |build_num| of this builder."""
        return "%s/%s/builds/%d" % (
            self._waterfall._root_url, _UrlQuote(self._name), build_num)

    def _FetchBuildLog(self, build_num):
        """Returns the lines of build |build_num|'s page, via the cache."""
        local_build_path = "builds/%s" % self._name
        local_build_file = os.path.join(local_build_path,
                                        "%d.log" % build_num)
        return self._waterfall._cache.FetchData(local_build_file,
                                                self.GetBuildPath(build_num))

    def _CheckLog(self, build_num, tester):
        """Returns True if |tester| accepts any line of build |build_num|.

        |tester| is any callable taking one line. If it also exposes a
        Reset() method, that is called first so that state accumulated while
        scanning a previous log cannot leak into this one.
        """
        reset = getattr(tester, "Reset", None)
        if reset is not None:
            reset()
        log_lines = self._FetchBuildLog(build_num)
        return any(tester(line) for line in log_lines)

    def ScanLogs(self, tester):
        """Returns the build numbers whose log satisfies |tester|.

        Walks downward from the latest build and stops before build 0, or
        as soon as CUTOFF logs in total (not necessarily consecutive) have
        failed the test. The result is ordered newest first.
        """
        occurrences = []
        build = self.LatestBuild()
        no_results = 0
        while build > 0 and no_results < CUTOFF:
            if self._CheckLog(build, tester):
                occurrences.append(build)
            else:
                no_results += 1
            build -= 1
        return occurrences


class Waterfall(object):
    """The set of builders listed on a waterfall's "builders" page."""

    def __init__(self, root_url, cache_dir):
        self._root_url = root_url
        self._builders = {}      # builder name -> Builder
        self._top_revision = {}  # builder name -> latest build number
        self._cache = Cache(cache_dir)

    def Builders(self):
        """Returns a list of all known Builder objects."""
        return list(self._builders.values())

    def Update(self):
        """Re-downloads the builders page, then parses it if not yet parsed."""
        self._cache.Update("builders", self._root_url)
        self.FetchInfo()

    def FetchInfo(self):
        """Parses the builders page into builders and their latest builds.

        Does nothing if build numbers have already been collected.
        """
        if self._top_revision:
            return

        html = self._cache.FetchData("builders", self._root_url)

        # Search for both builders and latest build number in HTML
        # <td class="box"><a href="builders/<builder-name>"> identifies a
        # builder,
        # <a href="builders/<builder-name>/builds/<build-num>"> is the latest
        # build.
        last_builder = ""
        for line in html:
            if 'a href="builders/' not in line:
                continue
            if 'td class="box"' in line:
                box = _BOX_MATCHER.match(line)
                if box is None:
                    continue  # Malformed cell; nothing to record.
                last_builder = box.group(1)
                self._builders[last_builder] = Builder(self, last_builder)
            else:
                result = _BUILD_MATCHER.match(line)
                if result is None:
                    continue  # A builders/ link that is not a build link.
                builder = result.group(1)
                assert builder == _UrlQuote(last_builder), (
                    "build link for %r follows builder %r" %
                    (builder, last_builder))
                self._top_revision[last_builder] = int(result.group(2))

    def GetLatestBuild(self, name):
        """Returns the latest build number of builder |name|.

        Raises:
          KeyError: if no build number was found for |name|.
        """
        self.FetchInfo()
        assert self._top_revision
        return self._top_revision[name]


class MultiLineChange(object):
    """Callable that detects a run of consecutive lines in a line stream.

    Feed it one line at a time. It reports True on the line that completes a
    run of len(lines) consecutive input lines in which the i-th input line
    contains the i-th tracked line as a substring.
    """

    def __init__(self, lines):
        self._tracked_lines = list(lines)
        # Holds the most recent len(lines) input lines, oldest first.
        self._window = collections.deque(maxlen=len(self._tracked_lines))

    def Reset(self):
        """Forgets every line seen so far."""
        self._window.clear()

    def __call__(self, line):
        """Test a single line against multi-line change.

        Returns True when |line| completes a match. After a match the state
        is cleared, so successive matches never share lines. An empty set of
        tracked lines never matches.
        """
        if not self._tracked_lines:
            return False

        # Checking the whole window (instead of a single cursor) means a
        # failed partial match cannot hide a match that starts later, e.g.
        # input "a", "a", "b" against tracked lines "a", "b".
        self._window.append(line)
        if len(self._window) < len(self._tracked_lines):
            return False
        if all(tracked in seen
               for tracked, seen in zip(self._tracked_lines, self._window)):
            self._window.clear()
            return True
        return False


def main(argv):
    """Entry point; |argv| is the full argument vector including argv[0]."""
    # Create argument parser.
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group(required=True)
    commands.add_argument("--update", action='store_true')
    commands.add_argument("--find", metavar='search term')
    args = parser.parse_args(argv[1:])

    # The cache lives next to the script itself.
    script_dir = os.path.abspath(os.path.dirname(argv[0]))
    cache_path = os.path.join(script_dir, CACHE_DIR)

    fyi = Waterfall(ROOT_URL, cache_path)

    if args.update:
        fyi.Update()
        for builder in fyi.Builders():
            print("Updating %s" % builder.Name())
            # A tester that never matches makes ScanLogs fetch (and thereby
            # cache) logs until it hits the cutoff.
            builder.ScanLogs(lambda x: False)

    if args.find:
        tester = MultiLineChange(args.find.splitlines())
        fyi.FetchInfo()

        print("SCANNING FOR  %s" % args.find)
        for builder in fyi.Builders():
            print("Scanning %s" % builder.Name())
            occurrences = builder.ScanLogs(tester)
            if occurrences:
                min_build = min(occurrences)
                build_url = builder.GetBuildPath(min_build)
                print("Earliest occurrence in build %d" % min_build)
                print("Latest occurrence in build %d" % max(occurrences))
                print("Latest build: %d" % builder.LatestBuild())
                print(build_url)
                print("%d total" % len(occurrences))


if __name__ == "__main__":
    sys.exit(main(sys.argv))