1 ''' 2 Created on May 19, 2011 3 4 @author: bungeman 5 ''' 6 7 import re 8 import math 9 10 # bench representation algorithm constant names 11 ALGORITHM_AVERAGE = 'avg' 12 ALGORITHM_MEDIAN = 'med' 13 ALGORITHM_MINIMUM = 'min' 14 ALGORITHM_25TH_PERCENTILE = '25th' 15 16 class BenchDataPoint: 17 """A single data point produced by bench. 18 19 (str, str, str, float, {str:str})""" 20 def __init__(self, bench, config, time_type, time, settings): 21 self.bench = bench 22 self.config = config 23 self.time_type = time_type 24 self.time = time 25 self.settings = settings 26 27 def __repr__(self): 28 return "BenchDataPoint(%s, %s, %s, %s, %s)" % ( 29 str(self.bench), 30 str(self.config), 31 str(self.time_type), 32 str(self.time), 33 str(self.settings), 34 ) 35 36 class _ExtremeType(object): 37 """Instances of this class compare greater or less than other objects.""" 38 def __init__(self, cmpr, rep): 39 object.__init__(self) 40 self._cmpr = cmpr 41 self._rep = rep 42 43 def __cmp__(self, other): 44 if isinstance(other, self.__class__) and other._cmpr == self._cmpr: 45 return 0 46 return self._cmpr 47 48 def __repr__(self): 49 return self._rep 50 51 Max = _ExtremeType(1, "Max") 52 Min = _ExtremeType(-1, "Min") 53 54 class _ListAlgorithm(object): 55 """Algorithm for selecting the representation value from a given list. 56 representation is one of the ALGORITHM_XXX representation types.""" 57 def __init__(self, data, representation=None): 58 if not representation: 59 representation = ALGORITHM_AVERAGE # default algorithm 60 self._data = data 61 self._len = len(data) 62 if representation == ALGORITHM_AVERAGE: 63 self._rep = sum(self._data) / self._len 64 else: 65 self._data.sort() 66 if representation == ALGORITHM_MINIMUM: 67 self._rep = self._data[0] 68 else: 69 # for percentiles, we use the value below which x% of values are 70 # found, which allows for better detection of quantum behaviors. 71 if representation == ALGORITHM_MEDIAN: 72 x = int(round(0.5 * self._len + 0.5)) 73 elif representation == ALGORITHM_25TH_PERCENTILE: 74 x = int(round(0.25 * self._len + 0.5)) 75 else: 76 raise Exception("invalid representation algorithm %s!" % 77 representation) 78 self._rep = self._data[x - 1] 79 80 def compute(self): 81 return self._rep 82 83 def _ParseAndStoreTimes(config_re, time_re, line, bench, dic, 84 representation=None): 85 """Parses given bench time line with regex and adds data to the given dic. 86 config_re: regular expression for parsing the config line. 87 time_re: regular expression for parsing bench time. 88 line: input string line to parse. 89 bench: name of bench for the time values. 90 dic: dictionary to store bench values. See bench_dic in parse() below. 91 representation: should match one of the ALGORITHM_XXX types.""" 92 93 for config in re.finditer(config_re, line): 94 current_config = config.group(1) 95 if config_re.startswith(' tile_'): # per-tile bench, add name prefix 96 current_config = 'tile_' + current_config 97 times = config.group(2) 98 for new_time in re.finditer(time_re, times): 99 current_time_type = new_time.group(1) 100 iters = [float(i) for i in 101 new_time.group(2).strip().split(',')] 102 dic.setdefault(bench, {}).setdefault(current_config, {}).setdefault( 103 current_time_type, []).append(_ListAlgorithm( 104 iters, representation).compute()) 105 106 def parse(settings, lines, representation=None): 107 """Parses bench output into a useful data structure. 108 109 ({str:str}, __iter__ -> str) -> [BenchDataPoint] 110 representation is one of the ALGORITHM_XXX types.""" 111 112 benches = [] 113 current_bench = None 114 bench_dic = {} # [bench][config][time_type] -> [list of bench values] 115 setting_re = '([^\s=]+)(?:=(\S+))?' 116 settings_re = 'skia bench:((?:\s+' + setting_re + ')*)' 117 bench_re = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)' 118 time_re = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\d+\.\d+)*)' 119 # non-per-tile benches have configs that don't end with ']' 120 config_re = '(\S+[^\]]): ((?:' + time_re + '\s+)+)' 121 # per-tile bench lines are in the following format. Note that there are 122 # non-averaged bench numbers in separate lines, which we ignore now due to 123 # their inaccuracy. 124 tile_re = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:' 125 ' ((?:' + time_re + '\s+)+)') 126 127 for line in lines: 128 129 # see if this line is a settings line 130 settingsMatch = re.search(settings_re, line) 131 if (settingsMatch): 132 settings = dict(settings) 133 for settingMatch in re.finditer(setting_re, settingsMatch.group(1)): 134 if (settingMatch.group(2)): 135 settings[settingMatch.group(1)] = settingMatch.group(2) 136 else: 137 settings[settingMatch.group(1)] = True 138 139 # see if this line starts a new bench 140 new_bench = re.search(bench_re, line) 141 if new_bench: 142 current_bench = new_bench.group(1) 143 144 # add configs on this line to the bench_dic 145 if current_bench: 146 for regex in [config_re, tile_re]: 147 _ParseAndStoreTimes(regex, time_re, line, current_bench, 148 bench_dic, representation) 149 150 # append benches to list, use the total time as final bench value. 151 for bench in bench_dic: 152 for config in bench_dic[bench]: 153 for time_type in bench_dic[bench][config]: 154 benches.append(BenchDataPoint( 155 bench, 156 config, 157 time_type, 158 sum(bench_dic[bench][config][time_type]), 159 settings)) 160 161 return benches 162 163 class LinearRegression: 164 """Linear regression data based on a set of data points. 165 166 ([(Number,Number)]) 167 There must be at least two points for this to make sense.""" 168 def __init__(self, points): 169 n = len(points) 170 max_x = Min 171 min_x = Max 172 173 Sx = 0.0 174 Sy = 0.0 175 Sxx = 0.0 176 Sxy = 0.0 177 Syy = 0.0 178 for point in points: 179 x = point[0] 180 y = point[1] 181 max_x = max(max_x, x) 182 min_x = min(min_x, x) 183 184 Sx += x 185 Sy += y 186 Sxx += x*x 187 Sxy += x*y 188 Syy += y*y 189 190 denom = n*Sxx - Sx*Sx 191 if (denom != 0.0): 192 B = (n*Sxy - Sx*Sy) / denom 193 else: 194 B = 0.0 195 a = (1.0/n)*(Sy - B*Sx) 196 197 se2 = 0 198 sB2 = 0 199 sa2 = 0 200 if (n >= 3 and denom != 0.0): 201 se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom)) 202 sB2 = (n*se2) / denom 203 sa2 = sB2 * (1.0/n) * Sxx 204 205 206 self.slope = B 207 self.intercept = a 208 self.serror = math.sqrt(max(0, se2)) 209 self.serror_slope = math.sqrt(max(0, sB2)) 210 self.serror_intercept = math.sqrt(max(0, sa2)) 211 self.max_x = max_x 212 self.min_x = min_x 213 214 def __repr__(self): 215 return "LinearRegression(%s, %s, %s, %s, %s)" % ( 216 str(self.slope), 217 str(self.intercept), 218 str(self.serror), 219 str(self.serror_slope), 220 str(self.serror_intercept), 221 ) 222 223 def find_min_slope(self): 224 """Finds the minimal slope given one standard deviation.""" 225 slope = self.slope 226 intercept = self.intercept 227 error = self.serror 228 regr_start = self.min_x 229 regr_end = self.max_x 230 regr_width = regr_end - regr_start 231 232 if slope < 0: 233 lower_left_y = slope*regr_start + intercept - error 234 upper_right_y = slope*regr_end + intercept + error 235 return min(0, (upper_right_y - lower_left_y) / regr_width) 236 237 elif slope > 0: 238 upper_left_y = slope*regr_start + intercept + error 239 lower_right_y = slope*regr_end + intercept - error 240 return max(0, (lower_right_y - upper_left_y) / regr_width) 241 242 return 0 243 244 def CreateRevisionLink(revision_number): 245 """Returns HTML displaying the given revision number and linking to 246 that revision's change page at code.google.com, e.g. 247 http://code.google.com/p/skia/source/detail?r=2056 248 """ 249 return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%( 250 revision_number, revision_number) 251 252 def main(): 253 foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]] 254 LinearRegression(foo) 255 256 if __name__ == "__main__": 257 main() 258