Home | History | Annotate | Download | only in experimental
      1 #!/usr/bin/env python
      2 # Copyright 2015 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Print statistics about the rate of commits to a repository."""
      7 
      8 import datetime
      9 import itertools
     10 import json
     11 import math
     12 import urllib
     13 import urllib2
     14 
     15 
# Root URL of the server that CommitTimes() queries for commit logs.
_BASE_URL = 'https://chromium.googlesource.com/'
# Number of revisions requested per repository (sent as the 'n' query
# parameter). Can be up to 10,000.
_REVISION_COUNT = 1000

# Repository paths, relative to _BASE_URL, that main() reports on.
_REPOSITORIES = [
    'chromium/src',
    'angle/angle',
    'skia',
    'v8/v8',
]
     26 
     27 
     28 def Pairwise(iterable):
     29   """s -> (s0,s1), (s1,s2), (s2, s3), ..."""
     30   a, b = itertools.tee(iterable)
     31   next(b, None)
     32   return itertools.izip(a, b)
     33 
     34 
     35 def Percentile(data, percentile):
     36   """Find a percentile of a list of values.
     37 
     38   Parameters:
     39     data: A sorted list of values.
     40     percentile: The percentile to look up, from 0.0 to 1.0.
     41 
     42   Returns:
     43     The percentile.
     44 
     45   Raises:
     46     ValueError: If data is empty.
     47   """
     48   if not data:
     49     raise ValueError()
     50 
     51   k = (len(data) - 1) * percentile
     52   f = math.floor(k)
     53   c = math.ceil(k)
     54 
     55   if f == c:
     56     return data[int(k)]
     57   return data[int(f)] * (c - k) + data[int(c)] * (k - f)
     58 
     59 
     60 def CommitTimes(repository, revision_count):
     61   parameters = urllib.urlencode((('n', revision_count), ('format', 'JSON')))
     62   url = '%s/%s/+log?%s' % (_BASE_URL, urllib.quote(repository), parameters)
     63   data = json.loads(''.join(urllib2.urlopen(url).read().splitlines()[1:]))
     64 
     65   commit_times = []
     66   for revision in data['log']:
     67     commit_time_string = revision['committer']['time']
     68     commit_time = datetime.datetime.strptime(
     69         commit_time_string, '%a %b %d %H:%M:%S %Y')
     70     commit_times.append(commit_time)
     71 
     72   return commit_times
     73 
     74 
     75 def main():
     76   for repository in _REPOSITORIES:
     77     commit_times = CommitTimes(repository, _REVISION_COUNT)
     78 
     79     commit_durations = []
     80     for time1, time2 in Pairwise(commit_times):
     81       commit_durations.append((time1 - time2).total_seconds())
     82     commit_durations.sort()
     83 
     84     print 'REPOSITORY:', repository
     85     print 'Start Date:', min(commit_times)
     86     print '  End Date:', max(commit_times)
     87     print '  Duration:', max(commit_times) - min(commit_times)
     88     print '         n:', len(commit_times)
     89 
     90     for p in (0.00, 0.05, 0.25, 0.50, 0.75, 0.95, 1.00):
     91       percentile = Percentile(commit_durations, p)
     92       print '%3d%% commit duration:' % (p * 100), '%6ds' % percentile
     93     mean = math.fsum(commit_durations) / len(commit_durations)
     94     print ' Min commit duration:', '%6ds' % min(commit_durations)
     95     print 'Mean commit duration:', '%6ds' % mean
     96     print ' Max commit duration:', '%6ds' % max(commit_durations)
     97     print
     98 
     99 
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
    102