Home | History | Annotate | Download | only in Scripts
      1 #!/usr/bin/env python
      2 
      3 #  Copyright (C) 2007, 2012 Apple Inc.  All rights reserved.
      4 #
      5 #  Redistribution and use in source and binary forms, with or without
      6 #  modification, are permitted provided that the following conditions
      7 #  are met:
      8 #  1. Redistributions of source code must retain the above copyright
      9 #     notice, this list of conditions and the following disclaimer.
     10 #  2. Redistributions in binary form must reproduce the above copyright
     11 #     notice, this list of conditions and the following disclaimer in the
     12 #     documentation and/or other materials provided with the distribution.
     13 #
     14 #  THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15 #  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16 #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17 #  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18 #  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19 #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20 #  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21 #  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22 #  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23 #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24 #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25 
     26 import math
     27 import sys
     28 import re
     29 import fileinput
     30 from optparse import OptionParser
     31 
     32 usage = "usage: %prog [options] [FILES]\n  Compute the mean and 95% confidence interval of a sample set.\n  Standard input or files must contain two or more decimal numbers, one per line."
     33 parser = OptionParser(usage=usage)
     34 parser.add_option("-u", "--unit", dest="unit", default="",
     35                   help="assume values are in units of UNIT", metavar="UNIT")
     36 parser.add_option("-v", "--verbose",
     37                   action="store_true", dest="verbose", default=False,
     38                   help="print all values (with units)")
     39 (options, files) = parser.parse_args()
     40 
     41 def sum(items):
     42     return reduce(lambda x,y: x+y, items)
     43 
     44 def arithmeticMean(items):
     45     return sum(items) / len(items)
     46 
     47 def standardDeviation(mean, items):
     48     deltaSquares = [(item - mean) ** 2 for item in items]
     49     return math.sqrt(sum(deltaSquares) / (len(items) - 1))
     50 
     51 def standardError(stdDev, items):
     52     return stdDev / math.sqrt(len(items))
     53 
     54 # t-distribution for 2-sided 95% confidence intervals
     55 tDistribution = [float('NaN'), float('NaN'), 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.36, 2.31, 2.26, 2.23, 2.20, 2.18, 2.16, 2.14, 2.13, 2.12, 2.11, 2.10, 2.09, 2.09, 2.08, 2.07, 2.07, 2.06, 2.06, 2.06, 2.05, 2.05, 2.05, 2.04, 2.04, 2.04, 2.03, 2.03, 2.03, 2.03, 2.03, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.01, 2.01, 2.01, 2.01, 2.01, 2.01, 2.01, 2.01, 2.01, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.96]
     56 tMax = len(tDistribution)
     57 tLimit = 1.96
     58 
     59 def tDist(n):
     60     if n > tMax:
     61         return tLimit
     62     return tDistribution[n]
     63 
     64 def twoSidedConfidenceInterval(items):
     65     mean = arithmeticMean(items)
     66     stdDev = standardDeviation(mean, items)
     67     stdErr = standardError(stdDev, items)
     68     return tDist(len(items)) * stdErr
     69 
     70 results = []
     71 
     72 decimalNumberPattern = re.compile(r"\d+\.?\d*")
     73 for line in fileinput.input(files):
     74     match = re.search(decimalNumberPattern, line)
     75     if match:
     76         results.append(float(match.group(0)))
     77 
     78 if len(results) == 0:
     79     parser.print_help()
     80     quit()
     81 
     82 
     83 mean = arithmeticMean(results)
     84 confidenceInterval = twoSidedConfidenceInterval(results)
     85 confidencePercent = 100 * confidenceInterval / mean
     86 
     87 if options.verbose:
     88     length = 7
     89     for item in results:
     90         line = "      %.2f %s" % (item, options.unit)
     91         print line
     92         length = len(line) if len(line) > length else length
     93 
     94     print "-" * length
     95 
     96 prefix = "Mean: " if options.verbose else ""
     97 print "%s%.2f %s +/- %.2f %s (%.1f%%)" % (prefix, mean, options.unit, confidenceInterval, options.unit, confidencePercent)
     98 
     99