#! /usr/bin/env python

      2 
      3 """Show file statistics by extension."""
      4 
      5 import os
      6 import sys
      7 
      8 class Stats:
      9 
     10     def __init__(self):
     11         self.stats = {}
     12 
     13     def statargs(self, args):
     14         for arg in args:
     15             if os.path.isdir(arg):
     16                 self.statdir(arg)
     17             elif os.path.isfile(arg):
     18                 self.statfile(arg)
     19             else:
     20                 sys.stderr.write("Can't find %s\n" % arg)
     21                 self.addstats("<???>", "unknown", 1)
     22 
     23     def statdir(self, dir):
     24         self.addstats("<dir>", "dirs", 1)
     25         try:
     26             names = os.listdir(dir)
     27         except os.error, err:
     28             sys.stderr.write("Can't list %s: %s\n" % (dir, err))
     29             self.addstats("<dir>", "unlistable", 1)
     30             return
     31         names.sort()
     32         for name in names:
     33             if name.startswith(".#"):
     34                 continue # Skip CVS temp files

     35             if name.endswith("~"):
     36                 continue# Skip Emacs backup files

     37             full = os.path.join(dir, name)
     38             if os.path.islink(full):
     39                 self.addstats("<lnk>", "links", 1)
     40             elif os.path.isdir(full):
     41                 self.statdir(full)
     42             else:
     43                 self.statfile(full)
     44 
     45     def statfile(self, filename):
     46         head, ext = os.path.splitext(filename)
     47         head, base = os.path.split(filename)
     48         if ext == base:
     49             ext = "" # E.g. .cvsignore is deemed not to have an extension

     50         ext = os.path.normcase(ext)
     51         if not ext:
     52             ext = "<none>"
     53         self.addstats(ext, "files", 1)
     54         try:
     55             f = open(filename, "rb")
     56         except IOError, err:
     57             sys.stderr.write("Can't open %s: %s\n" % (filename, err))
     58             self.addstats(ext, "unopenable", 1)
     59             return
     60         data = f.read()
     61         f.close()
     62         self.addstats(ext, "bytes", len(data))
     63         if '\0' in data:
     64             self.addstats(ext, "binary", 1)
     65             return
     66         if not data:
     67             self.addstats(ext, "empty", 1)
     68         #self.addstats(ext, "chars", len(data))

     69         lines = data.splitlines()
     70         self.addstats(ext, "lines", len(lines))
     71         del lines
     72         words = data.split()
     73         self.addstats(ext, "words", len(words))
     74 
     75     def addstats(self, ext, key, n):
     76         d = self.stats.setdefault(ext, {})
     77         d[key] = d.get(key, 0) + n
     78 
     79     def report(self):
     80         exts = self.stats.keys()
     81         exts.sort()
     82         # Get the column keys

     83         columns = {}
     84         for ext in exts:
     85             columns.update(self.stats[ext])
     86         cols = columns.keys()
     87         cols.sort()
     88         colwidth = {}
     89         colwidth["ext"] = max([len(ext) for ext in exts])
     90         minwidth = 6
     91         self.stats["TOTAL"] = {}
     92         for col in cols:
     93             total = 0
     94             cw = max(minwidth, len(col))
     95             for ext in exts:
     96                 value = self.stats[ext].get(col)
     97                 if value is None:
     98                     w = 0
     99                 else:
    100                     w = len("%d" % value)
    101                     total += value
    102                 cw = max(cw, w)
    103             cw = max(cw, len(str(total)))
    104             colwidth[col] = cw
    105             self.stats["TOTAL"][col] = total
    106         exts.append("TOTAL")
    107         for ext in exts:
    108             self.stats[ext]["ext"] = ext
    109         cols.insert(0, "ext")
    110         def printheader():
    111             for col in cols:
    112                 print "%*s" % (colwidth[col], col),
    113             print
    114         printheader()
    115         for ext in exts:
    116             for col in cols:
    117                 value = self.stats[ext].get(col, "")
    118                 print "%*s" % (colwidth[col], value),
    119             print
    120         printheader() # Another header at the bottom

    121 
    122 def main():
    123     args = sys.argv[1:]
    124     if not args:
    125         args = [os.curdir]
    126     s = Stats()
    127     s.statargs(args)
    128     s.report()
    129 
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
    132