Home | History | Annotate | Download | only in scripts
      1 #! /usr/bin/env python
      2 
      3 """Show file statistics by extension."""
      4 
      5 from __future__ import print_function
      6 
      7 import os
      8 import sys
      9 
     10 class Stats:
     11 
     12     def __init__(self):
     13         self.stats = {}
     14 
     15     def statargs(self, args):
     16         for arg in args:
     17             if os.path.isdir(arg):
     18                 self.statdir(arg)
     19             elif os.path.isfile(arg):
     20                 self.statfile(arg)
     21             else:
     22                 sys.stderr.write("Can't find %s\n" % arg)
     23                 self.addstats("<???>", "unknown", 1)
     24 
     25     def statdir(self, dir):
     26         self.addstats("<dir>", "dirs", 1)
     27         try:
     28             names = sorted(os.listdir(dir))
     29         except os.error as err:
     30             sys.stderr.write("Can't list %s: %s\n" % (dir, err))
     31             self.addstats("<dir>", "unlistable", 1)
     32             return
     33         for name in names:
     34             if name.startswith(".#"):
     35                 continue # Skip CVS temp files
     36             if name.endswith("~"):
     37                 continue# Skip Emacs backup files
     38             full = os.path.join(dir, name)
     39             if os.path.islink(full):
     40                 self.addstats("<lnk>", "links", 1)
     41             elif os.path.isdir(full):
     42                 self.statdir(full)
     43             else:
     44                 self.statfile(full)
     45 
     46     def statfile(self, filename):
     47         head, ext = os.path.splitext(filename)
     48         head, base = os.path.split(filename)
     49         if ext == base:
     50             ext = "" # E.g. .cvsignore is deemed not to have an extension
     51         ext = os.path.normcase(ext)
     52         if not ext:
     53             ext = "<none>"
     54         self.addstats(ext, "files", 1)
     55         try:
     56             f = open(filename, "rb")
     57         except IOError as err:
     58             sys.stderr.write("Can't open %s: %s\n" % (filename, err))
     59             self.addstats(ext, "unopenable", 1)
     60             return
     61         data = f.read()
     62         f.close()
     63         self.addstats(ext, "bytes", len(data))
     64         if b'\0' in data:
     65             self.addstats(ext, "binary", 1)
     66             return
     67         if not data:
     68             self.addstats(ext, "empty", 1)
     69         #self.addstats(ext, "chars", len(data))
     70         lines = data.splitlines()
     71         self.addstats(ext, "lines", len(lines))
     72         del lines
     73         words = data.split()
     74         self.addstats(ext, "words", len(words))
     75 
     76     def addstats(self, ext, key, n):
     77         d = self.stats.setdefault(ext, {})
     78         d[key] = d.get(key, 0) + n
     79 
     80     def report(self):
     81         exts = sorted(self.stats.keys())
     82         # Get the column keys
     83         columns = {}
     84         for ext in exts:
     85             columns.update(self.stats[ext])
     86         cols = sorted(columns.keys())
     87         colwidth = {}
     88         colwidth["ext"] = max([len(ext) for ext in exts])
     89         minwidth = 6
     90         self.stats["TOTAL"] = {}
     91         for col in cols:
     92             total = 0
     93             cw = max(minwidth, len(col))
     94             for ext in exts:
     95                 value = self.stats[ext].get(col)
     96                 if value is None:
     97                     w = 0
     98                 else:
     99                     w = len("%d" % value)
    100                     total += value
    101                 cw = max(cw, w)
    102             cw = max(cw, len(str(total)))
    103             colwidth[col] = cw
    104             self.stats["TOTAL"][col] = total
    105         exts.append("TOTAL")
    106         for ext in exts:
    107             self.stats[ext]["ext"] = ext
    108         cols.insert(0, "ext")
    109         def printheader():
    110             for col in cols:
    111                 print("%*s" % (colwidth[col], col), end=" ")
    112             print()
    113         printheader()
    114         for ext in exts:
    115             for col in cols:
    116                 value = self.stats[ext].get(col, "")
    117                 print("%*s" % (colwidth[col], value), end=" ")
    118             print()
    119         printheader() # Another header at the bottom
    120 
    121 def main():
    122     args = sys.argv[1:]
    123     if not args:
    124         args = [os.curdir]
    125     s = Stats()
    126     s.statargs(args)
    127     s.report()
    128 
    129 if __name__ == "__main__":
    130     main()
    131