Home | History | Annotate | Download | only in scripts
      1 #! /usr/bin/env python3
      2 
      3 """Show file statistics by extension."""
      4 
      5 import os
      6 import sys
      7 
      8 
      9 class Stats:
     10 
     11     def __init__(self):
     12         self.stats = {}
     13 
     14     def statargs(self, args):
     15         for arg in args:
     16             if os.path.isdir(arg):
     17                 self.statdir(arg)
     18             elif os.path.isfile(arg):
     19                 self.statfile(arg)
     20             else:
     21                 sys.stderr.write("Can't find %s\n" % arg)
     22                 self.addstats("<???>", "unknown", 1)
     23 
     24     def statdir(self, dir):
     25         self.addstats("<dir>", "dirs", 1)
     26         try:
     27             names = os.listdir(dir)
     28         except OSError as err:
     29             sys.stderr.write("Can't list %s: %s\n" % (dir, err))
     30             self.addstats("<dir>", "unlistable", 1)
     31             return
     32         for name in sorted(names):
     33             if name.startswith(".#"):
     34                 continue  # Skip CVS temp files
     35             if name.endswith("~"):
     36                 continue  # Skip Emacs backup files
     37             full = os.path.join(dir, name)
     38             if os.path.islink(full):
     39                 self.addstats("<lnk>", "links", 1)
     40             elif os.path.isdir(full):
     41                 self.statdir(full)
     42             else:
     43                 self.statfile(full)
     44 
     45     def statfile(self, filename):
     46         head, ext = os.path.splitext(filename)
     47         head, base = os.path.split(filename)
     48         if ext == base:
     49             ext = ""  # E.g. .cvsignore is deemed not to have an extension
     50         ext = os.path.normcase(ext)
     51         if not ext:
     52             ext = "<none>"
     53         self.addstats(ext, "files", 1)
     54         try:
     55             with open(filename, "rb") as f:
     56                 data = f.read()
     57         except IOError as err:
     58             sys.stderr.write("Can't open %s: %s\n" % (filename, err))
     59             self.addstats(ext, "unopenable", 1)
     60             return
     61         self.addstats(ext, "bytes", len(data))
     62         if b'\0' in data:
     63             self.addstats(ext, "binary", 1)
     64             return
     65         if not data:
     66             self.addstats(ext, "empty", 1)
     67         # self.addstats(ext, "chars", len(data))
     68         lines = str(data, "latin-1").splitlines()
     69         self.addstats(ext, "lines", len(lines))
     70         del lines
     71         words = data.split()
     72         self.addstats(ext, "words", len(words))
     73 
     74     def addstats(self, ext, key, n):
     75         d = self.stats.setdefault(ext, {})
     76         d[key] = d.get(key, 0) + n
     77 
     78     def report(self):
     79         exts = sorted(self.stats)
     80         # Get the column keys
     81         columns = {}
     82         for ext in exts:
     83             columns.update(self.stats[ext])
     84         cols = sorted(columns)
     85         colwidth = {}
     86         colwidth["ext"] = max(map(len, exts))
     87         minwidth = 6
     88         self.stats["TOTAL"] = {}
     89         for col in cols:
     90             total = 0
     91             cw = max(minwidth, len(col))
     92             for ext in exts:
     93                 value = self.stats[ext].get(col)
     94                 if value is None:
     95                     w = 0
     96                 else:
     97                     w = len("%d" % value)
     98                     total += value
     99                 cw = max(cw, w)
    100             cw = max(cw, len(str(total)))
    101             colwidth[col] = cw
    102             self.stats["TOTAL"][col] = total
    103         exts.append("TOTAL")
    104         for ext in exts:
    105             self.stats[ext]["ext"] = ext
    106         cols.insert(0, "ext")
    107 
    108         def printheader():
    109             for col in cols:
    110                 print("%*s" % (colwidth[col], col), end=' ')
    111             print()
    112 
    113         printheader()
    114         for ext in exts:
    115             for col in cols:
    116                 value = self.stats[ext].get(col, "")
    117                 print("%*s" % (colwidth[col], value), end=' ')
    118             print()
    119         printheader()  # Another header at the bottom
    120 
    121 
    122 def main():
    123     args = sys.argv[1:]
    124     if not args:
    125         args = [os.curdir]
    126     s = Stats()
    127     s.statargs(args)
    128     s.report()
    129 
    130 
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
    133