#! /usr/bin/env python

"""Show file statistics by extension.

Each command-line argument may be a file or a directory; directories are
scanned recursively.  With no arguments the current directory is scanned.
A table with one row per extension (plus a TOTAL row) is printed to stdout.

The code deliberately sticks to constructs that are valid on both
Python 2.6+ and Python 3.
"""

import os
import sys


class Stats:
    """Accumulate per-extension counters and print them as a table.

    ``self.stats`` maps a row label (a file extension such as ".py", or one
    of the pseudo-extensions "<dir>", "<lnk>", "<none>", "<???>") to a dict
    mapping a counter name ("files", "bytes", "lines", ...) to an integer.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted under
        the "<???>" row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries.

        Symbolic links are counted under "<lnk>" and never followed.  A
        directory that cannot be listed is reported on stderr and counted as
        "unlistable".
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect counters for a single file.

        Every file contributes to "files"; readable files also contribute to
        "bytes".  Files containing a NUL byte are counted as "binary" and not
        examined further; all other files contribute "lines" and "words"
        (and "empty" when they have no content).  A file that cannot be
        opened is reported on stderr and counted as "unopenable".
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.basename(filename)
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            f = open(filename, "rb")
        except (IOError, OSError) as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        with f:  # Close the file even if read() fails
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        # The file was opened in binary mode, so compare against a bytes
        # literal; a text literal raises TypeError on Python 3.
        if b"\0" in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # Lines and words are counted on the raw bytes, so no decoding (and
        # no guess about the file's encoding) is needed.
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating either on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as an aligned table on stdout.

        The table has a header line, one row per extension in sorted order,
        a TOTAL row, and a repeated header at the bottom.  ``self.stats`` is
        left untouched, so the method may be called any number of times.
        """
        exts = sorted(self.stats)
        # Get the column keys
        columns = set()
        for ext in exts:
            columns.update(self.stats[ext])
        cols = sorted(columns)
        minwidth = 6
        # The label column must also fit the "TOTAL" row and the "ext"
        # header; including them keeps max() well-defined with no stats.
        colwidth = {"ext": max(len(label) for label in exts + ["TOTAL", "ext"])}
        totals = {}
        for col in cols:
            total = 0
            cw = max(minwidth, len(col))
            for ext in exts:
                value = self.stats[ext].get(col)
                if value is not None:
                    cw = max(cw, len("%d" % value))
                    total += value
            colwidth[col] = max(cw, len(str(total)))
            totals[col] = total

        def writerow(label, values):
            # Right-align every cell; a missing counter becomes a blank cell.
            cells = ["%*s" % (colwidth["ext"], label)]
            for col in cols:
                cells.append("%*s" % (colwidth[col], values.get(col, "")))
            sys.stdout.write(" ".join(cells) + "\n")

        header = dict((col, col) for col in cols)
        writerow("ext", header)
        for ext in exts:
            writerow(ext, self.stats[ext])
        writerow("TOTAL", totals)
        writerow("ext", header)  # Another header at the bottom


def main():
    """Scan the paths given on the command line and print the report."""
    args = sys.argv[1:]
    if not args:
        args = [os.curdir]
    s = Stats()
    s.statargs(args)
    s.report()


if __name__ == "__main__":
    main()