#! /usr/bin/env python

# 1) Regular Expressions Test
#
# Read a file of (extended per egrep) regular expressions (one per line),
# and apply those to all files whose names are listed on the command line.
# Basically, an 'egrep -f' simulator.  Test it with 20 "vt100" patterns
# against five /etc/termcap files.  Tests using more elaborate patterns
# would also be interesting.  Your code should not break if given hundreds
# of regular expressions or binary files to scan.

# This implementation:
# - combines all patterns into a single one using ( ... | ... | ... )
# - reads patterns from stdin, scans files given as command line arguments
# - produces output in the format <file>:<lineno>:<line>
# - is only about 2.5 times as slow as egrep (though I couldn't run
#   Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
# - uses only syntax accepted by both Python 2.6+ and Python 3

import string
import sys
import re


def main():
    """Read patterns from stdin and scan each file named in sys.argv[1:].

    Every matching line is written to stdout as <file>:<lineno>:<line>.
    A file that cannot be opened or read is reported on stdout as
    '<file>: <error>' and scanning continues with the next file.
    """
    pats = [chomp(line) for line in sys.stdin.readlines()]
    prog = compile_patterns(pats)
    write = sys.stdout.write

    for name in sys.argv[1:]:
        try:
            fp = open(name, 'r')
        except IOError as msg:
            write("%s: %s\n" % (name, msg))
            continue
        try:
            for hit in matching_lines(prog, name, fp):
                write(hit)
        except (IOError, UnicodeError) as msg:
            # A read can still fail after a successful open (for example a
            # binary file that cannot be decoded as text under Python 3).
            # Report it like an open failure instead of aborting the run.
            write("%s: %s\n" % (name, msg))
        finally:
            # Always release the handle, even if reading failed part-way.
            fp.close()


def compile_patterns(pats):
    """Compile a sequence of regex strings into one big alternation.

    The returned pattern object matches wherever any single pattern in
    pats matches.  An empty sequence yields a pattern that never matches,
    which is how 'egrep -f' treats an empty pattern file.
    """
    if not pats:
        # Joining nothing would give '()', the empty regex, which matches
        # every line.  An empty negative lookahead can never succeed.
        return re.compile('(?!)')
    return re.compile('(' + '|'.join(pats) + ')')


def matching_lines(prog, name, lines):
    """Yield '<name>:<lineno>:<line>' for each line that prog matches.

    prog is a compiled pattern, name is the label used as the <file>
    field, and lines is any iterable of strings (such as an open file).
    Line numbers start at 1.  Every yielded string ends with exactly the
    line's text followed by one newline, even if the last input line had
    no terminator.
    """
    for lineno, line in enumerate(lines, 1):
        # Search the text without its terminator so that patterns such as
        # '[^a]' or '\s' cannot match the newline itself.
        text = chomp(line)
        if prog.search(text):
            if not line.endswith('\n'):
                line = line + '\n'
            yield "%s:%s:%s" % (name, lineno, line)


def chomp(s):
    """Return s with any trailing newline characters removed."""
    return s.rstrip('\n')


if __name__ == '__main__':
    main()