Home | History | Annotate | Download | only in comparisons
      1 #! /usr/bin/env python
      2 
      3 # 1)  Regular Expressions Test
      4 #
      5 #     Read a file of (extended per egrep) regular expressions (one per line),
      6 #     and apply those to all files whose names are listed on the command line.
      7 #     Basically, an 'egrep -f' simulator.  Test it with 20 "vt100" patterns
      8 #     against five /etc/termcap files.  Tests using more elaborate patterns
      9 #     would also be interesting.  Your code should not break if given hundreds
     10 #     of regular expressions or binary files to scan.
     11 
     12 # This implementation:
     13 # - combines all patterns into a single one using ( ... | ... | ... )
     14 # - reads patterns from stdin, scans files given as command line arguments
     15 # - produces output in the format <file>:<lineno>:<line>
     16 # - is only about 2.5 times as slow as egrep (though I couldn't run
     17 #   Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
     18 
     19 import string
     20 import sys
     21 import re
     22 
     23 def main():
     24     pats = map(chomp, sys.stdin.readlines())
     25     bigpat = '(' + '|'.join(pats) + ')'
     26     prog = re.compile(bigpat)
     27 
     28     for file in sys.argv[1:]:
     29         try:
     30             fp = open(file, 'r')
     31         except IOError, msg:
     32             print "%s: %s" % (file, msg)
     33             continue
     34         lineno = 0
     35         while 1:
     36             line = fp.readline()
     37             if not line:
     38                 break
     39             lineno = lineno + 1
     40             if prog.search(line):
     41                 print "%s:%s:%s" % (file, lineno, line),
     42 
     43 def chomp(s):
     44     return s.rstrip('\n')
     45 
     46 if __name__ == '__main__':
     47     main()
     48