Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 """mailerdaemon - classes to parse mailer-daemon messages"""
      3 
      4 import rfc822
      5 import calendar
      6 import re
      7 import os
      8 import sys
      9 
     10 Unparseable = 'mailerdaemon.Unparseable'
     11 
     12 class ErrorMessage(rfc822.Message):
     13     def __init__(self, fp):
     14         rfc822.Message.__init__(self, fp)
     15         self.sub = ''
     16 
     17     def is_warning(self):
     18         sub = self.getheader('Subject')
     19         if not sub:
     20             return 0
     21         sub = sub.lower()
     22         if sub.startswith('waiting mail'): return 1
     23         if 'warning' in sub: return 1
     24         self.sub = sub
     25         return 0
     26 
     27     def get_errors(self):
     28         for p in EMPARSERS:
     29             self.rewindbody()
     30             try:
     31                 return p(self.fp, self.sub)
     32             except Unparseable:
     33                 pass
     34         raise Unparseable
     35 
     36 # List of re's or tuples of re's.
     37 # If a re, it should contain at least a group (?P<email>...) which
     38 # should refer to the email address.  The re can also contain a group
     39 # (?P<reason>...) which should refer to the reason (error message).
     40 # If no reason is present, the emparse_list_reason list is used to
     41 # find a reason.
     42 # If a tuple, the tuple should contain 2 re's.  The first re finds a
     43 # location, the second re is repeated one or more times to find
     44 # multiple email addresses.  The second re is matched (not searched)
     45 # where the previous match ended.
     46 # The re's are compiled using the re module.
     47 emparse_list_list = [
     48     'error: (?P<reason>unresolvable): (?P<email>.+)',
     49     ('----- The following addresses had permanent fatal errors -----\n',
     50      '(?P<email>[^ \n].*)\n( .*\n)?'),
     51     'remote execution.*\n.*rmail (?P<email>.+)',
     52     ('The following recipients did not receive your message:\n\n',
     53      ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
     54     '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
     55     '^<(?P<email>.*)>:\n(?P<reason>.*)',
     56     '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
     57     '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
     58     '^Original-Recipient: rfc822;(?P<email>.*)',
     59     '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
     60     '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
     61     '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
     62     '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
     63     '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
     64     ]
     65 # compile the re's in the list and store them in-place.
     66 for i in range(len(emparse_list_list)):
     67     x = emparse_list_list[i]
     68     if type(x) is type(''):
     69         x = re.compile(x, re.MULTILINE)
     70     else:
     71         xl = []
     72         for x in x:
     73             xl.append(re.compile(x, re.MULTILINE))
     74         x = tuple(xl)
     75         del xl
     76     emparse_list_list[i] = x
     77     del x
     78 del i
     79 
     80 # list of re's used to find reasons (error messages).
     81 # if a string, "<>" is replaced by a copy of the email address.
     82 # The expressions are searched for in order.  After the first match,
     83 # no more expressions are searched for.  So, order is important.
     84 emparse_list_reason = [
     85     r'^5\d{2} <>\.\.\. (?P<reason>.*)',
     86     '<>\.\.\. (?P<reason>.*)',
     87     re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
     88     re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
     89     re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
     90     ]
     91 emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
     92 def emparse_list(fp, sub):
     93     data = fp.read()
     94     res = emparse_list_from.search(data)
     95     if res is None:
     96         from_index = len(data)
     97     else:
     98         from_index = res.start(0)
     99     errors = []
    100     emails = []
    101     reason = None
    102     for regexp in emparse_list_list:
    103         if type(regexp) is type(()):
    104             res = regexp[0].search(data, 0, from_index)
    105             if res is not None:
    106                 try:
    107                     reason = res.group('reason')
    108                 except IndexError:
    109                     pass
    110                 while 1:
    111                     res = regexp[1].match(data, res.end(0), from_index)
    112                     if res is None:
    113                         break
    114                     emails.append(res.group('email'))
    115                 break
    116         else:
    117             res = regexp.search(data, 0, from_index)
    118             if res is not None:
    119                 emails.append(res.group('email'))
    120                 try:
    121                     reason = res.group('reason')
    122                 except IndexError:
    123                     pass
    124                 break
    125     if not emails:
    126         raise Unparseable
    127     if not reason:
    128         reason = sub
    129         if reason[:15] == 'returned mail: ':
    130             reason = reason[15:]
    131         for regexp in emparse_list_reason:
    132             if type(regexp) is type(''):
    133                 for i in range(len(emails)-1,-1,-1):
    134                     email = emails[i]
    135                     exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
    136                     res = exp.search(data)
    137                     if res is not None:
    138                         errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
    139                         del emails[i]
    140                 continue
    141             res = regexp.search(data)
    142             if res is not None:
    143                 reason = res.group('reason')
    144                 break
    145     for email in emails:
    146         errors.append(' '.join((email.strip()+': '+reason).split()))
    147     return errors
    148 
# Parsers tried in order by ErrorMessage.get_errors().  Each one is called
# as parser(fp, sub) and raises Unparseable to let the next one have a go.
EMPARSERS = [emparse_list, ]
    150 
    151 def sort_numeric(a, b):
    152     a = int(a)
    153     b = int(b)
    154     if a < b: return -1
    155     elif a > b: return 1
    156     else: return 0
    157 
    158 def parsedir(dir, modify):
    159     os.chdir(dir)
    160     pat = re.compile('^[0-9]*$')
    161     errordict = {}
    162     errorfirst = {}
    163     errorlast = {}
    164     nok = nwarn = nbad = 0
    165 
    166     # find all numeric file names and sort them
    167     files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
    168     files.sort(sort_numeric)
    169 
    170     for fn in files:
    171         # Lets try to parse the file.
    172         fp = open(fn)
    173         m = ErrorMessage(fp)
    174         sender = m.getaddr('From')
    175         print '%s\t%-40s\t'%(fn, sender[1]),
    176 
    177         if m.is_warning():
    178             fp.close()
    179             print 'warning only'
    180             nwarn = nwarn + 1
    181             if modify:
    182                 os.rename(fn, ','+fn)
    183 ##              os.unlink(fn)
    184             continue
    185 
    186         try:
    187             errors = m.get_errors()
    188         except Unparseable:
    189             print '** Not parseable'
    190             nbad = nbad + 1
    191             fp.close()
    192             continue
    193         print len(errors), 'errors'
    194 
    195         # Remember them
    196         for e in errors:
    197             try:
    198                 mm, dd = m.getdate('date')[1:1+2]
    199                 date = '%s %02d' % (calendar.month_abbr[mm], dd)
    200             except:
    201                 date = '??????'
    202             if not errordict.has_key(e):
    203                 errordict[e] = 1
    204                 errorfirst[e] = '%s (%s)' % (fn, date)
    205             else:
    206                 errordict[e] = errordict[e] + 1
    207             errorlast[e] = '%s (%s)' % (fn, date)
    208 
    209         fp.close()
    210         nok = nok + 1
    211         if modify:
    212             os.rename(fn, ','+fn)
    213 ##          os.unlink(fn)
    214 
    215     print '--------------'
    216     print nok, 'files parsed,',nwarn,'files warning-only,',
    217     print nbad,'files unparseable'
    218     print '--------------'
    219     list = []
    220     for e in errordict.keys():
    221         list.append((errordict[e], errorfirst[e], errorlast[e], e))
    222     list.sort()
    223     for num, first, last, e in list:
    224         print '%d %s - %s\t%s' % (num, first, last, e)
    225 
    226 def main():
    227     modify = 0
    228     if len(sys.argv) > 1 and sys.argv[1] == '-d':
    229         modify = 1
    230         del sys.argv[1]
    231     if len(sys.argv) > 1:
    232         for folder in sys.argv[1:]:
    233             parsedir(folder, modify)
    234     else:
    235         parsedir('/ufs/jack/Mail/errorsinbox', modify)
    236 
# Run main() when executed as a script.  The second test also fires when
# sys.argv[0] happens to be equal to this module's __name__.
# NOTE(review): the reason for the sys.argv[0] test is not visible here --
# presumably support for some launcher; confirm before removing it.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
    239