Home | History | Annotate | Download | only in scripts
      1 """mailerdaemon - classes to parse mailer-daemon messages"""
      2 
      3 import rfc822
      4 import calendar
      5 import re
      6 import os
      7 import sys
      8 
      9 Unparseable = 'mailerdaemon.Unparseable'
     10 
     11 class ErrorMessage(rfc822.Message):
     12     def __init__(self, fp):
     13         rfc822.Message.__init__(self, fp)
     14         self.sub = ''
     15 
     16     def is_warning(self):
     17         sub = self.getheader('Subject')
     18         if not sub:
     19             return 0
     20         sub = sub.lower()
     21         if sub.startswith('waiting mail'): return 1
     22         if 'warning' in sub: return 1
     23         self.sub = sub
     24         return 0
     25 
     26     def get_errors(self):
     27         for p in EMPARSERS:
     28             self.rewindbody()
     29             try:
     30                 return p(self.fp, self.sub)
     31             except Unparseable:
     32                 pass
     33         raise Unparseable
     34 
     35 # List of re's or tuples of re's.

     36 # If a re, it should contain at least a group (?P<email>...) which

     37 # should refer to the email address.  The re can also contain a group

     38 # (?P<reason>...) which should refer to the reason (error message).

     39 # If no reason is present, the emparse_list_reason list is used to

     40 # find a reason.

     41 # If a tuple, the tuple should contain 2 re's.  The first re finds a

     42 # location, the second re is repeated one or more times to find

     43 # multiple email addresses.  The second re is matched (not searched)

     44 # where the previous match ended.

     45 # The re's are compiled using the re module.

     46 emparse_list_list = [
     47     'error: (?P<reason>unresolvable): (?P<email>.+)',
     48     ('----- The following addresses had permanent fatal errors -----\n',
     49      '(?P<email>[^ \n].*)\n( .*\n)?'),
     50     'remote execution.*\n.*rmail (?P<email>.+)',
     51     ('The following recipients did not receive your message:\n\n',
     52      ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
     53     '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
     54     '^<(?P<email>.*)>:\n(?P<reason>.*)',
     55     '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
     56     '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
     57     '^Original-Recipient: rfc822;(?P<email>.*)',
     58     '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
     59     '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
     60     '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
     61     '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
     62     '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
     63     ]
     64 # compile the re's in the list and store them in-place.

     65 for i in range(len(emparse_list_list)):
     66     x = emparse_list_list[i]
     67     if type(x) is type(''):
     68         x = re.compile(x, re.MULTILINE)
     69     else:
     70         xl = []
     71         for x in x:
     72             xl.append(re.compile(x, re.MULTILINE))
     73         x = tuple(xl)
     74         del xl
     75     emparse_list_list[i] = x
     76     del x
     77 del i
     78 
     79 # list of re's used to find reasons (error messages).

     80 # if a string, "<>" is replaced by a copy of the email address.

     81 # The expressions are searched for in order.  After the first match,

     82 # no more expressions are searched for.  So, order is important.

     83 emparse_list_reason = [
     84     r'^5\d{2} <>\.\.\. (?P<reason>.*)',
     85     '<>\.\.\. (?P<reason>.*)',
     86     re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
     87     re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
     88     re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
     89     ]
     90 emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
     91 def emparse_list(fp, sub):
     92     data = fp.read()
     93     res = emparse_list_from.search(data)
     94     if res is None:
     95         from_index = len(data)
     96     else:
     97         from_index = res.start(0)
     98     errors = []
     99     emails = []
    100     reason = None
    101     for regexp in emparse_list_list:
    102         if type(regexp) is type(()):
    103             res = regexp[0].search(data, 0, from_index)
    104             if res is not None:
    105                 try:
    106                     reason = res.group('reason')
    107                 except IndexError:
    108                     pass
    109                 while 1:
    110                     res = regexp[1].match(data, res.end(0), from_index)
    111                     if res is None:
    112                         break
    113                     emails.append(res.group('email'))
    114                 break
    115         else:
    116             res = regexp.search(data, 0, from_index)
    117             if res is not None:
    118                 emails.append(res.group('email'))
    119                 try:
    120                     reason = res.group('reason')
    121                 except IndexError:
    122                     pass
    123                 break
    124     if not emails:
    125         raise Unparseable
    126     if not reason:
    127         reason = sub
    128         if reason[:15] == 'returned mail: ':
    129             reason = reason[15:]
    130         for regexp in emparse_list_reason:
    131             if type(regexp) is type(''):
    132                 for i in range(len(emails)-1,-1,-1):
    133                     email = emails[i]
    134                     exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
    135                     res = exp.search(data)
    136                     if res is not None:
    137                         errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
    138                         del emails[i]
    139                 continue
    140             res = regexp.search(data)
    141             if res is not None:
    142                 reason = res.group('reason')
    143                 break
    144     for email in emails:
    145         errors.append(' '.join((email.strip()+': '+reason).split()))
    146     return errors
    147 
    148 EMPARSERS = [emparse_list, ]
    149 
    150 def sort_numeric(a, b):
    151     a = int(a)
    152     b = int(b)
    153     if a < b: return -1
    154     elif a > b: return 1
    155     else: return 0
    156 
    157 def parsedir(dir, modify):
    158     os.chdir(dir)
    159     pat = re.compile('^[0-9]*$')
    160     errordict = {}
    161     errorfirst = {}
    162     errorlast = {}
    163     nok = nwarn = nbad = 0
    164 
    165     # find all numeric file names and sort them

    166     files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
    167     files.sort(sort_numeric)
    168 
    169     for fn in files:
    170         # Lets try to parse the file.

    171         fp = open(fn)
    172         m = ErrorMessage(fp)
    173         sender = m.getaddr('From')
    174         print '%s\t%-40s\t'%(fn, sender[1]),
    175 
    176         if m.is_warning():
    177             fp.close()
    178             print 'warning only'
    179             nwarn = nwarn + 1
    180             if modify:
    181                 os.rename(fn, ','+fn)
    182 ##              os.unlink(fn)

    183             continue
    184 
    185         try:
    186             errors = m.get_errors()
    187         except Unparseable:
    188             print '** Not parseable'
    189             nbad = nbad + 1
    190             fp.close()
    191             continue
    192         print len(errors), 'errors'
    193 
    194         # Remember them

    195         for e in errors:
    196             try:
    197                 mm, dd = m.getdate('date')[1:1+2]
    198                 date = '%s %02d' % (calendar.month_abbr[mm], dd)
    199             except:
    200                 date = '??????'
    201             if not errordict.has_key(e):
    202                 errordict[e] = 1
    203                 errorfirst[e] = '%s (%s)' % (fn, date)
    204             else:
    205                 errordict[e] = errordict[e] + 1
    206             errorlast[e] = '%s (%s)' % (fn, date)
    207 
    208         fp.close()
    209         nok = nok + 1
    210         if modify:
    211             os.rename(fn, ','+fn)
    212 ##          os.unlink(fn)

    213 
    214     print '--------------'
    215     print nok, 'files parsed,',nwarn,'files warning-only,',
    216     print nbad,'files unparseable'
    217     print '--------------'
    218     list = []
    219     for e in errordict.keys():
    220         list.append((errordict[e], errorfirst[e], errorlast[e], e))
    221     list.sort()
    222     for num, first, last, e in list:
    223         print '%d %s - %s\t%s' % (num, first, last, e)
    224 
    225 def main():
    226     modify = 0
    227     if len(sys.argv) > 1 and sys.argv[1] == '-d':
    228         modify = 1
    229         del sys.argv[1]
    230     if len(sys.argv) > 1:
    231         for folder in sys.argv[1:]:
    232             parsedir(folder, modify)
    233     else:
    234         parsedir('/ufs/jack/Mail/errorsinbox', modify)
    235 
# Run main() when executed as a script.  The second test also fires when
# sys.argv[0] equals this module's name.
# NOTE(review): presumably this supports a launcher that puts the module
# name in argv[0] -- confirm before removing.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
    238