Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python3
      2 """Classes to parse mailer-daemon messages."""
      3 
      4 import calendar
      5 import email.message
      6 import re
      7 import os
      8 import sys
      9 
     10 
     11 class Unparseable(Exception):
     12     pass
     13 
     14 
     15 class ErrorMessage(email.message.Message):
     16     def __init__(self):
     17         email.message.Message.__init__(self)
     18         self.sub = ''
     19 
     20     def is_warning(self):
     21         sub = self.get('Subject')
     22         if not sub:
     23             return 0
     24         sub = sub.lower()
     25         if sub.startswith('waiting mail'):
     26             return 1
     27         if 'warning' in sub:
     28             return 1
     29         self.sub = sub
     30         return 0
     31 
     32     def get_errors(self):
     33         for p in EMPARSERS:
     34             self.rewindbody()
     35             try:
     36                 return p(self.fp, self.sub)
     37             except Unparseable:
     38                 pass
     39         raise Unparseable
     40 
     41 # List of re's or tuples of re's.
     42 # If a re, it should contain at least a group (?P<email>...) which
     43 # should refer to the email address.  The re can also contain a group
     44 # (?P<reason>...) which should refer to the reason (error message).
     45 # If no reason is present, the emparse_list_reason list is used to
     46 # find a reason.
     47 # If a tuple, the tuple should contain 2 re's.  The first re finds a
     48 # location, the second re is repeated one or more times to find
     49 # multiple email addresses.  The second re is matched (not searched)
     50 # where the previous match ended.
     51 # The re's are compiled using the re module.
     52 emparse_list_list = [
     53     'error: (?P<reason>unresolvable): (?P<email>.+)',
     54     ('----- The following addresses had permanent fatal errors -----\n',
     55      '(?P<email>[^ \n].*)\n( .*\n)?'),
     56     'remote execution.*\n.*rmail (?P<email>.+)',
     57     ('The following recipients did not receive your message:\n\n',
     58      ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
     59     '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
     60     '^<(?P<email>.*)>:\n(?P<reason>.*)',
     61     '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
     62     '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
     63     '^Original-Recipient: rfc822;(?P<email>.*)',
     64     '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
     65     '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
     66     '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
     67     '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
     68     '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
     69     ]
     70 # compile the re's in the list and store them in-place.
     71 for i in range(len(emparse_list_list)):
     72     x = emparse_list_list[i]
     73     if type(x) is type(''):
     74         x = re.compile(x, re.MULTILINE)
     75     else:
     76         xl = []
     77         for x in x:
     78             xl.append(re.compile(x, re.MULTILINE))
     79         x = tuple(xl)
     80         del xl
     81     emparse_list_list[i] = x
     82     del x
     83 del i
     84 
     85 # list of re's used to find reasons (error messages).
     86 # if a string, "<>" is replaced by a copy of the email address.
     87 # The expressions are searched for in order.  After the first match,
     88 # no more expressions are searched for.  So, order is important.
     89 emparse_list_reason = [
     90     r'^5\d{2} <>\.\.\. (?P<reason>.*)',
     91     r'<>\.\.\. (?P<reason>.*)',
     92     re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
     93     re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
     94     re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
     95     ]
     96 emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
     97 def emparse_list(fp, sub):
     98     data = fp.read()
     99     res = emparse_list_from.search(data)
    100     if res is None:
    101         from_index = len(data)
    102     else:
    103         from_index = res.start(0)
    104     errors = []
    105     emails = []
    106     reason = None
    107     for regexp in emparse_list_list:
    108         if type(regexp) is type(()):
    109             res = regexp[0].search(data, 0, from_index)
    110             if res is not None:
    111                 try:
    112                     reason = res.group('reason')
    113                 except IndexError:
    114                     pass
    115                 while 1:
    116                     res = regexp[1].match(data, res.end(0), from_index)
    117                     if res is None:
    118                         break
    119                     emails.append(res.group('email'))
    120                 break
    121         else:
    122             res = regexp.search(data, 0, from_index)
    123             if res is not None:
    124                 emails.append(res.group('email'))
    125                 try:
    126                     reason = res.group('reason')
    127                 except IndexError:
    128                     pass
    129                 break
    130     if not emails:
    131         raise Unparseable
    132     if not reason:
    133         reason = sub
    134         if reason[:15] == 'returned mail: ':
    135             reason = reason[15:]
    136         for regexp in emparse_list_reason:
    137             if type(regexp) is type(''):
    138                 for i in range(len(emails)-1,-1,-1):
    139                     email = emails[i]
    140                     exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
    141                     res = exp.search(data)
    142                     if res is not None:
    143                         errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
    144                         del emails[i]
    145                 continue
    146             res = regexp.search(data)
    147             if res is not None:
    148                 reason = res.group('reason')
    149                 break
    150     for email in emails:
    151         errors.append(' '.join((email.strip()+': '+reason).split()))
    152     return errors
    153 
    154 EMPARSERS = [emparse_list]
    155 
    156 def sort_numeric(a, b):
    157     a = int(a)
    158     b = int(b)
    159     if a < b:
    160         return -1
    161     elif a > b:
    162         return 1
    163     else:
    164         return 0
    165 
    166 def parsedir(dir, modify):
    167     os.chdir(dir)
    168     pat = re.compile('^[0-9]*$')
    169     errordict = {}
    170     errorfirst = {}
    171     errorlast = {}
    172     nok = nwarn = nbad = 0
    173 
    174     # find all numeric file names and sort them
    175     files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')))
    176     files.sort(sort_numeric)
    177 
    178     for fn in files:
    179         # Lets try to parse the file.
    180         fp = open(fn)
    181         m = email.message_from_file(fp, _class=ErrorMessage)
    182         sender = m.getaddr('From')
    183         print('%s\t%-40s\t'%(fn, sender[1]), end=' ')
    184 
    185         if m.is_warning():
    186             fp.close()
    187             print('warning only')
    188             nwarn = nwarn + 1
    189             if modify:
    190                 os.rename(fn, ','+fn)
    191 ##              os.unlink(fn)
    192             continue
    193 
    194         try:
    195             errors = m.get_errors()
    196         except Unparseable:
    197             print('** Not parseable')
    198             nbad = nbad + 1
    199             fp.close()
    200             continue
    201         print(len(errors), 'errors')
    202 
    203         # Remember them
    204         for e in errors:
    205             try:
    206                 mm, dd = m.getdate('date')[1:1+2]
    207                 date = '%s %02d' % (calendar.month_abbr[mm], dd)
    208             except:
    209                 date = '??????'
    210             if e not in errordict:
    211                 errordict[e] = 1
    212                 errorfirst[e] = '%s (%s)' % (fn, date)
    213             else:
    214                 errordict[e] = errordict[e] + 1
    215             errorlast[e] = '%s (%s)' % (fn, date)
    216 
    217         fp.close()
    218         nok = nok + 1
    219         if modify:
    220             os.rename(fn, ','+fn)
    221 ##          os.unlink(fn)
    222 
    223     print('--------------')
    224     print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    225     print(nbad,'files unparseable')
    226     print('--------------')
    227     list = []
    228     for e in errordict.keys():
    229         list.append((errordict[e], errorfirst[e], errorlast[e], e))
    230     list.sort()
    231     for num, first, last, e in list:
    232         print('%d %s - %s\t%s' % (num, first, last, e))
    233 
    234 def main():
    235     modify = 0
    236     if len(sys.argv) > 1 and sys.argv[1] == '-d':
    237         modify = 1
    238         del sys.argv[1]
    239     if len(sys.argv) > 1:
    240         for folder in sys.argv[1:]:
    241             parsedir(folder, modify)
    242     else:
    243         parsedir('/ufs/jack/Mail/errorsinbox', modify)
    244 
    245 if __name__ == '__main__' or sys.argv[0] == __name__:
    246     main()
    247