1 """mailerdaemon - classes to parse mailer-daemon messages""" 2 3 import rfc822 4 import calendar 5 import re 6 import os 7 import sys 8 9 Unparseable = 'mailerdaemon.Unparseable' 10 11 class ErrorMessage(rfc822.Message): 12 def __init__(self, fp): 13 rfc822.Message.__init__(self, fp) 14 self.sub = '' 15 16 def is_warning(self): 17 sub = self.getheader('Subject') 18 if not sub: 19 return 0 20 sub = sub.lower() 21 if sub.startswith('waiting mail'): return 1 22 if 'warning' in sub: return 1 23 self.sub = sub 24 return 0 25 26 def get_errors(self): 27 for p in EMPARSERS: 28 self.rewindbody() 29 try: 30 return p(self.fp, self.sub) 31 except Unparseable: 32 pass 33 raise Unparseable 34 35 # List of re's or tuples of re's. 36 # If a re, it should contain at least a group (?P<email>...) which 37 # should refer to the email address. The re can also contain a group 38 # (?P<reason>...) which should refer to the reason (error message). 39 # If no reason is present, the emparse_list_reason list is used to 40 # find a reason. 41 # If a tuple, the tuple should contain 2 re's. The first re finds a 42 # location, the second re is repeated one or more times to find 43 # multiple email addresses. The second re is matched (not searched) 44 # where the previous match ended. 45 # The re's are compiled using the re module. 46 emparse_list_list = [ 47 'error: (?P<reason>unresolvable): (?P<email>.+)', 48 ('----- The following addresses had permanent fatal errors -----\n', 49 '(?P<email>[^ \n].*)\n( .*\n)?'), 50 'remote execution.*\n.*rmail (?P<email>.+)', 51 ('The following recipients did not receive your message:\n\n', 52 ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'), 53 '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)', 54 '^<(?P<email>.*)>:\n(?P<reason>.*)', 55 '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)', 56 '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)', 57 '^Original-Recipient: rfc822;(?P<email>.*)', 58 '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)', 59 '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)', 60 '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)', 61 '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)', 62 '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n', 63 ] 64 # compile the re's in the list and store them in-place. 65 for i in range(len(emparse_list_list)): 66 x = emparse_list_list[i] 67 if type(x) is type(''): 68 x = re.compile(x, re.MULTILINE) 69 else: 70 xl = [] 71 for x in x: 72 xl.append(re.compile(x, re.MULTILINE)) 73 x = tuple(xl) 74 del xl 75 emparse_list_list[i] = x 76 del x 77 del i 78 79 # list of re's used to find reasons (error messages). 80 # if a string, "<>" is replaced by a copy of the email address. 81 # The expressions are searched for in order. After the first match, 82 # no more expressions are searched for. So, order is important. 83 emparse_list_reason = [ 84 r'^5\d{2} <>\.\.\. (?P<reason>.*)', 85 '<>\.\.\. (?P<reason>.*)', 86 re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE), 87 re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'), 88 re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE), 89 ] 90 emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE) 91 def emparse_list(fp, sub): 92 data = fp.read() 93 res = emparse_list_from.search(data) 94 if res is None: 95 from_index = len(data) 96 else: 97 from_index = res.start(0) 98 errors = [] 99 emails = [] 100 reason = None 101 for regexp in emparse_list_list: 102 if type(regexp) is type(()): 103 res = regexp[0].search(data, 0, from_index) 104 if res is not None: 105 try: 106 reason = res.group('reason') 107 except IndexError: 108 pass 109 while 1: 110 res = regexp[1].match(data, res.end(0), from_index) 111 if res is None: 112 break 113 emails.append(res.group('email')) 114 break 115 else: 116 res = regexp.search(data, 0, from_index) 117 if res is not None: 118 emails.append(res.group('email')) 119 try: 120 reason = res.group('reason') 121 except IndexError: 122 pass 123 break 124 if not emails: 125 raise Unparseable 126 if not reason: 127 reason = sub 128 if reason[:15] == 'returned mail: ': 129 reason = reason[15:] 130 for regexp in emparse_list_reason: 131 if type(regexp) is type(''): 132 for i in range(len(emails)-1,-1,-1): 133 email = emails[i] 134 exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE) 135 res = exp.search(data) 136 if res is not None: 137 errors.append(' '.join((email.strip()+': '+res.group('reason')).split())) 138 del emails[i] 139 continue 140 res = regexp.search(data) 141 if res is not None: 142 reason = res.group('reason') 143 break 144 for email in emails: 145 errors.append(' '.join((email.strip()+': '+reason).split())) 146 return errors 147 148 EMPARSERS = [emparse_list, ] 149 150 def sort_numeric(a, b): 151 a = int(a) 152 b = int(b) 153 if a < b: return -1 154 elif a > b: return 1 155 else: return 0 156 157 def parsedir(dir, modify): 158 os.chdir(dir) 159 pat = re.compile('^[0-9]*$') 160 errordict = {} 161 errorfirst = {} 162 errorlast = {} 163 nok = nwarn = nbad = 0 164 165 # find all numeric file names and sort them 166 files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')) 167 files.sort(sort_numeric) 168 169 for fn in files: 170 # Lets try to parse the file. 171 fp = open(fn) 172 m = ErrorMessage(fp) 173 sender = m.getaddr('From') 174 print '%s\t%-40s\t'%(fn, sender[1]), 175 176 if m.is_warning(): 177 fp.close() 178 print 'warning only' 179 nwarn = nwarn + 1 180 if modify: 181 os.rename(fn, ','+fn) 182 ## os.unlink(fn) 183 continue 184 185 try: 186 errors = m.get_errors() 187 except Unparseable: 188 print '** Not parseable' 189 nbad = nbad + 1 190 fp.close() 191 continue 192 print len(errors), 'errors' 193 194 # Remember them 195 for e in errors: 196 try: 197 mm, dd = m.getdate('date')[1:1+2] 198 date = '%s %02d' % (calendar.month_abbr[mm], dd) 199 except: 200 date = '??????' 201 if not errordict.has_key(e): 202 errordict[e] = 1 203 errorfirst[e] = '%s (%s)' % (fn, date) 204 else: 205 errordict[e] = errordict[e] + 1 206 errorlast[e] = '%s (%s)' % (fn, date) 207 208 fp.close() 209 nok = nok + 1 210 if modify: 211 os.rename(fn, ','+fn) 212 ## os.unlink(fn) 213 214 print '--------------' 215 print nok, 'files parsed,',nwarn,'files warning-only,', 216 print nbad,'files unparseable' 217 print '--------------' 218 list = [] 219 for e in errordict.keys(): 220 list.append((errordict[e], errorfirst[e], errorlast[e], e)) 221 list.sort() 222 for num, first, last, e in list: 223 print '%d %s - %s\t%s' % (num, first, last, e) 224 225 def main(): 226 modify = 0 227 if len(sys.argv) > 1 and sys.argv[1] == '-d': 228 modify = 1 229 del sys.argv[1] 230 if len(sys.argv) > 1: 231 for folder in sys.argv[1:]: 232 parsedir(folder, modify) 233 else: 234 parsedir('/ufs/jack/Mail/errorsinbox', modify) 235 236 if __name__ == '__main__' or sys.argv[0] == __name__: 237 main() 238