1"""mailerdaemon - classes to parse mailer-daemon messages"""
2
3import rfc822
4import calendar
5import re
6import os
7import sys
8
class Unparseable(Exception):
    """Raised when a message body matches none of the known bounce formats.

    This used to be a plain string used as an exception.  String
    exceptions are rejected by the interpreter from Python 2.6 on, so a
    real exception class is used instead; existing ``raise Unparseable``
    and ``except Unparseable`` statements keep working unchanged.
    """
10
class ErrorMessage(rfc822.Message):
    """An rfc822 message that can be inspected as a mailer-daemon report.

    Typical use: construct from an open file, call is_warning(), and if
    that returns false call get_errors() to obtain the error strings.
    """

    def __init__(self, fp):
        """Parse the headers from fp and start with an empty stored subject."""
        rfc822.Message.__init__(self, fp)
        # Lower-cased Subject text.  It is only filled in by is_warning(),
        # and only when the message is not classified as a warning.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the Subject marks this message as a warning, else 0.

        A subject counts as a warning when, compared case-insensitively,
        it starts with 'waiting mail' or contains 'warning'.  For any
        other non-empty subject the lower-cased text is stored in
        self.sub so that get_errors() can hand it to the parsers.
        """
        subject = self.getheader('Subject')
        if subject:
            subject = subject.lower()
            if subject.startswith('waiting mail') or 'warning' in subject:
                return 1
            self.sub = subject
        return 0

    def get_errors(self):
        """Return the result of the first parser in EMPARSERS that succeeds.

        Each parser is called with the message file, rewound to the start
        of the body, and the stored subject (self.sub, which is empty
        unless is_warning() was called first and stored one).  A parser
        signals failure by raising Unparseable; if every parser fails,
        Unparseable is raised to the caller.
        """
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                errors = parser(self.fp, self.sub)
            except Unparseable:
                continue
            return errors
        raise Unparseable
34
# Patterns used to locate the failing email address(es) in a message.
# Each entry is either a single regular expression or a tuple of two.
#
# Single expression: it must contain a group (?P<email>...) matching
# the email address.  It may also contain a group (?P<reason>...)
# matching the reason (error message).  When no reason is captured,
# the emparse_list_reason list is used to find one.
#
# Tuple: it holds two expressions.  The first one finds a location;
# the second one is then applied one or more times to collect
# multiple email addresses.  The second expression is matched (not
# searched) starting where the previous match ended.
#
# The entries are written as strings here and compiled with the re
# module right below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Replace every entry by its compiled form, in place.  A string becomes
# one compiled pattern; anything else is treated as a sequence of
# strings and becomes a tuple of compiled patterns.  All patterns are
# compiled with re.MULTILINE.
for _pos, _spec in enumerate(emparse_list_list):
    if isinstance(_spec, str):
        _compiled = re.compile(_spec, re.MULTILINE)
    else:
        _compiled = tuple(map(lambda _part: re.compile(_part, re.MULTILINE),
                              _spec))
    emparse_list_list[_pos] = _compiled
# Do not leave the loop temporaries behind in the module namespace.
del _pos, _spec, _compiled
78
# Patterns used to find a reason (error message) when the address
# pattern did not capture one.  Each entry is either a string or a
# compiled regular expression containing a group (?P<reason>...).
# In a string entry, every "<>" is replaced by a (regex-escaped) copy
# of the email address before the string is compiled.
# The entries are tried in order.  A matching compiled entry ends the
# search, so order is important.
# String entries are written as raw strings so that regex escapes such
# as "\." and "\d" are not interpreted as (invalid) string escapes.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Matches the start of any line beginning with "From:" (any letter case).
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Extract "address: reason" strings from the text read from fp.

    fp  -- file-like object; everything from its current position is read.
    sub -- text used as the reason when none can be found in the text
           (a leading 'returned mail: ' is stripped from it).

    The address patterns in emparse_list_list are tried in order and
    the first one that is found decides which addresses are reported.
    If that pattern captured no reason, emparse_list_reason is consulted.

    Returns a list of strings of the form "address: reason" in which
    every run of whitespace is collapsed to a single space.
    Raises Unparseable when no address was found.
    """
    text = fp.read()

    # Addresses are only looked for in the part of the text that comes
    # before the first line starting with "From:"; without such a line
    # the whole text is eligible.
    from_hit = emparse_list_from.search(text)
    if from_hit is not None:
        limit = from_hit.start(0)
    else:
        limit = len(text)

    collected = []
    addresses = []
    reason = None

    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # Two-stage entry: locate the list, then collect its members.
            hit = pattern[0].search(text, 0, limit)
            if hit is None:
                continue
            try:
                reason = hit.group('reason')
            except IndexError:
                # The locating pattern has no 'reason' group.
                pass
            repeat = pattern[1]
            hit = repeat.match(text, hit.end(0), limit)
            while hit is not None:
                addresses.append(hit.group('email'))
                hit = repeat.match(text, hit.end(0), limit)
            # Stop at the first located list, even if it yielded nothing.
            break

        hit = pattern.search(text, 0, limit)
        if hit is None:
            continue
        addresses.append(hit.group('email'))
        try:
            reason = hit.group('reason')
        except IndexError:
            # This pattern has no 'reason' group.
            pass
        break

    if not addresses:
        raise Unparseable

    if not reason:
        # Fall back to the subject, then try to do better from the text.
        prefix = 'returned mail: '
        reason = sub
        if reason[:len(prefix)] == prefix:
            reason = reason[len(prefix):]

        for pattern in emparse_list_reason:
            if not isinstance(pattern, str):
                # Compiled entry: its reason applies to all addresses
                # that are still pending, and ends the search.
                hit = pattern.search(text)
                if hit is not None:
                    reason = hit.group('reason')
                    break
                continue

            # String entry: a per-address template.  Walk the addresses
            # from the last to the first so that deleting a resolved one
            # does not disturb the positions still to be visited.
            for pos in range(len(addresses) - 1, -1, -1):
                address = addresses[pos]
                specific = re.compile(
                    pattern.replace('<>', re.escape(address)), re.MULTILINE)
                hit = specific.search(text)
                if hit is not None:
                    line = address.strip() + ': ' + hit.group('reason')
                    collected.append(' '.join(line.split()))
                    del addresses[pos]

    # Whatever is still pending gets the common reason.
    for address in addresses:
        line = address.strip() + ': ' + reason
        collected.append(' '.join(line.split()))
    return collected

# Parsers tried in order by ErrorMessage.get_errors().
EMPARSERS = [emparse_list]
149
def sort_numeric(a, b):
    """Compare a and b by their integer values, cmp-style.

    Both arguments are converted with int().  Returns -1 if a sorts
    before b, 1 if it sorts after, and 0 if the two values are equal.
    """
    left, right = int(a), int(b)
    # Exactly one of the two comparisons can be true, giving -1, 0 or 1.
    return (left > right) - (left < right)
156
157def parsedir(dir, modify):
158    os.chdir(dir)
159    pat = re.compile('^[0-9]*$')
160    errordict = {}
161    errorfirst = {}
162    errorlast = {}
163    nok = nwarn = nbad = 0
164
165    # find all numeric file names and sort them
166    files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
167    files.sort(sort_numeric)
168
169    for fn in files:
170        # Lets try to parse the file.
171        fp = open(fn)
172        m = ErrorMessage(fp)
173        sender = m.getaddr('From')
174        print '%s\t%-40s\t'%(fn, sender[1]),
175
176        if m.is_warning():
177            fp.close()
178            print 'warning only'
179            nwarn = nwarn + 1
180            if modify:
181                os.rename(fn, ','+fn)
182##              os.unlink(fn)
183            continue
184
185        try:
186            errors = m.get_errors()
187        except Unparseable:
188            print '** Not parseable'
189            nbad = nbad + 1
190            fp.close()
191            continue
192        print len(errors), 'errors'
193
194        # Remember them
195        for e in errors:
196            try:
197                mm, dd = m.getdate('date')[1:1+2]
198                date = '%s %02d' % (calendar.month_abbr[mm], dd)
199            except:
200                date = '??????'
201            if not errordict.has_key(e):
202                errordict[e] = 1
203                errorfirst[e] = '%s (%s)' % (fn, date)
204            else:
205                errordict[e] = errordict[e] + 1
206            errorlast[e] = '%s (%s)' % (fn, date)
207
208        fp.close()
209        nok = nok + 1
210        if modify:
211            os.rename(fn, ','+fn)
212##          os.unlink(fn)
213
214    print '--------------'
215    print nok, 'files parsed,',nwarn,'files warning-only,',
216    print nbad,'files unparseable'
217    print '--------------'
218    list = []
219    for e in errordict.keys():
220        list.append((errordict[e], errorfirst[e], errorlast[e], e))
221    list.sort()
222    for num, first, last, e in list:
223        print '%d %s - %s\t%s' % (num, first, last, e)
224
def main():
    """Run parsedir() on the folders named on the command line.

    A leading '-d' argument turns on modify mode (passed on to
    parsedir()) and is removed from sys.argv.  Without any folder
    argument a built-in default folder is processed.
    """
    argv = sys.argv
    modify = 0
    if argv[1:2] == ['-d']:
        modify = 1
        # Remove the flag from sys.argv itself, not from a copy.
        del argv[1]

    folders = argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)

# Run as a script.  NOTE(review): the second test presumably supports an
# environment where sys.argv[0] holds the module name -- confirm before
# removing it.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
238