Projet

Général

Profil

Demande #3021 » check_bounces.py

François Poulain, 06/09/2019 15:28

 
1
#!/usr/bin/env python3
2

    
3
#   Copyright (C) 2018  François Poulain <fpoulain@metrodore.fr>
4
#
5
#   This program is free software: you can redistribute it and/or modify
6
#   it under the terms of the GNU General Public License as published by
7
#   the Free Software Foundation, either version 3 of the License, or
8
#   (at your option) any later version.
9
#
10
#   This program is distributed in the hope that it will be useful,
11
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
#   GNU General Public License for more details.
14
#
15
#   You should have received a copy of the GNU General Public License
16
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
17

    
18
"""
19
Parse mail.log and count sending statuses by domains in order to detect
20
deliverability issues and impacted domains.
21
"""
22

    
23
# TODO:
24
# [ ] add a comparison against the previous period
25
# [ ] group by relay
26

    
27
import argparse, datetime, re, json
28
from itertools import groupby
29

    
30
# Log files, read in this order and concatenated. The paths are relative,
# so they are resolved against the current working directory.
logfiles = [
    'mail.log.1',
    'mail.log',
]
# JSON file in which the computed per-domain statistics are cached.
cachefile = '/tmp/check_bounces.json'
32

    
33
# ======================================
34
# Argument parsing
35
# ======================================
36

    
37
class ConstraintsAction(argparse.Action):
    """Store a threshold and verify that warning does not exceed critical.

    The action stores the parsed value on the namespace like the default
    ``store`` action, then compares the ``warning`` and ``critical``
    attributes of the namespace once both have been given.

    Raises:
        argparse.ArgumentError: when the warning threshold is strictly
            greater than the critical threshold.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
        warning = getattr(namespace, 'warning', None)
        critical = getattr(namespace, 'critical', None)
        # Compare against None instead of relying on truthiness: 0 is a
        # legitimate threshold and must still take part in the check.
        if warning is not None and critical is not None and warning > critical:
            raise argparse.ArgumentError(
                self,
                'critical threshold should be greater than warning threshold')
44

    
45
# Command-line interface definition; arguments are parsed at import time.
parser = argparse.ArgumentParser(
    description="""
Parse mail.log and count sending statuses by domains in order to detect
delivrability issues and impacted domains.
""",
)

# Both thresholds are required and go through ConstraintsAction.
ma = parser.add_argument_group(title='mandatory arguments', description=None)
ma.add_argument(
    '-C', '--critical',
    metavar='THRESH',
    type=int,
    required=True,
    action=ConstraintsAction,
    help='Critical threshold',
)
ma.add_argument(
    '-W', '--warning',
    metavar='THRESH',
    type=int,
    required=True,
    action=ConstraintsAction,
    help='Warning threshold',
)

# Optional arguments.
parser.add_argument(
    '-D', '--domains',
    metavar='DOMAIN.TLD',
    type=str,
    nargs='+',
    help='Warn when given domain(s) appears in top bounced domains.',
)
parser.add_argument(
    '-N', '--number',
    metavar='INT',
    type=int,
    default=3,
    help='Number of considered bounced domains domains. Default is 3.',
)
parser.add_argument(
    '-v', '--verbose',
    default=0,
    action='count',
    help='Verbose output. -vv for very verbose output',
)
parser.add_argument(
    '-V', '--version',
    action='version',
    version='%(prog)s 0.1',
)

args = parser.parse_args()
65

    
66
# ======================================
67
# log parsing and data processing
68
# ======================================
69

    
70

    
71
def parse_log(log, stamp):
    """Extract delivery records for one day from a mail log.

    Args:
        log: the whole log as a single string (one record per line).
        stamp: literal date prefix that a line must start with,
            e.g. ``'Sep  5'``.

    Returns:
        A list of ``(time, id, recipient, relay, status)`` string tuples,
        one per matching line, in log order.
    """
    # The stamp is escaped so that it is always matched literally, and
    # every fragment is a raw string so that ``\d`` is a regex escape, not
    # an (invalid) Python string escape.
    pattern = (
        r'^' + re.escape(stamp)
        + r' (\d\d:\d\d:\d\d) .*([0-9A-F]{9}): to=<([^@]+@[^>]+)>'
        + r'.*relay=([^,]+), .*status=([a-z]+)'
    )
    return re.findall(pattern, log, re.MULTILINE)
77

    
78

    
79
def name_fields(l):
    """Turn positional match tuples into dictionaries with named fields.

    Each item of ``l`` is indexed at positions 0 to 4, which become the
    ``time``, ``id``, ``domain``, ``relay`` and ``status`` keys respectively.
    """
    field_names = ('time', 'id', 'domain', 'relay', 'status')
    records = []
    for match in l:
        records.append(
            {name: match[position] for position, name in enumerate(field_names)}
        )
    return records
90

    
91

    
92
def remove_local_relays(l):
    """Return the records of ``l`` whose relay is not a local one.

    A record is dropped when its ``relay`` value is exactly ``'none'`` or
    ``'local'``, or when it contains ``'127.0.0.1'`` or ``'172.16.0.'``.
    The order of the remaining records is preserved.
    """
    kept = []
    for record in l:
        if record['relay'] in ['none', 'local']:
            continue
        if '127.0.0.1' in record['relay']:
            continue
        if '172.16.0.' in record['relay']:
            continue
        kept.append(record)
    return kept
101

    
102

    
103
def drop_retries(l):
    """Return ``l`` unchanged.

    NOTE(review): despite its name this is currently a no-op; presumably a
    placeholder for collapsing repeated delivery attempts -- confirm.
    """
    return l
105

    
106

    
107
def resolve_domains(l):
    """Replace each record's recipient address by the domain it targets.

    The ``domain`` value of every record is expected to hold a full
    address.  For SRS-rewritten addresses the domain embedded in the local
    part is used:

    * ``srs0=<hash>=<tt>=<domain>=...`` yields ``<domain>``;
    * ``srsN=<hash>=<domain>=...`` (any other digit suffix) yields
      ``<domain>``.

    The SRS tag is matched case-insensitively.  Any other address, and any
    SRS-looking address that does not follow the layout above, yields the
    part after the first ``@``.  A value without ``@`` is kept as is
    instead of aborting the whole run.

    Returns:
        A new list of new dictionaries; the input records are not mutated.
    """
    srs0_layout = re.compile(
        r'^srs\d+=(?P<hash>[^=]+)=(?P<tt>[^=]+)=(?P<domain>[^=]+)=',
        re.IGNORECASE)
    srsn_layout = re.compile(
        r'^srs\d+=(?P<hash>[^=]+)=(?P<domain>[^=]+)=',
        re.IGNORECASE)
    plain_layout = re.compile(r'^[^@]+@([^>]+)$')

    def revert_srs(s):
        # Pick the layout from the tag first, so that a malformed srs0
        # address is never parsed with the shorter srsN layout.
        if re.match(r'^srs0=', s, re.IGNORECASE):
            found = srs0_layout.match(s)
        elif re.match(r'^srs\d+=', s, re.IGNORECASE):
            found = srsn_layout.match(s)
        else:
            found = None
        if found:
            return found.group('domain')
        # Not SRS, or malformed SRS: fall back to the address' own domain.
        found = plain_layout.match(s)
        return found.group(1) if found else s

    return [{**d, 'domain': revert_srs(d['domain'])} for d in l]
116

    
117

    
118
def regroupby(l, key):
    """Group the dictionaries of ``l`` by the value stored under ``key``.

    Returns a list with one dictionary per distinct value, ordered by that
    value.  Each dictionary holds the value under ``key``, the group size
    under ``'count'`` and, under ``'list'``, the members of the group with
    the ``key`` entry removed.
    """
    def pick(record):
        return record[key]

    groups = []
    # groupby only merges adjacent items, hence the sort on the same key.
    for value, members in groupby(sorted(l, key=pick), key=pick):
        stripped = [
            {name: content for name, content in member.items() if name != key}
            for member in members
        ]
        groups.append({key: value, 'count': len(stripped), 'list': stripped})
    return groups
122

    
123
# ======================================
124
# Main call
125
# ======================================
126

    
127
# Labels printed for each numeric status; the list index is the status code.
status_output = [
    'OK',
    'WARN',
    'CRIT',
    'UNK',
]
128

    
129

    
130
def get_by_domains_by_status():
    """Return yesterday's delivery statistics grouped by domain and status.

    The result is read from ``cachefile`` when it holds an entry for
    yesterday's date stamp.  Otherwise the files listed in ``logfiles`` are
    read, parsed and aggregated, and the result is written back to
    ``cachefile`` (replacing its previous content).

    Returns:
        A list with one dictionary per recipient domain (``domain``,
        ``count``, ``list``).  ``list`` holds one dictionary per delivery
        status (``status``, ``count``, ``list``), whose own ``list`` groups
        the records of that status by relay.

    Raises:
        ValueError: when none of the log files could be read, or when they
            are all empty.
    """
    yesterday = datetime.date.today() - datetime.timedelta(1)
    # Days below 10 are space-padded ("Sep  5") instead of zero-padded;
    # presumably to match the syslog timestamp layout of the log files.
    yesterday_stamp = yesterday.strftime('%b %d').replace(' 0', '  ')

    # A missing, unreadable, corrupt or outdated cache simply means the
    # statistics have to be recomputed.
    try:
        with open(cachefile) as f:
            return json.load(f)[yesterday_stamp]
    except (OSError, ValueError, KeyError, TypeError):
        pass

    chunks = []
    for logfile in logfiles:
        try:
            # errors='replace': a few undecodable bytes must not make the
            # whole file silently disappear from the statistics.
            with open(logfile, errors='replace') as f:
                chunks.append(f.read())
        except OSError:
            # Best effort: a log file may legitimately be absent.
            continue
    log = ''.join(chunks)
    if log == "":
        raise ValueError("No logfile found")

    statuses = parse_log(log, yesterday_stamp)
    statuses = name_fields(statuses)
    statuses = remove_local_relays(statuses)
    statuses = drop_retries(statuses)
    statuses = resolve_domains(statuses)

    by_domains = regroupby(statuses, 'domain')

    # For every domain, group by status, then group the records of each
    # status (e['list'], not the whole domain) by relay.
    by_domains_by_status = [
        {
            **d,
            'list': [
                {**e, 'list': regroupby(e['list'], 'relay')}
                for e in regroupby(d['list'], 'status')
            ],
        }
        for d in by_domains
    ]

    # The cache is only an optimisation: failing to write it must not
    # prevent the freshly computed result from being reported.
    try:
        with open(cachefile, 'w') as f:
            json.dump({yesterday_stamp: by_domains_by_status}, f, indent=2)
    except OSError:
        pass

    return by_domains_by_status
169

    
170

    
171
# Compute the check result, print a one-line summary followed by perfdata,
# and leave with the numeric status as exit code (3 on any error).
try:
    by_domains_by_status = get_by_domains_by_status()

    # One score line per domain, with the count of each status of interest
    # (0 when the domain has no record with that status).
    scores = []
    for domain in by_domains_by_status:
        counts = {entry['status']: entry['count'] for entry in domain['list']}
        scores.append({
            'domain': domain['domain'],
            'sent': counts.get('sent', 0),
            'bounced': counts.get('bounced', 0),
            'deferred': counts.get('deferred', 0),
        })

    # Keep the args.number worst domains: most bounces first, then most
    # deferrals, then most successful deliveries.
    scores = sorted(
        scores,
        key=lambda x: (x['bounced'], x['deferred'], x['sent']),
        reverse=True,
    )[:args.number]

    bounces = sum(s['bounced'] for s in scores)
    perfsdata = 'bounced={} deferred={} sent={}'.format(
        bounces,
        sum(s['deferred'] for s in scores),
        sum(s['sent'] for s in scores),
    )

    if bounces > args.critical:
        status = 2
    elif bounces > args.warning:
        status = 1
    else:
        status = 0

    # A watched domain among the top bounced domains raises the status to
    # at least WARN.  It must never lower an already critical status, and a
    # domain without any bounce is not a "bounced domain" even when it
    # falls inside the top-N slice.
    watched = set(args.domains or [])
    if any(s['domain'] in watched and s['bounced'] > 0 for s in scores):
        status = max(status, 1)

    print('check_bounces:', status_output[status], 'Total bounces:', bounces, '|', perfsdata)
    if args.verbose > 1:
        top_domains = {s['domain'] for s in scores}
        print(json.dumps(
            [dest for dest in by_domains_by_status if dest['domain'] in top_domains],
            indent=2))
    elif args.verbose > 0:
        for score in scores:
            print(score)
except Exception as e:
    print('exception occured: {}'.format(e))
    # SystemExit is raised directly so the script does not depend on the
    # ``exit`` helper injected by the ``site`` module.
    raise SystemExit(3)

raise SystemExit(status)
(2-2/2)