#!/usr/bin/python3
# Origin: patch "Add Nagios checks" by Peter J. Holzer, 2019-09-29
# (commit 6c67de3bc96de2875ed85cc2b06aa5381905ee64; new file check_df_files, mode 100755).
"""Nagios check: warn when a filesystem is forecast to run out of files (inodes).

The forecast is computed entirely in SQL from the ``df`` table: for a series
of look-back windows (2 minutes, doubling until the window exceeds one year)
the change in ``f_used`` between the oldest and the newest sample inside the
window is extrapolated linearly over an equally long span into the future.
Every host/filesystem/window whose extrapolated usage exceeds ``f_usable`` is
reported.

Output is one line, ``df_files <STATUS> - <message>``, and the process exits
with the matching Nagios return code (0 OK, 1 WARNING, 2 CRITICAL).
"""

import datetime
import os  # kept from the original import list; not used below
import socket  # kept from the original import list; not used below
import sys

# NOTE(review): the "percent" column divides by b.f_usable and is not read by
# this script; a filesystem stored with f_usable = 0 would presumably make the
# whole query fail with a division-by-zero error -- confirm against the data.
FORECAST_QUERY = """
    with recursive t as (
        select '2 minutes'::interval as d
        union
        select d * 2 from t where d < '1 year'::interval
    ),
    a as (
        select distinct hostname, filesystem, min(ts), max(ts)
        from df, t
        where ts >= now() - t.d
        group by hostname, filesystem, t.d
    ),
    forecast as (
        select
            a.hostname, a.filesystem,
            c.ts - b.ts as d,
            c.ts + (c.ts - b.ts) as when,
            c.f_used + (c.f_used - b.f_used) as used,
            (c.f_used + (c.f_used - b.f_used)) / b.f_usable * 100 as percent,
            (c.f_used + (c.f_used - b.f_used)) > b.f_usable as capacity_exceeded
        from a
        join df b on (a.hostname = b.hostname and a.filesystem = b.filesystem and a.min = b.ts)
        join df c on (a.hostname = c.hostname and a.filesystem = c.filesystem and a.max = c.ts)
    )
    select * from forecast
    where capacity_exceeded
    order by 1, 2, 3
    """

OK, WARNING, CRITICAL = 0, 1, 2
STATUS_NAMES = ["OK", "WARNING", "CRITICAL"]

# A forecast based on less than this much history means the limit is expected
# to be hit less than this long after the newest sample: treat it as critical.
CRITICAL_WINDOW = datetime.timedelta(hours=24)


def evaluate(rows):
    """Fold forecast rows into a Nagios status and a message.

    Args:
        rows: iterable of mappings with the keys ``"hostname"``,
            ``"filesystem"``, ``"d"`` (a ``timedelta``: span between the two
            samples the forecast is based on) and ``"when"`` (a ``datetime``:
            the moment the capacity is forecast to be exceeded).

    Returns:
        ``(status, message)``: ``status`` is 0 when there are no rows, 2 if
        any row has ``d`` below 24 hours, and 1 otherwise; ``message`` is the
        concatenation of ``"<hostname>:<filesystem> at <YYYY-MM-DD HH:MM>; "``
        for every row, in input order.
    """
    status = OK
    parts = []
    for row in rows:
        severity = CRITICAL if row["d"] < CRITICAL_WINDOW else WARNING
        status = max(status, severity)
        # The timestamp to format is "when"; "filesystem" is a plain string.
        parts.append(
            "%s:%s at %s; "
            % (row["hostname"], row["filesystem"], row["when"].strftime("%Y-%m-%d %H:%M"))
        )
    return status, "".join(parts)


def main():
    """Run the forecast query, print the Nagios status line, return the exit code."""
    # Imported here so that evaluate() can be imported without the database driver.
    import psycopg2
    import psycopg2.extras

    # Empty DSN: connection parameters come from libpq's environment/defaults.
    db = psycopg2.connect('')
    try:
        csr = db.cursor(cursor_factory=psycopg2.extras.DictCursor)
        csr.execute(FORECAST_QUERY)
        status, message = evaluate(csr)
    finally:
        db.close()

    print("df_files", STATUS_NAMES[status], "-", message)
    return status


if __name__ == "__main__":
    # Nagios takes the service state from the exit code, not from the text.
    sys.exit(main())
#!/usr/bin/python3
# Origin: patch "Add Nagios checks" (new file check_df_space, mode 100755).
"""Nagios check: warn when a filesystem is forecast to run out of space.

The forecast is computed entirely in SQL from the ``df`` table: for a series
of look-back windows (2 minutes, doubling until the window exceeds one year)
the change in ``s_used`` between the oldest and the newest sample inside the
window is extrapolated linearly over an equally long span into the future.
Every host/filesystem/window whose extrapolated usage exceeds ``s_usable`` is
reported.

Output is one line, ``df_space <STATUS> - <message>``, and the process exits
with the matching Nagios return code (0 OK, 1 WARNING, 2 CRITICAL).
"""

import datetime
import os  # kept from the original import list; not used below
import socket  # kept from the original import list; not used below
import sys

# NOTE(review): the "percent" column divides by b.s_usable and is not read by
# this script; a filesystem stored with s_usable = 0 would presumably make the
# whole query fail with a division-by-zero error -- confirm against the data.
FORECAST_QUERY = """
    with recursive t as (
        select '2 minutes'::interval as d
        union
        select d * 2 from t where d < '1 year'::interval
    ),
    a as (
        select distinct hostname, filesystem, min(ts), max(ts)
        from df, t
        where ts >= now() - t.d
        group by hostname, filesystem, t.d
    ),
    forecast as (
        select
            a.hostname, a.filesystem,
            c.ts - b.ts as d,
            c.ts + (c.ts - b.ts) as when,
            c.s_used + (c.s_used - b.s_used) as used,
            (c.s_used + (c.s_used - b.s_used)) / b.s_usable * 100 as percent,
            (c.s_used + (c.s_used - b.s_used)) > b.s_usable as capacity_exceeded
        from a
        join df b on (a.hostname = b.hostname and a.filesystem = b.filesystem and a.min = b.ts)
        join df c on (a.hostname = c.hostname and a.filesystem = c.filesystem and a.max = c.ts)
    )
    select * from forecast
    where capacity_exceeded
    order by 1, 2, 3
    """

OK, WARNING, CRITICAL = 0, 1, 2
STATUS_NAMES = ["OK", "WARNING", "CRITICAL"]

# A forecast based on less than this much history means the limit is expected
# to be hit less than this long after the newest sample: treat it as critical.
CRITICAL_WINDOW = datetime.timedelta(hours=24)


def evaluate(rows):
    """Fold forecast rows into a Nagios status and a message.

    Args:
        rows: iterable of mappings with the keys ``"hostname"``,
            ``"filesystem"``, ``"d"`` (a ``timedelta``: span between the two
            samples the forecast is based on) and ``"when"`` (a ``datetime``:
            the moment the capacity is forecast to be exceeded).

    Returns:
        ``(status, message)``: ``status`` is 0 when there are no rows, 2 if
        any row has ``d`` below 24 hours, and 1 otherwise; ``message`` is the
        concatenation of ``"<hostname>:<filesystem> at <YYYY-MM-DD HH:MM>; "``
        for every row, in input order.
    """
    status = OK
    parts = []
    for row in rows:
        severity = CRITICAL if row["d"] < CRITICAL_WINDOW else WARNING
        status = max(status, severity)
        parts.append(
            "%s:%s at %s; "
            % (row["hostname"], row["filesystem"], row["when"].strftime("%Y-%m-%d %H:%M"))
        )
    return status, "".join(parts)


def main():
    """Run the forecast query, print the Nagios status line, return the exit code."""
    # Imported here so that evaluate() can be imported without the database driver.
    import psycopg2
    import psycopg2.extras

    # Empty DSN: connection parameters come from libpq's environment/defaults.
    db = psycopg2.connect('')
    try:
        csr = db.cursor(cursor_factory=psycopg2.extras.DictCursor)
        csr.execute(FORECAST_QUERY)
        status, message = evaluate(csr)
    finally:
        db.close()

    print("df_space", STATUS_NAMES[status], "-", message)
    return status


if __name__ == "__main__":
    # Nagios takes the service state from the exit code, not from the text.
    sys.exit(main())