Add Nagios checks

This commit is contained in:
Peter J. Holzer 2019-09-29 09:22:01 +02:00
parent d3d79f6d16
commit 6c67de3bc9
2 changed files with 111 additions and 0 deletions

56
check_df_files Executable file
View File

@ -0,0 +1,56 @@
#!/usr/bin/python3
import datetime
import os
import socket
import psycopg2
import psycopg2.extras
# Forecast query.
#
# `t` yields look-back windows: 2 minutes, then doubling until the window
# is at least one year long. For every window and every
# (hostname, filesystem) pair, `a` finds the oldest and newest `df` sample
# inside that window. `forecast` then extrapolates linearly: whatever
# f_used changed by between the two samples is assumed to happen again over
# the same time span `d` after the newest sample. Only rows whose
# extrapolated usage exceeds f_usable are returned.
# NOTE(review): f_used / f_usable look like file (inode) counts - confirm
# against the schema of the `df` table.
FORECAST_QUERY = """
with recursive t as (
select '2 minutes'::interval as d
union
select d * 2 from t where d < '1 year'::interval
),
a as (
select distinct hostname, filesystem, min(ts), max(ts)
from df, t
where ts >= now() - t.d
group by hostname, filesystem, t.d
),
forecast as (
select
a.hostname, a.filesystem,
c.ts - b.ts as d,
c.ts + (c.ts - b.ts) as when,
c.f_used + (c.f_used - b.f_used) as used,
(c.f_used + (c.f_used - b.f_used)) / b.f_usable * 100 as percent,
(c.f_used + (c.f_used - b.f_used)) > b.f_usable as capacity_exceeded
from a
join df b on (a.hostname = b.hostname and a.filesystem = b.filesystem and a.min = b.ts)
join df c on (a.hostname = c.hostname and a.filesystem = c.filesystem and a.max = c.ts)
)
select * from forecast
where capacity_exceeded
order by 1, 2, 3
"""

# A forecast whose sample span `d` is shorter than this is CRITICAL. Since
# the forecast looks exactly `d` ahead of the newest sample, this means the
# capacity is projected to be exceeded less than 24 hours after that sample.
CRITICAL_HORIZON = datetime.timedelta(hours=24)

# Indexed by the numeric status, which doubles as the process exit code.
STATUS_NAMES = ["OK", "WARNING", "CRITICAL"]


def evaluate_forecasts(rows):
    """Reduce forecast rows to a check status and a human-readable message.

    Args:
        rows: iterable of mappings with the keys "d" (a timedelta),
            "hostname", "filesystem" and "when" (a datetime), i.e. the
            rows returned by FORECAST_QUERY.

    Returns:
        A tuple (status, message). status is 0 if there are no rows, 2 if
        any row has d below CRITICAL_HORIZON, and 1 otherwise. message
        contains one "host:filesystem at YYYY-MM-DD HH:MM; " entry per row.
    """
    status = 0
    parts = []
    for r in rows:
        severity = 2 if r["d"] < CRITICAL_HORIZON else 1
        # Never downgrade: one critical row keeps the whole check critical.
        status = max(status, severity)
        # The timestamp to format is "when"; "filesystem" is plain text.
        parts.append(
            "%s:%s at %s; "
            % (
                r["hostname"],
                r["filesystem"],
                r["when"].strftime("%Y-%m-%d %H:%M"),
            )
        )
    return status, "".join(parts)


def main():
    """Run the forecast query, print the check result, return the status."""
    # The empty DSN leaves all connection parameters to libpq defaults and
    # the PG* environment variables.
    db = psycopg2.connect('')
    try:
        csr = db.cursor(cursor_factory=psycopg2.extras.DictCursor)
        csr.execute(FORECAST_QUERY)
        status, message = evaluate_forecasts(csr)
    finally:
        db.close()
    print("df_files", STATUS_NAMES[status], "-", message)
    return status


if __name__ == "__main__":
    # Nagios derives the service state from the exit code, not from the
    # printed text, so the status must be passed on to the caller.
    raise SystemExit(main())

55
check_df_space Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/python3
import datetime
import os
import socket
import psycopg2
import psycopg2.extras
# Forecast query.
#
# `t` yields look-back windows: 2 minutes, then doubling until the window
# is at least one year long. For every window and every
# (hostname, filesystem) pair, `a` finds the oldest and newest `df` sample
# inside that window. `forecast` then extrapolates linearly: whatever
# s_used changed by between the two samples is assumed to happen again over
# the same time span `d` after the newest sample. Only rows whose
# extrapolated usage exceeds s_usable are returned.
# NOTE(review): s_used / s_usable look like disk space figures - confirm
# units against the schema of the `df` table.
FORECAST_QUERY = """
with recursive t as (
select '2 minutes'::interval as d
union
select d * 2 from t where d < '1 year'::interval
),
a as (
select distinct hostname, filesystem, min(ts), max(ts)
from df, t
where ts >= now() - t.d
group by hostname, filesystem, t.d
),
forecast as (
select
a.hostname, a.filesystem,
c.ts - b.ts as d,
c.ts + (c.ts - b.ts) as when,
c.s_used + (c.s_used - b.s_used) as used,
(c.s_used + (c.s_used - b.s_used)) / b.s_usable * 100 as percent,
(c.s_used + (c.s_used - b.s_used)) > b.s_usable as capacity_exceeded
from a
join df b on (a.hostname = b.hostname and a.filesystem = b.filesystem and a.min = b.ts)
join df c on (a.hostname = c.hostname and a.filesystem = c.filesystem and a.max = c.ts)
)
select * from forecast
where capacity_exceeded
order by 1, 2, 3
"""

# A forecast whose sample span `d` is shorter than this is CRITICAL. Since
# the forecast looks exactly `d` ahead of the newest sample, this means the
# capacity is projected to be exceeded less than 24 hours after that sample.
CRITICAL_HORIZON = datetime.timedelta(hours=24)

# Indexed by the numeric status, which doubles as the process exit code.
STATUS_NAMES = ["OK", "WARNING", "CRITICAL"]


def evaluate_forecasts(rows):
    """Reduce forecast rows to a check status and a human-readable message.

    Args:
        rows: iterable of mappings with the keys "d" (a timedelta),
            "hostname", "filesystem" and "when" (a datetime), i.e. the
            rows returned by FORECAST_QUERY.

    Returns:
        A tuple (status, message). status is 0 if there are no rows, 2 if
        any row has d below CRITICAL_HORIZON, and 1 otherwise. message
        contains one "host:filesystem at YYYY-MM-DD HH:MM; " entry per row.
    """
    status = 0
    parts = []
    for r in rows:
        severity = 2 if r["d"] < CRITICAL_HORIZON else 1
        # Never downgrade: one critical row keeps the whole check critical.
        status = max(status, severity)
        parts.append(
            "%s:%s at %s; "
            % (
                r["hostname"],
                r["filesystem"],
                r["when"].strftime("%Y-%m-%d %H:%M"),
            )
        )
    return status, "".join(parts)


def main():
    """Run the forecast query, print the check result, return the status."""
    # The empty DSN leaves all connection parameters to libpq defaults and
    # the PG* environment variables.
    db = psycopg2.connect('')
    try:
        csr = db.cursor(cursor_factory=psycopg2.extras.DictCursor)
        csr.execute(FORECAST_QUERY)
        status, message = evaluate_forecasts(csr)
    finally:
        db.close()
    print("df_space", STATUS_NAMES[status], "-", message)
    return status


if __name__ == "__main__":
    # Nagios derives the service state from the exit code, not from the
    # printed text, so the status must be passed on to the caller.
    raise SystemExit(main())