Compare commits
43 Commits
Author | SHA1 | Date |
---|---|---|
|
357771b345 | |
|
2ca618eedd | |
|
33327258d1 | |
|
001d03790d | |
|
c462ca4d80 | |
|
2016fb4a0e | |
|
1a798be52d | |
|
cd1750b21a | |
|
e921031e64 | |
|
9a84e191be | |
|
d3ac6703c6 | |
|
70b68108c7 | |
|
ad04443928 | |
|
1a0ccb22b7 | |
|
aa66f8d615 | |
|
1a7003fef2 | |
|
3f21122769 | |
|
206be6a8fa | |
|
2e8641ad18 | |
|
a95ba6c51f | |
|
5047a56fe8 | |
|
1485debaae | |
|
74842b64dd | |
|
f9f9bb4025 | |
|
f0e0ea2d73 | |
|
e0a1913a64 | |
|
3d60e648e4 | |
|
c79f7de78e | |
|
ad601984d1 | |
|
d3a439bb49 | |
|
62b22d7516 | |
|
cfc514a3eb | |
|
5dbe0391dd | |
|
75ce29135c | |
|
125dd443c1 | |
|
22c790080a | |
|
7ab45d3e90 | |
|
e57e333942 | |
|
52a5f60389 | |
|
ed3f7872e9 | |
|
fdf782bc2f | |
|
a83ebfcc99 | |
|
ca0ce798da |
94
app.py
94
app.py
|
@ -5,7 +5,9 @@ import logging
|
|||
import logging.config
|
||||
import os
|
||||
|
||||
from flask import (Flask, request, jsonify, abort, render_template)
|
||||
from collections import defaultdict
|
||||
|
||||
from flask import (Flask, request, jsonify, abort, render_template, url_for)
|
||||
|
||||
from ltsdb_json import LTS
|
||||
from dashboard import Dashboard
|
||||
|
@ -25,6 +27,10 @@ def home():
|
|||
|
||||
@app.route("/report", methods=["POST"])
|
||||
def report():
|
||||
return record()
|
||||
|
||||
@app.route("/record", methods=["POST"])
|
||||
def record():
|
||||
data = request.get_json()
|
||||
n_ts = 0
|
||||
n_dp = 0
|
||||
|
@ -93,8 +99,9 @@ def verify_node(d):
|
|||
if "/" in node:
|
||||
raise ValueError("invalid node name %s", node)
|
||||
try:
|
||||
log.info("getting client config from %s", "config/" + node)
|
||||
with open("config/" + node) as fh:
|
||||
fn = "config/" + node
|
||||
log.info("getting client config from %s", fn)
|
||||
with open(fn) as fh:
|
||||
node_conf = json.load(fh)
|
||||
except Exception as e:
|
||||
log.warning("got %s opening %s", e, "config/" + node)
|
||||
|
@ -108,8 +115,19 @@ def verify_node(d):
|
|||
if timestamp > node_conf["last"]:
|
||||
node_conf["last"] = timestamp
|
||||
os.replace("config/" + node, "config/" + node + ".old")
|
||||
with open("config/" + node, "w") as fh:
|
||||
tmpfn = fn + "." + str(os.getpid())
|
||||
oldfn = fn + ".old"
|
||||
with open(tmpfn, "w") as fh:
|
||||
json.dump(node_conf, fh) # XXX
|
||||
try:
|
||||
os.unlink(oldfn)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
try:
|
||||
os.link(fn, oldfn)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
os.rename(tmpfn, fn)
|
||||
return node
|
||||
else:
|
||||
abort(409, "timestamp out of sync")
|
||||
|
@ -136,3 +154,71 @@ def dashboard_index():
|
|||
def dashboard_file(dashboard):
|
||||
d = Dashboard("dashboards/" + dashboard + ".json")
|
||||
return d.as_html()
|
||||
|
||||
@app.get("/nav")
def nav():
    """Drill-down navigation over the time series index.

    Every query parameter whose name does not start with "." is treated as
    a filter ``dimension=member``. Several members of the same dimension
    are OR-ed together, different dimensions are AND-ed. Without any filter
    all time series found in the index match.

    If the special parameter ``.m`` names a dimension, a form offering the
    members of that dimension (restricted to those which still match at
    least one time series) is rendered. Otherwise the list of remaining
    dimensions with their match counts plus a truncated list of matching
    time series is rendered.

    Unknown dimensions or members simply match nothing instead of raising
    KeyError (which would surface as an internal server error), so stale
    or hand-edited URLs degrade gracefully.
    """
    with open("data/.index") as fh:
        fcntl.flock(fh, fcntl.LOCK_SH)
        index = json.load(fh)

    def matching_series(dimension, members):
        # Union of the series ids of all requested members of one
        # dimension; unknown names contribute nothing.
        member_index = index.get(dimension, {})
        matched = set()
        for m in members:
            matched |= set(member_index.get(m, ()))
        return matched

    timeseries = None
    for k, v in request.args.lists():
        # Parameters starting with "." are control parameters, not
        # dimensions. An empty name cannot be a dimension either (and
        # indexing k[0] would raise IndexError).
        if not k or k.startswith("."):
            continue
        log.debug("search: %s -> %s", k, v)
        matched = matching_series(k, v)
        if timeseries is None:
            log.debug("search: %s: %s", k, index.get(k))
            timeseries = matched
        else:
            timeseries &= matched

    if timeseries is None:
        # No filter at all: start with every time series in the index.
        timeseries = set()
        for mc in index.values():
            for tsl in mc.values():
                timeseries |= set(tsl)

    if d := request.args.get(".m"):
        # A dimension was chosen: offer only members which still match.
        members = [
            m for m, tsl in index.get(d, {}).items()
            if set(tsl) & timeseries
        ]
        return render_template("nav_member_select.html", dimension=d, members=members)

    params = request.args.to_dict(flat=False)
    matching_dimensions = defaultdict(int)
    for d, mc in index.items():
        if d in params:
            # Already filtered on this dimension.
            continue
        for tsl in mc.values():
            mtsl = set(tsl) & timeseries
            # Only touch the counter for real matches so that dimensions
            # without any match do not show up with a count of 0.
            if mtsl:
                matching_dimensions[d] += len(mtsl)

    matching_dimensions_list = []
    for d in matching_dimensions:
        # Link back to this view with the dimension selected via ".m".
        params[".m"] = d
        url = url_for("nav", **params)
        app.logger.debug(f"{d=} {url=}")
        matching_dimensions_list.append(
            {"name": d, "count": matching_dimensions[d], "url": url}
        )

    total_timeseries = len(timeseries)
    # Only the first 100 series are loaded and displayed.
    timeseries = [LTS(id=ts) for ts in list(timeseries)[:100]]
    return render_template(
        "nav_dimension_list.html",
        matching_dimensions=matching_dimensions_list,
        timeseries=timeseries, total_timeseries=total_timeseries)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
|
|
|
@ -7,8 +7,9 @@ import time
|
|||
|
||||
import requests
|
||||
|
||||
def send_report(report):
|
||||
node = socket.gethostbyaddr(socket.gethostname())[0]
|
||||
node = socket.gethostbyaddr(socket.gethostname())[0]
|
||||
|
||||
def record_observations(observations):
|
||||
for dir in (".", os.environ["HOME"] + "/.config/ltsdb", "/etc/ltsdb"):
|
||||
try:
|
||||
with open(dir + "/config.json") as fh:
|
||||
|
@ -18,16 +19,16 @@ def send_report(report):
|
|||
except FileNotFoundError:
|
||||
pass
|
||||
while True:
|
||||
for r in report:
|
||||
for obs in observations:
|
||||
timestamp = time.time()
|
||||
msg = (node + " " + str(timestamp)).encode("UTF-8")
|
||||
digest = hmac.new(client_config["key"].encode("UTF-8"), msg, "SHA256").hexdigest()
|
||||
r["auth"] = {
|
||||
obs["auth"] = {
|
||||
"node": node,
|
||||
"timestamp": timestamp,
|
||||
"hmac": digest,
|
||||
}
|
||||
r = requests.post(baseurl + "report", json=report)
|
||||
r = requests.post(baseurl + "record", json=observations)
|
||||
print(r)
|
||||
if r.status_code == 200:
|
||||
return True
|
|
@ -14,7 +14,7 @@ import urllib.parse
|
|||
|
||||
import requests
|
||||
|
||||
import ltsdb_report
|
||||
import ltsdb_record
|
||||
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("url")
|
||||
|
@ -57,6 +57,6 @@ report = [
|
|||
for r in report0
|
||||
]
|
||||
|
||||
success = ltsdb_report.send_report(report)
|
||||
success = ltsdb_record.record_observations(report)
|
||||
exit(1 - success)
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
import re
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
|
||||
# Record every line of /proc/meminfo as one observation.

# One line looks like "MemTotal:       16316412 kB" or "HugePages_Total:       0".
MEMINFO_LINE = re.compile(r"^(?P<measure>.*?): *(?P<value>[0-9]*)( (?P<unit>.*))?")

now = time.time()
report = []
with open("/proc/meminfo") as mfh:
    for ln in mfh:
        fields = MEMINFO_LINE.match(ln)
        measure = fields.group("measure")
        value = int(fields.group("value"))
        unit = fields.group("unit")
        if unit == "kB":
            # Normalize to bytes.
            value, unit = value * 1024, "bytes"
        elif unit is None and "Pages" in measure:
            # Page counters carry no unit in /proc/meminfo.
            unit = "pages"
        report.append(
            {
                "description": {
                    "hostname": ltsdb_record.node,
                    "measure": "meminfo." + measure,
                    "unit": unit,
                },
                "data": [
                    [now, value],
                ],
            }
        )

success = ltsdb_record.record_observations(report)
exit(1 - success)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/python3
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
def parse_lsb_release(text):
    """Extract the distributor id and the release from `lsb_release -ir` output.

    Returns a tuple (distributor, release). The distributor is lowercased.
    Either element is None if the corresponding line is missing.
    """
    distributor = None
    release = None
    for ln in text.splitlines():
        m = re.match(r"(.*?)\s*:\s+(.*)", ln)
        if not m:
            continue
        if m.group(1) == "Distributor ID":
            distributor = m.group(2).lower()
        elif m.group(1) == "Release":
            release = m.group(2)
    return distributor, release


def release_to_number(distributor, release):
    """Convert a release string into a number which can be recorded.

    Raises ValueError if the release string cannot be converted.
    """
    if distributor == "ubuntu":
        # special rule for ubuntu. The format is year.month, so we convert the
        # months into fractional years
        m = re.match(r"(\d+)\.(\d+)", release)
        if m is None:
            raise ValueError("unexpected ubuntu release %r" % (release,))
        return int(m.group(1)) + (int(m.group(2)) - 1) / 12
    # for everybody else we assume it's a fp number
    return float(release)


def main():
    """Determine the OS release and record it as os_version_<distributor>."""
    p = subprocess.run(["/usr/bin/lsb_release", "-ir"],
                       stdout=subprocess.PIPE, universal_newlines=True)
    distributor, release = parse_lsb_release(p.stdout)
    if distributor is None or release is None:
        # Without both values there is nothing meaningful to record; fail
        # with a clear message instead of a NameError further down.
        raise SystemExit("cannot determine distributor/release from lsb_release output")
    release = release_to_number(distributor, release)

    report0 = []
    report0.append({"measure": "os_version_" + distributor, "unit": "version", "value": release})
    now = time.time()
    report = [
        {
            "description": {
                "hostname": ltsdb_record.node,
                "measure": r["measure"],
                "unit": r["unit"]
            },
            "data": [
                [now, r["value"]]
            ]
        }
        for r in report0
    ]

    success = ltsdb_record.record_observations(report)
    raise SystemExit(1 - success)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
import re
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
# Record the number of client connections per database plus the total over
# all databases (reported as database "ALL").

db = psycopg2.connect()  # We only get useful results if we are postgres, but for testing we can be any user

csr = db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
csr.execute(
    """
    select datname, count(*)
    from pg_stat_activity
    where backend_type = 'client backend' group by datname order by datname
    """)

now = time.time()


def observation(database, connections):
    # Build one time series entry in the format expected by ltsdb_record.
    return {
        "description": {
            "hostname": ltsdb_record.node,
            "measure": "connections",
            "database": database,
            "unit": "connections",
        },
        "data": [
            [now, connections],
        ],
    }


report = []
total = 0
for row in csr:
    report.append(observation(row.datname, row.count))
    total += row.count
report.append(observation("ALL", total))

success = ltsdb_record.record_observations(report)
exit(1 - success)
|
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/python3
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
def parse_postgresql_version(text):
    """Parse the output of `select version()` into a number.

    "PostgreSQL 15.4 (Debian ...)" becomes 15.04, i.e. the minor version is
    recorded in hundredths. Returns None if the text does not start with a
    version of the form "PostgreSQL <major>.<minor> ".

    This works only for PostgreSQL 10.x and above. I don't expect to
    encounter older versions any more.
    """
    # The dot must be escaped: an unescaped "." matches any character, so
    # e.g. "PostgreSQL 150 " would be misread as version 1.0.
    m = re.match(r"^PostgreSQL (\d+)\.(\d+) ", text)
    if m is None:
        return None
    return int(m.group(1)) + int(m.group(2)) / 100


def main():
    """Ask the local PostgreSQL server for its version and record it."""
    p = subprocess.run(["psql", "-c", "select version()", "-A", "-t", "-X", "-q",],
                       stdout=subprocess.PIPE, universal_newlines=True)
    version = parse_postgresql_version(p.stdout)
    if version is None:
        # psql failed or printed something unexpected: fail with a clear
        # message instead of a NameError on an unbound variable.
        raise SystemExit("cannot parse PostgreSQL version from %r" % (p.stdout,))

    report0 = []
    report0.append({"measure": "postgresql_version", "unit": "version", "value": version})
    now = time.time()
    report = [
        {
            "description": {
                "hostname": ltsdb_record.node,
                "measure": r["measure"],
                "unit": r["unit"]
            },
            "data": [
                [now, r["value"]]
            ]
        }
        for r in report0
    ]

    success = ltsdb_record.record_observations(report)
    raise SystemExit(1 - success)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
import glob
|
||||
import os.path
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
def newest_mtime(paths):
    """Return the latest modification time of the given paths (0 if none).

    Paths which cannot be examined (e.g. dangling symlinks or files removed
    in the meantime) are skipped instead of aborting the whole check.
    """
    last_mtime = 0
    for p in paths:
        try:
            mtime = os.path.getmtime(p)
        except OSError:
            continue
        if mtime > last_mtime:
            last_mtime = mtime
    return last_mtime


def reboot_overdue(now, last_reboot, last_mtime):
    """Return for how many seconds a reboot has been pending.

    A reboot counts as pending if something was modified after the last
    reboot; the result is the time since that modification. Otherwise 0.
    """
    if last_mtime > last_reboot:
        return now - last_mtime
    return 0


def main():
    """Compare the age of the files in /boot with the uptime and record the result."""
    now = time.time()
    with open("/proc/uptime") as fh:
        ln = fh.readline()
        # The first field is the uptime in seconds.
        uptime = float(ln.split()[0])
    last_reboot = now - uptime

    last_mtime = newest_mtime(glob.glob("/boot/**/*", recursive=True))
    overdue = reboot_overdue(now, last_reboot, last_mtime)

    report0 = []
    report0.append({"measure": "reboot_overdue", "unit": "s", "value": overdue})
    report = [
        {
            "description": {
                "hostname": ltsdb_record.node,
                "measure": r["measure"],
                "unit": r["unit"]
            },
            "data": [
                [now, r["value"]]
            ]
        }
        for r in report0
    ]

    success = ltsdb_record.record_observations(report)
    raise SystemExit(1 - success)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/python3
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import ltsdb_record
|
||||
|
||||
def smart_unit(attribute_name):
    """Guess the unit of a SMART attribute from its name."""
    if "_Ct" in attribute_name or "_Count" in attribute_name or "_Cnt" in attribute_name:
        return "count"
    if "_Hours" in attribute_name:
        return "hours"
    if "Total_LBAs_Written" in attribute_name:
        return "blocks"
    if "Temperature_Cel" in attribute_name:
        return "°C"
    return "unknown"


def smart_value(unit, raw_value):
    """Convert the RAW_VALUE column of `smartctl -A` into a number.

    Raises ValueError if the raw value is not in a recognized format.
    """
    if unit == "°C":
        # Sometimes there is extra information included - just ignore that.
        return int(raw_value.split()[0])
    if unit == "hours":
        if m := re.match(r"([0-9]+)h\+([0-9]+)m\+([0-9.]+)s", raw_value):
            # e.g. 60633h+54m+11.557s
            # Group 3 holds the seconds (group 2 is the minutes and must
            # not be counted a second time).
            return (int(m.group(1)) * 3600 + int(m.group(2)) * 60 + float(m.group(3))) / 3600
    return int(raw_value)


def parse_smart_attributes(text):
    """Parse the attribute table from the output of `smartctl -A`.

    Returns a list of dicts with the keys "measure", "unit" and "value",
    one for each attribute except Command_Timeout.
    """
    report0 = []
    # 0: before the table header, 1: inside the table, 2: after the table
    state = 0
    for ln in text.splitlines():
        if state == 0 and ln.startswith("ID# ATTRIBUTE_NAME"):
            state = 1
        elif state == 1 and ln == "":
            state = 2
        elif state == 1:
            # Only the name and the raw value (which may contain spaces,
            # hence the split limit) are used.
            (_id, attribute_name, _flag, _value, _worst, _thresh, _type,
             _updated, _when_failed, raw_value) = ln.split(None, 9)

            if attribute_name == "Command_Timeout":
                # This is a tuple of three values and I don't know what they mean
                # so I just skip them.
                # I guess I could just record them as smart_command_timeout_1,
                # smart_command_timeout_2 and smart_command_timeout_3 ...
                continue

            unit = smart_unit(attribute_name)
            report0.append(
                {
                    "measure": "smart_" + attribute_name.lower(),
                    "unit": unit,
                    "value": smart_value(unit, raw_value),
                })
    return report0


def main():
    """Record the SMART attributes of the device given on the command line."""
    ap = argparse.ArgumentParser()
    ap.add_argument("device")
    args = ap.parse_args()

    p = subprocess.run(["/usr/sbin/smartctl", "-A", args.device],
                       stdout=subprocess.PIPE, universal_newlines=True)
    report0 = parse_smart_attributes(p.stdout)
    now = time.time()
    report = [
        {
            "description": {
                "hostname": ltsdb_record.node,
                "device": args.device,
                "measure": r["measure"],
                "unit": r["unit"],
            },
            "data": [
                [now, r["value"]]
            ]
        }
        for r in report0
    ]

    success = ltsdb_record.record_observations(report)
    raise SystemExit(1 - success)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,14 +1,11 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import ssl
|
||||
import time
|
||||
|
||||
import requests
|
||||
import ltsdb_record
|
||||
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--verbose", action="store_true")
|
||||
|
@ -16,23 +13,20 @@ ap.add_argument("hostname")
|
|||
ap.add_argument("port", type=int, default=443, nargs="?")
|
||||
args = ap.parse_args()
|
||||
|
||||
# It's a bit weird that this works.
|
||||
myhostname = socket.gethostbyaddr(socket.gethostname())[0]
|
||||
|
||||
now = time.time()
|
||||
report0 = []
|
||||
|
||||
with socket.create_connection((args.hostname, args.port)) as sock:
|
||||
context = ssl.create_default_context()
|
||||
try:
|
||||
try:
|
||||
with socket.create_connection((args.hostname, args.port)) as sock:
|
||||
context = ssl.create_default_context()
|
||||
with context.wrap_socket(sock, server_hostname=args.hostname) as ssock:
|
||||
cert = ssock.getpeercert()
|
||||
not_after = ssl.cert_time_to_seconds(cert["notAfter"])
|
||||
delta = not_after - now
|
||||
except ssl.SSLCertVerificationError as e:
|
||||
print("got error %s; setting delta to 0", e)
|
||||
delta = 0
|
||||
report0.append({ "measure": "tls_cert_ttl", "unit": "s", "value": delta })
|
||||
except (ssl.SSLCertVerificationError, ConnectionRefusedError) as e:
|
||||
print("got error %s; setting delta to 0" % e)
|
||||
delta = 0
|
||||
report0.append({ "measure": "tls_cert_ttl", "unit": "s", "value": delta })
|
||||
|
||||
report = [
|
||||
{
|
||||
|
@ -49,32 +43,5 @@ report = [
|
|||
for r in report0
|
||||
]
|
||||
|
||||
for dir in (".", os.environ["HOME"] + "/.config/ltsdb", "/etc/ltsdb"):
|
||||
try:
|
||||
with open(dir + "/config.json") as fh:
|
||||
client_config = json.load(fh)
|
||||
baseurl = client_config["server"]
|
||||
break
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
while True:
|
||||
for r in report:
|
||||
node = myhostname
|
||||
timestamp = time.time()
|
||||
msg = (node + " " + str(timestamp)).encode("UTF-8")
|
||||
digest = hmac.new(client_config["key"].encode("UTF-8"), msg, "SHA256").hexdigest()
|
||||
r["auth"] = {
|
||||
"node": node,
|
||||
"timestamp": timestamp,
|
||||
"hmac": digest,
|
||||
}
|
||||
#pprint.pp(report)
|
||||
r = requests.post(baseurl + "report", json=report)
|
||||
print(r)
|
||||
if r.status_code == 200:
|
||||
exit(0)
|
||||
elif r.status_code == 409:
|
||||
time.sleep(0.5 + random.random())
|
||||
continue
|
||||
else:
|
||||
exit(1)
|
||||
success = ltsdb_record.record_observations(report)
|
||||
exit(1 - success)
|
||||
|
|
163
dashboard.py
163
dashboard.py
|
@ -27,11 +27,15 @@ class Dashboard:
|
|||
if w.get("multi"):
|
||||
ts_list = LTS.find(w["data"][0])
|
||||
for ts in ts_list:
|
||||
tso = LTS(id=ts)
|
||||
try:
|
||||
tso = LTS(id=ts)
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
log.error("%s contains bad data: %s: Skipping", ts, e)
|
||||
continue
|
||||
if not tso.data:
|
||||
log.warning("%s has no data: Skipping", tso.id)
|
||||
continue
|
||||
if tso.data[-1][0] < time.time() - 86400:
|
||||
if tso.data[-1][0] < time.time() - 7 * 86400:
|
||||
log.info("%s too old; Skipping", tso.id)
|
||||
continue
|
||||
w1 = {**w, "data": [ts]}
|
||||
|
@ -56,6 +60,10 @@ class Dashboard:
|
|||
else:
|
||||
self.widgets.append(Widget(w))
|
||||
|
||||
# Sort widgets by ascending healthscore to get the most critical at the
|
||||
# top.
|
||||
self.widgets.sort(key=lambda w: w.healthscore())
|
||||
|
||||
def as_html(self):
|
||||
return render_template("dashboard.html", dashboard=self)
|
||||
|
||||
|
@ -70,65 +78,40 @@ class Widget:
|
|||
self.extra = {}
|
||||
log.debug("data = %s", d["data"])
|
||||
self.lts = LTS(id=d["data"][0]) # by default we handle only one data source
|
||||
pass
|
||||
self.lastvalue = self.lts.data[-1][1]
|
||||
|
||||
def as_html(self):
|
||||
log.debug("")
|
||||
self.lastvalue = self.lts.data[-1][1]
|
||||
return Markup(render_template("widget.html", widget=self))
|
||||
|
||||
def criticalcolor(self, value=None):
|
||||
def healthscore(self, value=None):
    """
    Return a score between 0 (unhealthy) and 100 (healthy)

    The score is computed for value, or for self.lastvalue if value is
    None. self.stops lists the thresholds from healthy to unhealthy: at or
    before the first stop the score is 100, at or beyond the last stop it
    is 0, and in between it is interpolated linearly within each segment,
    each segment covering an equal share of the score. If the stops are
    descending, value and stops are negated so that the same comparisons
    apply.
    """
    if value is None:
        value = self.lastvalue
    log.debug("stops = %s", self.stops)
    stops = self.stops
    if stops[-1] < stops[0]:
        value = -value
        stops = [-v for v in stops]

    if value <= stops[0]:
        log.debug("ok")
        return 100
    if value >= stops[-1]:
        log.debug("fail")
        return 0
    for i in range(0, len(stops) - 1):
        if stops[i] <= value < stops[i+1]:
            log.debug("at stop %d", i)
            return 100 - ((value - stops[i]) / (stops[i+1] - stops[i]) + i) * 100 / (len(stops) - 1)
    # Only reachable if no comparison above succeeded (e.g. value is NaN
    # or the stops are not monotonic). Callers sort by and compute with
    # the score, so return the worst score rather than an implicit None.
    log.debug("no matching stop")
    return 0
|
||||
|
||||
def criticalcolor(self, value=None):
|
||||
healthscore = self.healthscore(value)
|
||||
hue = round(healthscore * 120 / 100)
|
||||
brightness = 30
|
||||
if self.stops[0] < self.stops[2]:
|
||||
if value < self.stops[0]:
|
||||
log.debug("definitely ok")
|
||||
return f"hsl(120, 100%, {brightness}%)"
|
||||
elif value < self.stops[1]:
|
||||
log.debug("mostly ok")
|
||||
hue = 120 - round(
|
||||
(value - self.stops[0])
|
||||
/ (self.stops[1] - self.stops[0])
|
||||
* 60
|
||||
)
|
||||
return f"hsl({hue}, 100%, {brightness}%)"
|
||||
elif value < self.stops[2]:
|
||||
log.debug("maybe fail")
|
||||
hue = 60 - round(
|
||||
(value - self.stops[1])
|
||||
/ (self.stops[2] - self.stops[1])
|
||||
* 60
|
||||
)
|
||||
return f"hsl({hue}, 100%, {brightness}%)"
|
||||
else:
|
||||
log.debug("definitely fail")
|
||||
return f"hsl(0, 100%, {brightness}%)"
|
||||
else:
|
||||
log.debug("the other side")
|
||||
if value > self.stops[0]:
|
||||
log.debug("definitely ok")
|
||||
return f"hsl(120, 100%, {brightness}%)"
|
||||
elif value > self.stops[1]:
|
||||
log.debug("mostly ok")
|
||||
hue = 120 - round(
|
||||
(value - self.stops[0])
|
||||
/ (self.stops[1] - self.stops[0])
|
||||
* 60
|
||||
)
|
||||
return f"hsl({hue}, 100%, {brightness}%)"
|
||||
elif value > self.stops[2]:
|
||||
log.debug("maybe fail")
|
||||
hue = 60 - round(
|
||||
(value - self.stops[1])
|
||||
/ (self.stops[2] - self.stops[1])
|
||||
* 60
|
||||
)
|
||||
return f"hsl({hue}, 100%, {brightness}%)"
|
||||
else:
|
||||
log.debug("definitely fail")
|
||||
return f"hsl(0, 100%, {brightness}%)"
|
||||
return f"hsl({hue}, 100%, {brightness}%)"
|
||||
|
||||
@property
|
||||
def description_formatted(self):
|
||||
|
@ -160,9 +143,13 @@ class TimeSeries(Widget):
|
|||
|
||||
def v2y(v):
|
||||
if self.yscale == "log":
|
||||
return (1 - math.log(v / min_value)
|
||||
/ math.log(max_value / min_value)
|
||||
) * 200
|
||||
try:
|
||||
return (1 - math.log(max(v, min_value) / min_value)
|
||||
/ math.log(max_value / min_value)
|
||||
) * 200
|
||||
except ValueError:
|
||||
log.error(f"ValueError: v = {v}, min_value = {min_value}, max_value = {max_value}")
|
||||
return 0
|
||||
elif self.yscale == "linear":
|
||||
return (1 - v/max_value) * 200
|
||||
else:
|
||||
|
@ -179,11 +166,12 @@ class TimeSeries(Widget):
|
|||
min_step = 25
|
||||
steps = ("s", "m", "h", "D", "10D", "M", "Y")
|
||||
step_i = 0
|
||||
while True:
|
||||
while step_i < len(steps):
|
||||
t0 = tickmarks[-1]["t"]
|
||||
x0 = tickmarks[-1]["x"]
|
||||
d0 = datetime.datetime.fromtimestamp(t0)
|
||||
|
||||
log.debug("step_i = %s", step_i)
|
||||
if steps[step_i] == "s":
|
||||
d1 = datetime.datetime(d0.year, d0.month, d0.day, d0.hour, d0.minute, d0.second)
|
||||
t1 = d1.timestamp()
|
||||
|
@ -295,6 +283,7 @@ class TimeSeries(Widget):
|
|||
t1 = d1.timestamp()
|
||||
x1 = t2x(t1)
|
||||
if x0 - x1 < min_step:
|
||||
log.debug("t0 = %s, x0 = %s, t1 = %s, x1 = %s", t0, x0, t1, x1)
|
||||
step_i += 1
|
||||
continue
|
||||
if x1 < 0:
|
||||
|
@ -315,7 +304,11 @@ class TimeSeries(Widget):
|
|||
if self.yscale == "linear":
|
||||
log.debug("")
|
||||
if unit == "s" and max_value > 3600:
|
||||
if max_value >= 4 * 7 * 86400:
|
||||
if max_value >= 16 * 7 * 86400:
|
||||
step = 4 * 7 * 86400
|
||||
step_d = 4
|
||||
unit = "w"
|
||||
elif max_value >= 4 * 7 * 86400:
|
||||
step = 7 * 86400
|
||||
step_d = 1
|
||||
unit = "w"
|
||||
|
@ -387,7 +380,7 @@ class TimeSeries(Widget):
|
|||
log.debug("")
|
||||
|
||||
|
||||
log.debug("in graph")
|
||||
log.debug("in graph for %s", self.lts.id)
|
||||
data = self.lts.data
|
||||
n = len(data)
|
||||
t_last = data[-1][0]
|
||||
|
@ -395,13 +388,18 @@ class TimeSeries(Widget):
|
|||
return "(not enough data)"
|
||||
dt = (t_last - data[-5][0]) / 4
|
||||
k = math.log((t_last - data[0][0]) / dt / n + 1)
|
||||
log.debug("times = [%s ... %s ... %s]", data[0][0], data[-5][0], data[-1][0])
|
||||
|
||||
max_value = max([d[1] for d in self.lts.data])
|
||||
max_value = max([d[3] if len(d) >= 4 else d[1] for d in self.lts.data])
|
||||
max_value = max(max_value, 0.001) # ensure positive
|
||||
unit = self.lts.description["unit"]
|
||||
if self.yscale == "log":
|
||||
try:
|
||||
min_value = min(d[1] for d in self.lts.data if d[1] > 0)
|
||||
self.extra["min"] = "%g" % min_value
|
||||
if unit == "s":
|
||||
self.extra["min"] = "%g" % min_value + " (" + self.format_time(min_value) + ")"
|
||||
else:
|
||||
self.extra["min"] = "%g" % min_value
|
||||
except ValueError:
|
||||
# no non-negative values
|
||||
min_value = max_value / 2
|
||||
|
@ -415,16 +413,28 @@ class TimeSeries(Widget):
|
|||
# Make sure min_value is less than max_value
|
||||
min_value /= 2
|
||||
log.debug("min_value = %s, max_value = %s", min_value, max_value)
|
||||
self.extra["max"] = "%g" % max_value
|
||||
self.extra["last"] = "%g" % data[-1][1]
|
||||
if unit == "s":
|
||||
self.extra["max"] = "%g" % max_value + " (" + self.format_time(max_value) + ")"
|
||||
self.extra["last"] = "%g" % data[-1][1] + " (" + self.format_time(data[-1][1]) + ")"
|
||||
else:
|
||||
self.extra["max"] = "%g" % max_value
|
||||
self.extra["last"] = "%g" % data[-1][1]
|
||||
log.debug("collecting data")
|
||||
v_data = []
|
||||
for i in range(n):
|
||||
t = data[i][0]
|
||||
v = data[i][1]
|
||||
if len(data[i]) >= 4:
|
||||
v_min = data[i][2]
|
||||
v_max = data[i][3]
|
||||
else:
|
||||
v_min = data[i][1]
|
||||
v_max = data[i][1]
|
||||
x = t2x(t)
|
||||
t_h = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
|
||||
y = v2y(v)
|
||||
y_min = v2y(v_min)
|
||||
y_max = v2y(v_max)
|
||||
#print(t, t_h, x)
|
||||
v_data.append(
|
||||
{
|
||||
|
@ -432,6 +442,8 @@ class TimeSeries(Widget):
|
|||
"v": v,
|
||||
"x": x,
|
||||
"y": y,
|
||||
"y_min": y_min,
|
||||
"y_max": y_max,
|
||||
"color": self.criticalcolor(v),
|
||||
})
|
||||
|
||||
|
@ -450,7 +462,8 @@ class TimeSeries(Widget):
|
|||
html += f"<line x1=0 y1={tm['y']} x2=1000 y2={tm['y']} stroke='#CCC' />"
|
||||
html += f"<text x=1005 y={tm['y']} fill='#888'>{tm['v_h']}</text>"
|
||||
for v in v_data:
|
||||
html += f"<circle cx={v['x']} cy={v['y']} r=3 fill='{v['color']}' />"
|
||||
html += f"<line x1={v['x']-3} x2={v['x']+3} y1={v['y']} y2={v['y']} stroke='{v['color']}' />"
|
||||
html += f"<line x1={v['x']} x2={v['x']} y1={v['y_min']} y2={v['y_max']} stroke='{v['color']}' />"
|
||||
html += "</svg>"
|
||||
log.debug("len(html) = %s", len(html))
|
||||
return Markup(html)
|
||||
|
@ -459,6 +472,29 @@ class TimeSeries(Widget):
|
|||
log.debug("in as_html")
|
||||
return Markup(render_template("timeseries.html", widget=self))
|
||||
|
||||
def format_time(self, seconds):
    """Format a duration in seconds with two decimals and a readable unit.

    Durations of at least a minute are scaled to m, h, days or years
    (365.25 days), durations from one millisecond up to (but excluding) one
    second are shown in ms, and everything else is shown in s.
    """
    scales = (
        (365.25 * 86400, "years"),
        (86400, "days"),
        (3600, "h"),
        (60, "m"),
    )
    for size, unit in scales:
        if seconds >= size:
            return f"{seconds / size:.2f} {unit}"
    if 0.001 <= seconds < 1:
        return f"{seconds * 1000:.2f} ms"
    return f"{seconds:.2f} s"
|
||||
|
||||
|
||||
class Gauge(Widget):
|
||||
def __init__(self, d):
|
||||
super().__init__(d)
|
||||
|
@ -500,3 +536,4 @@ class Gauge(Widget):
|
|||
self.lastvalue_formatted = Markup(f"<span class='value'>{value:.2f}</span><span class='unit'>{unit}</unit>")
|
||||
return Markup(render_template("gauge.html", widget=self))
|
||||
|
||||
# vim: sw=4
|
||||
|
|
|
@ -41,7 +41,36 @@ class LTS:
|
|||
with open(self.filename, "x+") as fh:
|
||||
fcntl.flock(fh, fcntl.LOCK_EX)
|
||||
json.dump({"description": self.description, "data": self.data}, fh)
|
||||
log.info(f"Created {self.filename}")
|
||||
self.rebuild_index()
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
log.exception(f"Cannot decode JSON in {self.filename}: {e}")
|
||||
raise
|
||||
|
||||
def pop(self, i):
    """Remove and return the data point at index i.

    The minimum and maximum of the removed point (its own value if it has
    no min/max fields) are folded into both remaining neighbours, so the
    range it covered stays visible. Neighbours stored as [t, v] are widened
    to [t, v, min, max] first.

    The values of the neighbours themselves are left alone; replacing them
    with some (weighted) average might be an option, but it is unclear
    whether that would be a good idea.
    """
    data = self.data
    removed = data.pop(i)
    if len(removed) >= 4:
        lo, hi = removed[2], removed[3]
    else:
        lo = hi = removed[1]

    def absorb(j):
        # Extend the min/max range of the point at index j by [lo, hi].
        point = data[j]
        if len(point) == 2:
            point = data[j] = [point[0], point[1], point[1], point[1]]
        if lo < point[2]:
            point[2] = lo
        if hi > point[3]:
            point[3] = hi

    # After the removal the former neighbours sit at i-1 and i.
    if i > 0:
        absorb(i - 1)
    if i < len(data):
        absorb(i)
    return removed
|
||||
|
||||
def shrink(self):
|
||||
# Remove one element in such a way that the distributions gets closer
|
||||
|
@ -61,7 +90,7 @@ class LTS:
|
|||
t_ideal = (math.exp(k * (n - i)/n) - 1) * (n * dt)
|
||||
if t_last - data[i][0] > t_ideal:
|
||||
log.debug("%s - %s > %s -> popping element %s", t_last, data[i][0], t_ideal, i)
|
||||
data.pop(i)
|
||||
self.pop(i)
|
||||
break
|
||||
else:
|
||||
# Well, it works mostly. Sometimes all the real points are below
|
||||
|
@ -71,7 +100,7 @@ class LTS:
|
|||
# narrow range just before that.
|
||||
i = random.randrange(int(n*0.98), int(n*0.99))
|
||||
log.debug("no match -> popping element %s", i)
|
||||
data.pop(i)
|
||||
self.pop(i)
|
||||
|
||||
def add(self, ts, value):
|
||||
while len(self.data) >= self.limit:
|
||||
|
@ -102,7 +131,11 @@ class LTS:
|
|||
(_, _, hash) = fn.rpartition("/")
|
||||
with open(fn, "r") as fh:
|
||||
fcntl.flock(fh, fcntl.LOCK_SH)
|
||||
d = json.load(fh)
|
||||
try:
|
||||
d = json.load(fh)
|
||||
except json.decoder.JSONDecodeError as e:
|
||||
log.exception(f"Cannot decode JSON in {fn}: {e}")
|
||||
raise
|
||||
for k, v in d["description"].items():
|
||||
d1 = index.setdefault(k, {})
|
||||
d2 = d1.setdefault(v, [])
|
||||
|
|
10
ltsdb_test
10
ltsdb_test
|
@ -1,20 +1,22 @@
|
|||
#!/usr/bin/python3
|
||||
import time
|
||||
import random
|
||||
|
||||
from ltsdb_json import LTS
|
||||
|
||||
ts1 = LTS({"hostname": "rorschach.hjp.at", "measure": "uptime"})
|
||||
ts1.add(1661026122, 4)
|
||||
ts1.add(time.time(), 4 + random.random())
|
||||
ts1.save()
|
||||
|
||||
ts1 = LTS({"hostname": "rorschach.hjp.at", "website": "i12e.hjp.at", "measure": "rtt"})
|
||||
ts1.add(1661026122, 0.06)
|
||||
ts1.add(time.time(), 0.06)
|
||||
ts1.save()
|
||||
|
||||
ts1 = LTS({"hostname": "rorschach.hjp.at", "measure": "uptime"})
|
||||
ts1.add(1661026361, 5)
|
||||
ts1.add(time.time() + 240, 5 + random.random())
|
||||
ts1.save()
|
||||
|
||||
ts1 = LTS({"hostname": "charly.wsr.ac.at", "website": "www.wifo.ac.at", "measure": "rtt"})
|
||||
ts1.add(1661026122, 0.347)
|
||||
ts1.add(time.time(), 0.347)
|
||||
ts1.save()
|
||||
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
|
||||
import logging
|
||||
import logging.config
|
||||
import math
|
||||
import os
|
||||
import socket
|
||||
import statistics
|
||||
import time
|
||||
|
||||
from ltsdb_json import LTS
|
||||
|
@ -44,13 +46,37 @@ class DiskFullPredictor:
|
|||
current_used_bytes = lts.data[-1][1]
|
||||
current_usable_bytes = usable_lts.data[-1][1]
|
||||
tuf = 1E9
|
||||
for d in reversed(lts.data):
|
||||
if d[1] < current_usable_bytes * 0.1:
|
||||
for i in reversed(range(len(lts.data))):
|
||||
m = statistics.mean(x[1] for x in lts.data[max(0, i - 2) : min(len(lts.data), i + 3)])
|
||||
if m < current_usable_bytes * 0.1:
|
||||
continue # for sanity
|
||||
if current_used_bytes ** 2 / d[1] > current_usable_bytes:
|
||||
log.info("d = %s, current_used_bytes = %s, current_usable_bytes = %s", d, current_used_bytes, current_usable_bytes)
|
||||
tuf = now - d[0]
|
||||
if current_used_bytes ** 2 / m > current_usable_bytes:
|
||||
log.info("d = %s, current_used_bytes = %s, current_usable_bytes = %s", m, current_used_bytes, current_usable_bytes)
|
||||
tuf = now - lts.data[i][0]
|
||||
break
|
||||
else:
|
||||
# Try always use the minimum of a range.
|
||||
# We prefer the first datapoint
|
||||
first_used_bytes = lts.data[0][2] if len(lts.data[0]) >= 4 else lts.data[0][1]
|
||||
# But if that's not useable we search the whole timeseries for the
|
||||
# minimum
|
||||
if first_used_bytes >= current_used_bytes:
|
||||
first_used_bytes = current_used_bytes
|
||||
first_i = None
|
||||
for i in range(len(lts.data)):
|
||||
used_bytes = lts.data[i][2] if len(lts.data[i]) >= 4 else lts.data[i][1]
|
||||
if used_bytes < first_used_bytes:
|
||||
first_used_bytes = used_bytes
|
||||
first_i = i
|
||||
else:
|
||||
first_i = 0
|
||||
|
||||
if first_i is not None:
|
||||
historic_growth = current_used_bytes / first_used_bytes
|
||||
future_growth = current_usable_bytes / current_used_bytes
|
||||
tuf = math.log(future_growth) / math.log(historic_growth) * (now - lts.data[first_i][0])
|
||||
tuf = max(tuf, now - lts.data[first_i][0])
|
||||
tuf = min(tuf, 1E9)
|
||||
desc = {**lts.description,
|
||||
"measure": "time_until_disk_full",
|
||||
"node": node,
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
[tool.pytest.ini_options]
|
||||
pythonpath = "."
|
||||
|
|
@ -8,8 +8,9 @@
|
|||
body {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
th {
|
||||
th, td {
|
||||
text-align: left;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
main {
|
||||
display: flex;
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta charset="utf-8">
|
||||
|
||||
<style>
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<ul>
|
||||
{% for d in matching_dimensions %}
|
||||
<li><a href="{{d['url']}}">{{d.name}}</a> ({{d.count}})</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{{timeseries|length}}/{{total_timeseries}} timeseries:
|
||||
<ul>
|
||||
{% for ts in timeseries %}
|
||||
<li>
|
||||
<a href="/v?ts={{ts.id}}">{{ts.description}}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta charset="utf-8">
|
||||
|
||||
<style>
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<form>
|
||||
{% for dimension, members in request.args.lists() %}
|
||||
{% for member in members %}
|
||||
{% if dimension[0] != "." %}
|
||||
<input name="{{dimension}}" value="{{member}}" type="hidden">
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
<select name="{{dimension}}" multiple size={{members|length}}>
|
||||
{% for member in members %}
|
||||
<option>{{member}}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="submit">
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,11 @@
|
|||
{"description": {"fstype": "tmpfs", "hostname": "ehoiai.int.wsr.at", "measure": "time_until_disk_full", "mountpoint": "/run/user/1054", "node": "rorschach.hjp.at", "remote_addr": "", "unit": "s"},
|
||||
"data": [
|
||||
[1674806402.1014915, 1000000000.0],
|
||||
[1674814201.3452954, 1000000000.0],
|
||||
[1675164602.4004557, 1000000000.0],
|
||||
[1675165201.6291275, 1000000000.0],
|
||||
[1675165802.2402072, 1000000000.0],
|
||||
[1675166401.542857, 1000000000.0],
|
||||
[1675167002.2809808, 1000000000.0],
|
||||
[1675167601.2632012, 1000000000.0],
|
||||
[1675168201.8321788, 1000000000.0]]}
|
Binary file not shown.
|
@ -0,0 +1,56 @@
|
|||
import pytest
|
||||
|
||||
from dashboard import Widget
|
||||
|
||||
def test_healthscore_1_asc():
    """Check healthscore() of a gauge widget with two ascending stops.

    Values at or below the first stop are expected to score 100, values at
    or above the last stop 0, and the values in between the evenly spaced
    scores listed below.
    """
    config = {
        "type": "gauge",
        "stops": [1, 5],
        # required by API, not used for tests
        "data": ["605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778"],
    }
    widget = Widget(config)

    # stops are ordered from best to worst
    expectations = [
        (0, 100),
        (1, 100),
        (2, 75),
        (3, 50),
        (4, 25),
        (5, 0),
        (6, 0),
    ]
    for value, score in expectations:
        assert widget.healthscore(value) == score
|
||||
|
||||
|
||||
def test_healthscore_2_asc():
    """Check healthscore() of a gauge widget with three ascending stops.

    The middle stop is expected to score 50; the midpoints between
    neighbouring stops are expected to score 75 and 25 respectively.
    """
    config = {
        "type": "gauge",
        "stops": [1, 10, 100],
        # required by API, not used for tests
        "data": ["605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778"],
    }
    widget = Widget(config)

    # stops are ordered from best to worst
    expectations = [
        (0, 100),
        (1, 100),
        (5.5, 75),
        (10, 50),
        (55, 25),
        (100, 0),
        (1000, 0),
    ]
    for value, score in expectations:
        assert widget.healthscore(value) == score
|
||||
|
||||
def test_healthscore_2_desc():
    """Check healthscore() of a gauge widget with three descending stops.

    Mirror image of the ascending case: here larger values are better, so
    the score is expected to rise from 0 at the last stop to 100 at the
    first one.
    """
    config = {
        "type": "gauge",
        "stops": [100, 10, 1],
        # required by API, not used for tests
        "data": ["605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778"],
    }
    widget = Widget(config)

    # stops are ordered from best to worst
    expectations = [
        (0, 0),
        (1, 0),
        (5.5, 25),
        (10, 50),
        (55, 75),
        (100, 100),
        (1000, 100),
    ]
    for value, score in expectations:
        assert widget.healthscore(value) == score
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import pytest
|
||||
|
||||
from dashboard import TimeSeries
|
||||
|
||||
def test_timeseries_1():
    """Smoke test: a TimeSeries built from a typical config has a truthy graph."""
    ts = TimeSeries(
        {
            "type": "timeseries",
            "multi": True,
            # NOTE(review): 30 d / 7 d / 1 d if the unit is seconds -- confirm
            "stops": [2592000, 604800, 86400],
            "data": [
                "605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778",
            ],
            "yscale": "log",
        }
    )
    # Only checks that something non-empty is produced, not its content.
    assert ts.graph
|
Loading…
Reference in New Issue