Add navigation templates

Add navigation
Cap time until full at 1E9 seconds
2024-10-21 23:55:19 +02:00 · 2024-10-20 11:27:23 +02:00 · 2024-09-07 15:13:42 +02:00 · 2024-09-07 14:36:16 +02:00 · 2024-09-07 12:01:20 +02:00 · 2024-08-24 23:07:05 +02:00
23 changed files with 724 additions and 130 deletions
--- a/app.py
+++ b/app.py
@ -5,7 +5,9 @@ import logging
 import logging.config
 import os

-from flask import (Flask, request, jsonify, abort, render_template)
+from collections import defaultdict
+
+from flask import (Flask, request, jsonify, abort, render_template, url_for)

 from ltsdb_json import LTS
 from dashboard import Dashboard
@ -25,6 +27,10 @@ def home():

@app.route("/report", methods=["POST"])
 def report():
+    return record()
+
+@app.route("/record", methods=["POST"])
+def record():
    data = request.get_json()
    n_ts = 0
    n_dp = 0
@ -93,8 +99,9 @@ def verify_node(d):
    if "/" in node:
        raise ValueError("invalid node name %s", node)
    try:
-        log.info("getting client config from %s", "config/" + node)
-        with open("config/" + node) as fh:
+        fn = "config/" + node
+        log.info("getting client config from %s", fn)
+        with open(fn) as fh:
            node_conf = json.load(fh)
    except Exception as e:
        log.warning("got %s opening %s", e, "config/" + node)
@ -108,8 +115,19 @@ def verify_node(d):
            if timestamp > node_conf["last"]:
                node_conf["last"] = timestamp
                os.replace("config/" + node, "config/" + node + ".old")
-                with open("config/" + node, "w") as fh:
+                tmpfn = fn + "." + str(os.getpid())
+                oldfn = fn + ".old"
+                with open(tmpfn, "w") as fh:
                    json.dump(node_conf, fh) # XXX
+                try:
+                    os.unlink(oldfn)
+                except FileNotFoundError:
+                    pass
+                try:
+                    os.link(fn, oldfn)
+                except FileNotFoundError:
+                    pass
+                os.rename(tmpfn, fn)
                return node
            else:
                abort(409, "timestamp out of sync")
@ -136,3 +154,71 @@ def dashboard_index():
 def dashboard_file(dashboard):
    d = Dashboard("dashboards/" + dashboard + ".json")
    return d.as_html()
+
+@app.get("/nav")
+def nav():
+    """
+    Browse the time series index by dimension.
+
+    Start with a list of all dimensions, the number of matching time series
+    and a truncated list of series. If a dimension is chosen (".m"), display
+    a choice of its members; choosing one or more members goes back to the
+    list of (remaining) dimensions.
+
+    Query parameters not starting with "." are dimension=member selections:
+    members of one dimension are ORed, different dimensions are ANDed.
+    """
+    with open("data/.index") as fh:
+        fcntl.flock(fh, fcntl.LOCK_SH)
+        index = json.load(fh)
+    timeseries = None
+    for k, v in request.args.lists():
+        if k.startswith("."):
+            continue
+        log.debug("search: %s -> %s", k, v)
+        # The query string is user input: an unknown dimension or member
+        # matches nothing instead of raising KeyError (HTTP 500).
+        known_members = index.get(k, {})
+        selected = set()
+        for m in v:
+            selected.update(known_members.get(m, ()))
+        if timeseries is None:
+            log.debug("search: %s: %s", k, known_members)
+            timeseries = selected
+        else:
+            timeseries &= selected
+    if timeseries is None:
+        # Nothing selected at all: every indexed time series matches.
+        timeseries = set()
+        for mc in index.values():
+            for tsl in mc.values():
+                timeseries.update(tsl)
+    if d := request.args.get(".m"):
+        # Offer only those members which still have matching time series.
+        members = [
+            m for m, tsl in index.get(d, {}).items()
+            if timeseries.intersection(tsl)
+        ]
+        return render_template("nav_member_select.html", dimension=d, members=members)
+
+    params = request.args.to_dict(flat=False)
+    matching_dimensions = defaultdict(int)
+    for d, mc in index.items():
+        if d in params:
+            # Already constrained; only the remaining dimensions are listed.
+            continue
+        for tsl in mc.values():
+            if mtsl := timeseries.intersection(tsl):
+                matching_dimensions[d] += len(mtsl)
+    matching_dimensions_list = []
+    for d, count in matching_dimensions.items():
+        params[".m"] = d
+        url = url_for("nav", **params)
+        app.logger.debug(f"{d=} {url=}")
+        matching_dimensions_list.append({"name": d, "count": count, "url": url})
+    total_timeseries = len(timeseries)
+    # Sort before truncating so the same query always shows the same series.
+    timeseries = [LTS(id=ts) for ts in sorted(timeseries)[:100]]
+    return render_template(
+                "nav_dimension_list.html",
+                matching_dimensions=matching_dimensions_list,
+                timeseries=timeseries, total_timeseries=total_timeseries)
+
+
+
+
+
+
+    # 
--- a/clients/ltsdb_record.py
+++ b/clients/ltsdb_record.py
@ -7,8 +7,9 @@ import time

 import requests

-def send_report(report):
-    node = socket.gethostbyaddr(socket.gethostname())[0]
+node = socket.gethostbyaddr(socket.gethostname())[0]
+
+def record_observations(observations):
    for dir in (".", os.environ["HOME"] + "/.config/ltsdb", "/etc/ltsdb"):
        try:
            with open(dir + "/config.json") as fh:
@ -18,16 +19,16 @@ def send_report(report):
        except FileNotFoundError:
            pass
    while True:
-        for r in report:
+        for obs in observations:
            timestamp = time.time()
            msg = (node + " " + str(timestamp)).encode("UTF-8")
            digest = hmac.new(client_config["key"].encode("UTF-8"), msg, "SHA256").hexdigest()
-            r["auth"] = {
+            obs["auth"] = {
                "node": node,
                "timestamp": timestamp,
                "hmac": digest,
            }
-        r = requests.post(baseurl + "report", json=report)
+        r = requests.post(baseurl + "record", json=observations)
        print(r)
        if r.status_code == 200:
            return True
--- a/clients/record_https
+++ b/clients/record_https
@ -14,7 +14,7 @@ import urllib.parse

 import requests

-import ltsdb_report
+import ltsdb_record

 ap = argparse.ArgumentParser()
 ap.add_argument("url")
@ -57,6 +57,6 @@ report = [
    for r in report0
 ]

-success = ltsdb_report.send_report(report)
+success = ltsdb_record.record_observations(report)
 exit(1 - success)

--- a/clients/record_meminfo
+++ b/clients/record_meminfo
@ -0,0 +1,40 @@
+#!/usr/bin/python3
+
+import time
+import re
+
+import ltsdb_record
+
+
+now = time.time()
+report0 = []
+with open("/proc/meminfo") as mfh:
+    for ln in mfh:
+        m = re.match(r"^(?P<measure>.*?): *(?P<value>[0-9]*)( (?P<unit>.*))?", ln)
+        measure = m.group("measure")
+        value = int(m.group("value"))
+        unit = m.group("unit")
+        if unit is None and "Pages" in measure:
+            unit = "pages"
+        if unit == "kB":
+            value *= 1024
+            unit = "bytes"
+        report0.append({ "measure": "meminfo." + measure, "unit": unit, "value": value })
+
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "measure": r["measure"],
+            "unit": r["unit"]
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
+
--- a/clients/record_os_version
+++ b/clients/record_os_version
@ -0,0 +1,44 @@
+#!/usr/bin/python3
+import re
+import subprocess
+import time
+
+import ltsdb_record
+
+p = subprocess.run(["/usr/bin/lsb_release", "-ir"],
+                   stdout=subprocess.PIPE, universal_newlines=True)
+# Collect the "Key: value" pairs printed by lsb_release.
+fields = {}
+for ln in p.stdout.splitlines():
+    m = re.match(r"(.*?)\s*:\s+(.*)", ln)
+    if m:
+        fields[m.group(1)] = m.group(2)
+try:
+    distributor = fields["Distributor ID"].lower()
+    release = fields["Release"]
+except KeyError as e:
+    # Without both fields there is nothing sensible to record; fail with a
+    # clear message instead of a NameError further down.
+    raise SystemExit("lsb_release did not report %s" % e)
+if distributor == "ubuntu":
+    # special rule for ubuntu. The format is year.month, so we convert the
+    # months into fractional years
+    m = re.match(r"(\d+)\.(\d+)", release)
+    release = int(m.group(1)) + (int(m.group(2)) - 1) / 12
+else:
+    # for everybody else we assume it's a floating-point number
+    release = float(release)
+
+report0 = []
+report0.append({ "measure": "os_version_" + distributor, "unit": "version", "value": release})
+now = time.time()
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "measure": r["measure"],
+            "unit": r["unit"]
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
--- a/clients/record_pg_stat_activity
+++ b/clients/record_pg_stat_activity
@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+import time
+import re
+
+import psycopg2
+import psycopg2.extras
+
+import ltsdb_record
+
+db = psycopg2.connect() # We only get useful results if we are postgres, but for testing we can be any user
+
+csr = db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor)
+csr.execute(
+        """
+        select datname, count(*)
+        from pg_stat_activity
+        where backend_type = 'client backend' group by datname order by datname
+        """)
+
+total = 0
+
+now = time.time()
+report0 = []
+for r in csr:
+        report0.append(
+                    {
+                        "measure": "connections",
+                        "database": r.datname,
+                        "unit": "connections",
+                        "value": r.count
+                    }
+                )
+        total += r.count
+report0.append(
+            {
+                "measure": "connections",
+                "database": "ALL",
+                "unit": "connections",
+                "value": total
+            }
+        )
+
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "measure": r["measure"],
+            "database": r["database"],
+            "unit": r["unit"]
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
--- a/clients/record_postgresql_version
+++ b/clients/record_postgresql_version
@ -0,0 +1,36 @@
+#!/usr/bin/python3
+import re
+import subprocess
+import time
+
+import ltsdb_record
+
+p = subprocess.run(["psql", "-c", "select version()", "-A", "-t", "-X", "-q",],
+                   stdout=subprocess.PIPE, universal_newlines=True)
+
+# Only PostgreSQL 10 and above ("major.minor") is understood; older versions are not expected any more.
+m = re.match(r"^PostgreSQL (\d+)\.(\d+) ", p.stdout)
+if not m:
+    raise SystemExit("cannot parse PostgreSQL version from %r" % p.stdout)
+version = int(m.group(1)) + int(m.group(2)) / 100
+
+report0 = []
+report0.append({ "measure": "postgresql_version", "unit": "version", "value":
+version})
+now = time.time()
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "measure": r["measure"],
+            "unit": r["unit"]
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
--- a/clients/record_reboot_overdue
+++ b/clients/record_reboot_overdue
@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import time
+import glob
+import os.path
+
+import ltsdb_record
+
+now = time.time()
+with open("/proc/uptime") as fh:
+    ln = fh.readline()
+    uptime = float(ln.split()[0])
+last_reboot = now - uptime
+
+last_mtime = 0
+for p in glob.glob("/boot/**/*", recursive=True):
+    mtime = os.path.getmtime(p)
+    if mtime > last_mtime:
+        last_mtime = mtime
+
+if last_mtime > last_reboot:
+    overdue = now - last_mtime
+else:
+    overdue = 0
+
+report0 = []
+report0.append({ "measure": "reboot_overdue", "unit": "s", "value": overdue})
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "measure": r["measure"],
+            "unit": r["unit"]
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
--- a/clients/record_smart
+++ b/clients/record_smart
@ -0,0 +1,78 @@
+#!/usr/bin/python3
+import argparse
+import re
+import subprocess
+import time
+
+import ltsdb_record
+
+ap = argparse.ArgumentParser()
+ap.add_argument("device")
+args = ap.parse_args()
+
+p = subprocess.run(["/usr/sbin/smartctl", "-A", args.device],
+                   stdout=subprocess.PIPE, universal_newlines=True)
+report0 = []
+state = 0
+for ln in p.stdout.splitlines():
+    if state == 0 and ln.startswith("ID# ATTRIBUTE_NAME"):
+        state = 1
+    elif state == 1 and ln == "":
+        state = 2
+    elif state == 1:
+        (id, attribute_name, flag, value, worst, thresh, type, updated, when_failed, raw_value) = ln.split(None, 9)
+
+        if attribute_name == "Command_Timeout":
+            # This is a tuple of three values and I don't know what they mean
+            # so I just skip them.
+            # I guess I could just record them as smart_command_timeout_1,
+            # smart_command_timeout_2 and smart_command_timeout_3 ...
+            continue
+        if "_Ct" in attribute_name or "_Count" in attribute_name or "_Cnt" in attribute_name:
+            unit = "count"
+        elif "_Hours" in attribute_name:
+            unit = "hours"
+        elif "Total_LBAs_Written" in attribute_name:
+            unit = "blocks"
+        elif "Temperature_Cel" in attribute_name:
+            unit = "°C"
+        else:
+            unit = "unknown"
+
+        if unit == "°C":
+            # Sometimes there is extra information included - just ignore that.
+            value = int(raw_value.split()[0])
+        elif unit == "hours":
+            if m := re.match(r"([0-9]+)h\+([0-9]+)m\+([0-9.]+)s", raw_value):
+                # e.g. 60633h+54m+11.557s: groups are hours, minutes, seconds; result is in hours
+                value = (int(m.group(1)) * 3600 + int(m.group(2)) * 60 + float(m.group(3))) / 3600
+            else:
+                value = int(raw_value)
+        else:
+            value = int(raw_value)
+
+        report0.append(
+            {
+                "measure": "smart_" + attribute_name.lower(),
+                "unit": unit,
+                "value": value,
+            })
+now = time.time()
+report = [
+    {
+        "description": {
+            "hostname": ltsdb_record.node,
+            "device": args.device,
+            "measure": r["measure"],
+            "unit": r["unit"],
+        },
+        "data": [
+            [now, r["value"]]
+        ]
+    }
+    for r in report0
+]
+
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
+
--- a/clients/record_tlscert
+++ b/clients/record_tlscert
@ -1,14 +1,11 @@
 #!/usr/bin/python3

 import argparse
-import hmac
-import json
-import os
 import socket
 import ssl
 import time

-import requests
+import ltsdb_record

 ap = argparse.ArgumentParser()
 ap.add_argument("--verbose", action="store_true")
@ -16,23 +13,20 @@ ap.add_argument("hostname")
 ap.add_argument("port", type=int, default=443, nargs="?")
 args = ap.parse_args()

-# It's a bit weird that this works.
-myhostname = socket.gethostbyaddr(socket.gethostname())[0]
-
 now = time.time()
 report0 = []

-with socket.create_connection((args.hostname, args.port)) as sock:
-    context = ssl.create_default_context()
-    try:
+try:
+    with socket.create_connection((args.hostname, args.port)) as sock:
+        context = ssl.create_default_context()
        with context.wrap_socket(sock, server_hostname=args.hostname) as ssock:
            cert = ssock.getpeercert()
            not_after = ssl.cert_time_to_seconds(cert["notAfter"])
            delta = not_after - now
-    except ssl.SSLCertVerificationError as e:
-        print("got error %s; setting delta to 0", e)
-        delta = 0
-    report0.append({ "measure": "tls_cert_ttl", "unit": "s", "value": delta })
+except (ssl.SSLCertVerificationError, ConnectionRefusedError) as e:
+    print("got error %s; setting delta to 0" % e)
+    delta = 0
+report0.append({ "measure": "tls_cert_ttl", "unit": "s", "value": delta })

 report = [
    {
@ -49,32 +43,5 @@ report = [
    for r in report0
 ]

-for dir in (".", os.environ["HOME"] + "/.config/ltsdb", "/etc/ltsdb"):
-    try:
-        with open(dir + "/config.json") as fh:
-            client_config = json.load(fh)
-        baseurl = client_config["server"]
-        break
-    except FileNotFoundError:
-        pass
-while True:
-    for r in report:
-        node = myhostname
-        timestamp = time.time()
-        msg = (node + " " + str(timestamp)).encode("UTF-8")
-        digest = hmac.new(client_config["key"].encode("UTF-8"), msg, "SHA256").hexdigest()
-        r["auth"] = {
-            "node": node,
-            "timestamp": timestamp,
-            "hmac": digest,
-        }
-    #pprint.pp(report)
-    r = requests.post(baseurl + "report", json=report)
-    print(r)
-    if r.status_code == 200:
-        exit(0)
-    elif r.status_code == 409:
-        time.sleep(0.5 + random.random())
-        continue
-    else:
-        exit(1)
+success = ltsdb_record.record_observations(report)
+exit(1 - success)
--- a/dashboard.py
+++ b/dashboard.py
@ -27,11 +27,15 @@ class Dashboard:
                if w.get("multi"):
                    ts_list = LTS.find(w["data"][0])
                    for ts in ts_list:
-                        tso = LTS(id=ts)
+                        try:
+                            tso = LTS(id=ts)
+                        except json.decoder.JSONDecodeError as e:
+                            log.error("%s contains bad data: %s: Skipping", ts, e)
+                            continue
                        if not tso.data:
                            log.warning("%s has no data: Skipping", tso.id)
                            continue
-                        if tso.data[-1][0] < time.time() - 86400:
+                        if tso.data[-1][0] < time.time() - 7 * 86400:
                            log.info("%s too old; Skipping", tso.id)
                            continue
                        w1 = {**w, "data": [ts]}
@ -56,6 +60,10 @@ class Dashboard:
            else:
                self.widgets.append(Widget(w))

+        # Sort widgets by ascending healthscore to get the most critical at the
+        # top.
+        self.widgets.sort(key=lambda w: w.healthscore())
+
    def as_html(self):
        return render_template("dashboard.html", dashboard=self)

@ -70,65 +78,40 @@ class Widget:
        self.extra = {}
        log.debug("data = %s", d["data"])
        self.lts = LTS(id=d["data"][0]) # by default we handle only one data source
-        pass
+        self.lastvalue = self.lts.data[-1][1]

    def as_html(self):
        log.debug("")
-        self.lastvalue = self.lts.data[-1][1]
        return Markup(render_template("widget.html", widget=self))

-    def criticalcolor(self, value=None):
+    def healthscore(self, value=None):
+        """
+        Return a score between 0 (unhealthy) and 100 (healthy)
+        """
        if value == None:
            value = self.lastvalue
        log.debug("stops = %s", self.stops)
+        stops = self.stops
+        if stops[-1] < stops[0]:
+            value = -value
+            stops = [-v for v in stops]
+
+        if value <= stops[0]:
+            log.debug("ok")
+            return 100
+        if value >= stops[-1]:
+            log.debug("fail")
+            return 0
+        for i in range(0, len(stops) - 1):
+            if stops[i] <= value < stops[i+1]:
+                log.debug("at stop %d", i)
+                return 100 - ((value - stops[i]) / (stops[i+1] - stops[i]) + i) * 100 / (len(stops) - 1)
+
+    def criticalcolor(self, value=None):
+        healthscore = self.healthscore(value)
+        hue = round(healthscore * 120 / 100)
        brightness = 30
-        if self.stops[0] < self.stops[2]:
-            if value < self.stops[0]:
-                log.debug("definitely ok")
-                return f"hsl(120, 100%, {brightness}%)"
-            elif value < self.stops[1]:
-                log.debug("mostly ok")
-                hue = 120 - round(
-                                (value - self.stops[0])
-                              / (self.stops[1] - self.stops[0])
-                              * 60
-                            )
-                return f"hsl({hue}, 100%, {brightness}%)"
-            elif value < self.stops[2]:
-                log.debug("maybe fail")
-                hue =  60 - round(
-                                (value - self.stops[1])
-                              / (self.stops[2] - self.stops[1])
-                              * 60
-                            )
-                return f"hsl({hue}, 100%, {brightness}%)"
-            else:
-                log.debug("definitely fail")
-                return f"hsl(0, 100%, {brightness}%)"
-        else:
-            log.debug("the other side")
-            if value > self.stops[0]:
-                log.debug("definitely ok")
-                return f"hsl(120, 100%, {brightness}%)"
-            elif value > self.stops[1]:
-                log.debug("mostly ok")
-                hue = 120 - round(
-                                (value - self.stops[0])
-                              / (self.stops[1] - self.stops[0])
-                              * 60
-                            )
-                return f"hsl({hue}, 100%, {brightness}%)"
-            elif value > self.stops[2]:
-                log.debug("maybe fail")
-                hue =  60 - round(
-                                (value - self.stops[1])
-                              / (self.stops[2] - self.stops[1])
-                              * 60
-                            )
-                return f"hsl({hue}, 100%, {brightness}%)"
-            else:
-                log.debug("definitely fail")
-                return f"hsl(0, 100%, {brightness}%)"
+        return f"hsl({hue}, 100%, {brightness}%)"

    @property
    def description_formatted(self):
@ -160,9 +143,13 @@ class TimeSeries(Widget):

        def v2y(v):
            if self.yscale == "log":
-                return (1 -   math.log(v / min_value)
-                            / math.log(max_value / min_value)
-                       ) * 200
+                try:
+                    return (1 -   math.log(max(v, min_value) / min_value)
+                                / math.log(max_value / min_value)
+                           ) * 200
+                except ValueError:
+                    log.error(f"ValueError: v = {v}, min_value = {min_value}, max_value = {max_value}")
+                    return 0
            elif self.yscale == "linear":
                return (1 - v/max_value) * 200
            else:
@ -179,11 +166,12 @@ class TimeSeries(Widget):
            min_step = 25
            steps = ("s", "m", "h", "D", "10D", "M", "Y")
            step_i = 0
-            while True:
+            while step_i < len(steps):
                t0 = tickmarks[-1]["t"]
                x0 = tickmarks[-1]["x"]
                d0 = datetime.datetime.fromtimestamp(t0)

+                log.debug("step_i = %s", step_i)
                if steps[step_i] == "s":
                    d1 = datetime.datetime(d0.year, d0.month, d0.day, d0.hour, d0.minute, d0.second)
                    t1 = d1.timestamp()
@ -295,6 +283,7 @@ class TimeSeries(Widget):
                        t1 = d1.timestamp()
                        x1 = t2x(t1)
                        if x0 - x1 < min_step:
+                            log.debug("t0 = %s, x0 = %s, t1 = %s, x1 = %s", t0, x0, t1, x1)
                            step_i += 1
                            continue
                if x1 < 0:
@ -315,7 +304,11 @@ class TimeSeries(Widget):
            if self.yscale == "linear":
                log.debug("")
                if unit == "s" and max_value > 3600:
-                    if max_value >= 4 * 7 * 86400:
+                    if max_value >= 16 * 7 * 86400:
+                        step = 4 * 7 * 86400
+                        step_d = 4
+                        unit = "w"
+                    elif max_value >= 4 * 7 * 86400:
                        step = 7 * 86400
                        step_d = 1
                        unit = "w"
@ -387,7 +380,7 @@ class TimeSeries(Widget):
            log.debug("")


-        log.debug("in graph")
+        log.debug("in graph for %s", self.lts.id)
        data = self.lts.data
        n = len(data)
        t_last = data[-1][0]
@ -395,13 +388,18 @@ class TimeSeries(Widget):
            return "(not enough data)"
        dt = (t_last - data[-5][0]) / 4
        k = math.log((t_last - data[0][0]) / dt / n + 1)
+        log.debug("times = [%s ... %s ... %s]", data[0][0], data[-5][0], data[-1][0])

-        max_value = max([d[1] for d in self.lts.data])
+        max_value = max([d[3] if len(d) >= 4 else d[1] for d in self.lts.data])
        max_value = max(max_value, 0.001) # ensure positive
+        unit = self.lts.description["unit"]
        if self.yscale == "log":
            try:
                min_value = min(d[1] for d in self.lts.data if d[1] > 0)
-                self.extra["min"] = "%g" % min_value
+                if unit == "s":
+                    self.extra["min"] = "%g" % min_value  + " (" + self.format_time(min_value) + ")"
+                else:
+                    self.extra["min"] = "%g" % min_value
            except ValueError:
                # no non-negative values
                min_value = max_value / 2
@ -415,16 +413,28 @@ class TimeSeries(Widget):
                # Make sure min_value is less than max_value
                min_value /= 2
            log.debug("min_value = %s, max_value = %s", min_value, max_value)
-        self.extra["max"] = "%g" % max_value
-        self.extra["last"] = "%g" % data[-1][1]
+        if unit == "s":
+            self.extra["max"] = "%g" % max_value + " (" + self.format_time(max_value) + ")"
+            self.extra["last"] = "%g" % data[-1][1] + " (" + self.format_time(data[-1][1]) + ")"
+        else:
+            self.extra["max"] = "%g" % max_value
+            self.extra["last"] = "%g" % data[-1][1]
        log.debug("collecting data")
        v_data = []
        for i in range(n):
            t = data[i][0]
            v = data[i][1]
+            if len(data[i]) >= 4:
+                v_min = data[i][2]
+                v_max = data[i][3]
+            else:
+                v_min = data[i][1]
+                v_max = data[i][1]
            x = t2x(t)
            t_h = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
            y = v2y(v)
+            y_min = v2y(v_min)
+            y_max = v2y(v_max)
            #print(t, t_h, x)
            v_data.append(
                    {
@ -432,6 +442,8 @@ class TimeSeries(Widget):
                        "v": v,
                        "x": x,
                        "y": y,
+                        "y_min": y_min,
+                        "y_max": y_max,
                        "color": self.criticalcolor(v),
                    })

@ -450,7 +462,8 @@ class TimeSeries(Widget):
            html += f"<line x1=0 y1={tm['y']} x2=1000 y2={tm['y']} stroke='#CCC' />"
            html += f"<text x=1005 y={tm['y']} fill='#888'>{tm['v_h']}</text>"
        for v in v_data:
-            html += f"<circle cx={v['x']} cy={v['y']} r=3 fill='{v['color']}' />"
+            html += f"<line x1={v['x']-3} x2={v['x']+3} y1={v['y']} y2={v['y']} stroke='{v['color']}' />"
+            html += f"<line x1={v['x']} x2={v['x']} y1={v['y_min']} y2={v['y_max']} stroke='{v['color']}' />"
        html += "</svg>"
        log.debug("len(html) = %s", len(html))
        return Markup(html)
@ -459,6 +472,29 @@ class TimeSeries(Widget):
        log.debug("in as_html")
        return Markup(render_template("timeseries.html", widget=self))

+    def format_time(self, seconds):
+        """
+        Format a duration given in seconds with a human-friendly unit.
+
+        Picks the largest of years, days, h, m, s or ms for which the value
+        is at least 1 and returns it with two decimals, e.g. "1.50 m".
+        Anything below 0.001 (including negative values) stays in seconds.
+        """
+        for size, unit in (
+            (365.25 * 86400, "years"),
+            (86400, "days"),
+            (3600, "h"),
+            (60, "m"),
+        ):
+            if seconds >= size:
+                return f"{seconds / size:.2f} {unit}"
+        if 0.001 <= seconds < 1:
+            return f"{seconds * 1000:.2f} ms"
+        return f"{seconds:.2f} s"
+
+
 class Gauge(Widget):
    def __init__(self, d):
        super().__init__(d)
@ -500,3 +536,4 @@ class Gauge(Widget):
        self.lastvalue_formatted = Markup(f"<span class='value'>{value:.2f}</span><span class='unit'>{unit}</unit>")
        return Markup(render_template("gauge.html", widget=self))

+# vim: sw=4
--- a/ltsdb_json.py
+++ b/ltsdb_json.py
@ -41,7 +41,36 @@ class LTS:
            with open(self.filename, "x+") as fh:
                fcntl.flock(fh, fcntl.LOCK_EX)
                json.dump({"description": self.description, "data": self.data}, fh)
+                log.info(f"Created {self.filename}")
            self.rebuild_index()
+        except json.decoder.JSONDecodeError as e:
+            log.exception(f"Cannot decode JSON in {self.filename}: {e}")
+            raise
+
+    def pop(self, i):
+        """
+        Remove and return the entry at index i of self.data.
+
+        The min/max range of the removed entry is merged into both remaining
+        neighbours, so the range it covered stays visible. Neighbours stored
+        as [t, v] are first expanded to [t, v, min, max].
+        (Adjusting the neighbours' value to some weighted average might also
+        make sense, but it is not clear that this is a good idea.)
+        """
+        data = self.data
+        old = data.pop(i) # after that the neighbours are at i-1, i
+        if len(old) >= 4:
+            min_v, max_v = old[2], old[3]
+        else:
+            min_v = max_v = old[1]
+        neighbours = []
+        if i > 0:
+            neighbours.append(i - 1)
+        if i < len(data):
+            neighbours.append(i)
+        for j in neighbours:
+            if len(data[j]) == 2:
+                t, v = data[j]
+                data[j] = [t, v, v, v]
+            # min()/max() keep the existing bound on ties, like a strict compare.
+            data[j][2] = min(data[j][2], min_v)
+            data[j][3] = max(data[j][3], max_v)
+        return old

    def shrink(self):
        # Remove one element in such a way that the distributions gets closer
@ -61,7 +90,7 @@ class LTS:
            t_ideal = (math.exp(k * (n - i)/n) - 1) * (n * dt)
            if t_last - data[i][0] > t_ideal:
                log.debug("%s - %s > %s -> popping element %s", t_last, data[i][0], t_ideal, i)
-                data.pop(i)
+                self.pop(i)
                break
        else:
            # Well, it works mostly. Sometimes all the real points are below
@ -71,7 +100,7 @@ class LTS:
            # narrow range just before that.
            i = random.randrange(int(n*0.98), int(n*0.99))
            log.debug("no match -> popping element %s", i)
-            data.pop(i)
+            self.pop(i)

    def add(self, ts, value):
        while len(self.data) >= self.limit:
@ -102,7 +131,11 @@ class LTS:
            (_, _, hash) = fn.rpartition("/")
            with open(fn, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
-                d = json.load(fh)
+                try:
+                    d = json.load(fh)
+                except json.decoder.JSONDecodeError as e:
+                    log.exception(f"Cannot decode JSON in {fn}: {e}")
+                    raise
            for k, v in d["description"].items():
                d1 = index.setdefault(k, {})
                d2 = d1.setdefault(v, [])
--- a/10
+++ b/10
@ -1,20 +1,22 @@
 #!/usr/bin/python3
+import time
+import random

 from ltsdb_json import LTS

 ts1 = LTS({"hostname": "rorschach.hjp.at", "measure": "uptime"})
-ts1.add(1661026122, 4)
+ts1.add(time.time(), 4 + random.random())
 ts1.save()

 ts1 = LTS({"hostname": "rorschach.hjp.at", "website": "i12e.hjp.at", "measure": "rtt"})
-ts1.add(1661026122, 0.06)
+ts1.add(time.time(), 0.06)
 ts1.save()

 ts1 = LTS({"hostname": "rorschach.hjp.at", "measure": "uptime"})
-ts1.add(1661026361, 5)
+ts1.add(time.time() + 240, 5 + random.random())
 ts1.save()

 ts1 = LTS({"hostname": "charly.wsr.ac.at", "website": "www.wifo.ac.at", "measure": "rtt"})
-ts1.add(1661026122, 0.347)
+ts1.add(time.time(), 0.347)
 ts1.save()

--- a/36
+++ b/36
@ -2,8 +2,10 @@

 import logging
 import logging.config
+import math
 import os
 import socket
+import statistics
 import time

 from ltsdb_json import LTS
@ -44,13 +46,37 @@ class DiskFullPredictor:
        current_used_bytes = lts.data[-1][1]
        current_usable_bytes = usable_lts.data[-1][1]
        tuf = 1E9
-        for d in reversed(lts.data):
-            if d[1] < current_usable_bytes * 0.1:
+        for i in reversed(range(len(lts.data))):
+            m = statistics.mean(x[1] for x in lts.data[max(0, i - 2) : min(len(lts.data), i + 3)])
+            if m < current_usable_bytes * 0.1:
                continue # for sanity
-            if current_used_bytes ** 2 / d[1] > current_usable_bytes:
-                log.info("d = %s, current_used_bytes = %s, current_usable_bytes = %s", d, current_used_bytes, current_usable_bytes)
-                tuf = now - d[0]
+            if current_used_bytes ** 2 / m > current_usable_bytes:
+                log.info("d = %s, current_used_bytes = %s, current_usable_bytes = %s", m, current_used_bytes, current_usable_bytes)
+                tuf = now - lts.data[i][0]
                break
+        else:
+            # Always try to use a datapoint's recorded minimum if it has one.
+            # We prefer the first datapoint.
+            first_used_bytes = lts.data[0][2] if len(lts.data[0]) >= 4 else lts.data[0][1]
+            # But if that's not useable we search the whole timeseries for the
+            # minimum
+            if first_used_bytes >= current_used_bytes:
+                first_used_bytes = current_used_bytes
+                first_i = None
+                for i in range(len(lts.data)):
+                    used_bytes = lts.data[i][2] if len(lts.data[i]) >= 4 else lts.data[i][1]
+                    if used_bytes < first_used_bytes:
+                        first_used_bytes = used_bytes
+                        first_i = i
+            else:
+                first_i = 0
+
+            if first_i is not None:
+                historic_growth = current_used_bytes / first_used_bytes
+                future_growth = current_usable_bytes  / current_used_bytes
+                tuf = math.log(future_growth) / math.log(historic_growth) * (now - lts.data[first_i][0])
+                tuf = max(tuf, now - lts.data[first_i][0])
+                tuf = min(tuf, 1E9)
        desc = {**lts.description,
             "measure": "time_until_disk_full",
             "node": node,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,3 @@
+[tool.pytest.ini_options]
+pythonpath = "."
+
--- a/templates/dashboard.html
+++ b/templates/dashboard.html
@ -8,8 +8,9 @@
      body {
        font-family: sans-serif; 
      }
-      th {
+      th, td {
        text-align: left;
+        vertical-align: baseline;
      }
      main {
        display: flex;
--- a/templates/nav_dimension_list.html
+++ b/templates/nav_dimension_list.html
@ -0,0 +1,26 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta charset="utf-8">
+
+    <style>
+    </style>
+  </head>
+  <body>
+    <ul>
+      {% for d in matching_dimensions %}
+        <li><a href="{{d['url']}}">{{d.name}}</a> ({{d.count}})</li>
+      {% endfor %}
+    </ul>
+    {{timeseries|length}}/{{total_timeseries}} timeseries:
+    <ul>
+      {% for ts in timeseries %}
+        <li>
+          <a href="/v?ts={{ts.id}}">{{ts.description}}</a>
+        </li>
+      {% endfor %}
+    </ul>
+  </body>
+</html>
+
--- a/templates/nav_member_select.html
+++ b/templates/nav_member_select.html
@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <meta charset="utf-8">
+
+    <style>
+    </style>
+  </head>
+  <body>
+    <form>
+      {% for dimension, members in request.args.lists() %}
+        {% for member in members %}
+          {% if dimension[0] != "." %}
+            <input name="{{dimension}}" value="{{member}}" type="hidden">
+          {% endif %}
+        {% endfor %}
+      {% endfor %}
+      <select name="{{dimension}}" multiple size={{members|length}}>
+        {% for member in members %}
+          <option>{{member}}</option>
+        {% endfor %}
+      </select>
+      <input type="submit">
+    </form>
+  </body>
+</html>
--- a/test_data/605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778
+++ b/test_data/605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778
@ -0,0 +1,11 @@
+{"description": {"fstype": "tmpfs", "hostname": "ehoiai.int.wsr.at", "measure": "time_until_disk_full", "mountpoint": "/run/user/1054", "node": "rorschach.hjp.at", "remote_addr": "", "unit": "s"},
+"data": [
+    [1674806402.1014915, 1000000000.0],
+    [1674814201.3452954, 1000000000.0],
+    [1675164602.4004557, 1000000000.0],
+    [1675165201.6291275, 1000000000.0],
+    [1675165802.2402072, 1000000000.0],
+    [1675166401.542857, 1000000000.0],
+    [1675167002.2809808, 1000000000.0],
+    [1675167601.2632012, 1000000000.0],
+    [1675168201.8321788, 1000000000.0]]}
--- a/tests/init,py
+++ b/tests/init,py
--- a/tests/pycache/test_timeseries.cpython-39-pytest-7.2.1.pyc
+++ b/tests/pycache/test_timeseries.cpython-39-pytest-7.2.1.pyc
--- a/tests/test_healthscore.py
+++ b/tests/test_healthscore.py
@ -0,0 +1,56 @@
+import pytest
+
+from dashboard import Widget
+
+def test_healthscore_1_asc():
+    w = Widget(
+            {
+                "type": "gauge",
+                "stops": [1, 5],
+                "data": [ "605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778" ] # required by API, not used for tests
+            }
+        )
+    # stops are ordered from best to worst
+    assert w.healthscore(0) == 100
+    assert w.healthscore(1) == 100
+    assert w.healthscore(2) ==  75
+    assert w.healthscore(3) ==  50
+    assert w.healthscore(4) ==  25
+    assert w.healthscore(5) ==   0
+    assert w.healthscore(6) ==   0
+
+
+def test_healthscore_2_asc():
+    w = Widget(
+            {
+                "type": "gauge",
+                "stops": [1, 10, 100],
+                "data": [ "605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778" ] # required by API, not used for tests
+            }
+        )
+    # stops are ordered from best to worst
+    assert w.healthscore(0)    == 100
+    assert w.healthscore(1)    == 100
+    assert w.healthscore(5.5)  ==  75
+    assert w.healthscore(10)   ==  50
+    assert w.healthscore(55)   ==  25
+    assert w.healthscore(100)  ==   0
+    assert w.healthscore(1000) ==   0
+
+def test_healthscore_2_desc():
+    w = Widget(
+            {
+                "type": "gauge",
+                "stops": [100, 10, 1],
+                "data": [ "605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778" ] # required by API, not used for tests
+            }
+        )
+    # stops are ordered from best to worst
+    assert w.healthscore(0)    ==   0
+    assert w.healthscore(1)    ==   0
+    assert w.healthscore(5.5)  ==  25
+    assert w.healthscore(10)   ==  50
+    assert w.healthscore(55)   ==  75
+    assert w.healthscore(100)  == 100
+    assert w.healthscore(1000) == 100
+
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@ -0,0 +1,17 @@
+import pytest
+
+from dashboard import TimeSeries 
+
+def test_timeseries_1():
+    config = {
+        "type": "timeseries",
+        "multi": True,
+        "stops": [ 2592000, 604800, 86400 ],
+        "data": [
+            "605da6f41f58b122f41283823a99faa36286961a106ac901bb2b2d730fddc778"
+        ],
+        "yscale": "log",
+    }
+    ts = TimeSeries(config)
+    graph = ts.graph
+    assert graph
Author	SHA1	Message	Date
Peter J. Holzer	357771b345	Add navigation templates	2024-10-21 23:55:19 +02:00
Peter J. Holzer	2ca618eedd	Add navigation	2024-10-20 11:27:23 +02:00
Peter J. Holzer	33327258d1	Cap time until full at 1E9 seconds	2024-09-07 15:13:42 +02:00
Peter J. Holzer	001d03790d	Search for global minimum if start of timeseries is unusable	2024-09-07 14:36:16 +02:00
Peter J. Holzer	c462ca4d80	Extrapolate further into the future. So far we have only extrapolated as far into the future as we could look into the past. Everything beyond that was "infinity". Now we use the first and last observation to extrapolate beyond that.	2024-09-07 12:01:20 +02:00
Peter J. Holzer	2016fb4a0e	Fix formatting of time values in graph descriptions	2024-08-24 23:07:05 +02:00
Peter J. Holzer	1a798be52d	Merge branch 'master' of git.hjp.at:hjp/ltsdb	2024-08-24 22:51:43 +02:00
Peter J. Holzer	cd1750b21a	Log JSON decode errors	2024-08-24 22:51:29 +02:00
Peter J. Holzer	e921031e64	Format time values in graph descriptions	2024-08-24 22:49:43 +02:00
Peter J. Holzer	9a84e191be	Log files with invalid JSON	2024-05-24 22:32:25 +02:00
Peter J. Holzer	d3ac6703c6	Record postgresql version	2024-02-06 11:45:21 +01:00
Peter J. Holzer	70b68108c7	Treat ConnectionRefusedError like SSLCertVerificationError	2023-10-10 10:28:56 +02:00
Peter J. Holzer	ad04443928	Use ltsdb_record.record_observations	2023-09-20 11:00:39 +02:00
Peter J. Holzer	1a0ccb22b7	Avoid race condition during config update	2023-09-20 10:50:29 +02:00
Peter J. Holzer	aa66f8d615	Handle some raw values which aren't simple integers	2023-08-19 13:55:43 +02:00
Peter J. Holzer	1a7003fef2	Record raw smart vendor attributes	2023-08-19 13:13:09 +02:00
Peter J. Holzer	3f21122769	Use random time stamps. Don't remember what I wanted to test there ...	2023-08-18 21:14:15 +02:00
Peter J. Holzer	206be6a8fa	Make log scales more robust	2023-08-18 21:11:01 +02:00
Peter J. Holzer	2e8641ad18	Smooth out old data to avoid false positives in disk full prediction	2023-08-18 21:07:57 +02:00
Peter J. Holzer	a95ba6c51f	Backport to Python 3.6	2023-05-06 00:13:52 +02:00
Peter J. Holzer	5047a56fe8	Record number of connections per database from pg_stat_activity	2023-04-17 13:21:34 +02:00
Peter J. Holzer	1485debaae	Merge branch 'master' of git.hjp.at:hjp/ltsdb	2023-03-19 11:39:40 +01:00
Peter J. Holzer	74842b64dd	Rename data to test_data to prevent clash with live layout	2023-03-19 11:38:37 +01:00
Peter J. Holzer	f9f9bb4025	Allow arbitrary number of stops	2023-03-19 11:34:38 +01:00
Peter J. Holzer	f0e0ea2d73	Remove dead code	2023-03-19 11:33:19 +01:00
Peter J. Holzer	e0a1913a64	Merge branch 'master' of git.hjp.at:hjp/ltsdb	2023-02-04 14:30:59 +01:00
Peter J. Holzer	3d60e648e4	Add test case	2023-02-04 12:43:04 +01:00
Peter J. Holzer	c79f7de78e	Merge branch 'master' of git.hjp.at:hjp/ltsdb	2023-02-04 12:19:19 +01:00
Peter J. Holzer	ad601984d1	Add some debug messages	2023-02-04 12:19:01 +01:00
Peter J. Holzer	d3a439bb49	Stop if we run out of tickmark frequencies	2023-02-04 12:16:37 +01:00
Peter J. Holzer	62b22d7516	Record OS version	2023-02-03 13:27:25 +01:00
Peter J. Holzer	cfc514a3eb	Merge branch 'master' of git.hjp.at:hjp/ltsdb	2023-01-26 21:28:24 +01:00
Peter J. Holzer	5dbe0391dd	Extend linear time scale	2023-01-26 21:27:21 +01:00
Peter J. Holzer	75ce29135c	Increase obsolescence limit to 1 week	2023-01-26 21:26:25 +01:00
Peter J. Holzer	125dd443c1	Record reboot overdue time	2023-01-14 19:20:57 +01:00
Peter J. Holzer	22c790080a	Sort by healthscore	2023-01-10 21:34:44 +01:00
Peter J. Holzer	7ab45d3e90	Display min and max values in timeseries	2023-01-10 21:33:47 +01:00
Peter J. Holzer	e57e333942	Make record_meminfo executable	2023-01-07 23:40:14 +01:00
Peter J. Holzer	52a5f60389	Record meminfo	2023-01-07 13:45:03 +01:00
Peter J. Holzer	ed3f7872e9	Expose node	2023-01-07 13:26:40 +01:00
Peter J. Holzer	fdf782bc2f	Rename ltsdb_report.send_report to ltsdb_record.record_observations	2023-01-07 13:08:44 +01:00
Peter J. Holzer	a83ebfcc99	Add new API endpoint /record as a (preferred) alias for /report	2023-01-07 12:55:21 +01:00
Peter J. Holzer	ca0ce798da	Keep min and max values of dropped data points	2023-01-05 15:14:28 +01:00