From 001d03790db6d165d032a7ed34cecc303316beb7 Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer" <hjp@hjp.at>
Date: Sat, 7 Sep 2024 14:36:16 +0200
Subject: [PATCH] Search for global minimum if start of timeseries is unusable

---
 process_queue | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/process_queue b/process_queue
index c1973e8..85c17b2 100644
--- a/process_queue
+++ b/process_queue
@@ -55,14 +55,27 @@ class DiskFullPredictor:
                 tuf = now - lts.data[i][0]
                 break
         else:
-            # XXX - this is probably a range, so maybe we should use some kind
-            # of average. It might also be zero, so maybe we have to search for
-            # the first non-zero value? For now keep it simple.
-            first_used_bytes = lts.data[0][1]
-            historic_growth = current_used_bytes / first_used_bytes
-            future_growth = current_usable_bytes  / current_used_bytes
-            tuf = math.log(future_growth) / math.log(historic_growth) * (now - lts.data[0][0])
-            tuf = max(tuf, now - lts.data[0][0])
+            # Always try to use the minimum of a range.
+            # We prefer the first datapoint.
+            first_used_bytes = lts.data[0][2] if len(lts.data[0]) >= 4 else lts.data[0][1]
+            # But if that's not usable, we search the whole timeseries for the
+            # minimum.
+            if first_used_bytes >= current_used_bytes:
+                first_used_bytes = current_used_bytes
+                first_i = None
+                for i in range(len(lts.data)):
+                    used_bytes = lts.data[i][2] if len(lts.data[i]) >= 4 else lts.data[i][1]
+                    if used_bytes < first_used_bytes:
+                        first_used_bytes = used_bytes
+                        first_i = i
+            else:
+                first_i = 0
+
+            if first_i is not None:
+                historic_growth = current_used_bytes / first_used_bytes
+                future_growth = current_usable_bytes  / current_used_bytes
+                tuf = math.log(future_growth) / math.log(historic_growth) * (now - lts.data[first_i][0])
+                tuf = max(tuf, now - lts.data[first_i][0])
         desc = {**lts.description,
              "measure": "time_until_disk_full",
              "node": node,