diff --git a/ltsdb_json.py b/ltsdb_json.py
index 454f6d3..4537dcb 100644
--- a/ltsdb_json.py
+++ b/ltsdb_json.py
@@ -4,9 +4,13 @@ import fcntl
 import glob
 import hashlib
 import json
+import logging
+import math
 import random
 import time
 
+log = logging.getLogger()
+
 class LTS:
     base_dir = "data"
     queue_dir = "queue"
@@ -39,10 +43,38 @@ class LTS:
         json.dump({"description": self.description, "data": self.data}, fh)
         self.rebuild_index()
 
+    def shrink(self):
+        # Remove one element in such a way that the distribution gets closer
+        # to an exponential curve through the first and the last few data
+        # points.
+        # To do this we compute the ideal t value at each point and compare it
+        # to the real value. We remove the first point which sticks out too
+        # much (I'm tempted to dub this the barber's algorithm).
+        # This is extremely inefficient, but it's simple to understand and it works.
+        data = self.data
+        n = len(data)
+        t_last = data[-1][0]
+        dt = (t_last - data[-5][0]) / 4
+        k = math.log((t_last - data[0][0]) / dt / n + 1)
+        for i in range(1, n):
+            t_ideal = (math.exp(k * (n - i)/n) - 1) * (n * dt)
+            if t_last - data[i][0] > t_ideal:
+                log.debug("%s - %s > %s -> popping element %s", t_last, data[i][0], t_ideal, i)
+                data.pop(i)
+                break
+        else:
+            # Well, it works mostly. Sometimes all the real points are below
+            # the curve, but we have to remove one anyway. This needs to be
+            # heavily biased towards newer data points, but we don't want to
+            # delete the few newest data points, so choose one at random from a
+            # narrow range just before them.
+            i = random.randrange(int(n*0.98), int(n*0.99))
+            log.debug("no match -> popping element %s", i)
+            data.pop(i)
+
     def add(self, ts, value):
         while len(self.data) >= self.limit:
-            r = random.randrange(0, self.limit)
-            self.data.pop(r)
+            self.shrink()
         if len(self.data) == 0 or ts >= self.data[-1][0]:
             self.data.append((ts, value,))
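
To see the thinning behaviour in isolation, here is a minimal standalone sketch of the same idea. It is not part of the patch: shrink_once, LIMIT and the plain integer timestamps are made up for illustration, whereas ltsdb_json.py stores (timestamp, value) tuples and keeps the logic inside the LTS class.

# Standalone sketch of the thinning idea; names here are illustrative,
# not taken from ltsdb_json.py.
import math
import random

LIMIT = 1000

def shrink_once(data):
    # Drop one timestamp so the ages approximate an exponential curve
    # through the first point and the last few points.
    n = len(data)
    t_last = data[-1]
    dt = (t_last - data[-5]) / 4                      # spacing of the newest points
    k = math.log((t_last - data[0]) / dt / n + 1)
    for i in range(1, n):
        t_ideal = (math.exp(k * (n - i) / n) - 1) * (n * dt)
        if t_last - data[i] > t_ideal:                # older than the curve allows
            data.pop(i)
            return
    # Nothing stuck out above the curve: drop a random point from a
    # narrow range just before the newest ones.
    data.pop(random.randrange(int(n * 0.98), int(n * 0.99)))

data = []
for ts in range(20_000):                              # one sample per "second"
    while len(data) >= LIMIT:
        shrink_once(data)
    data.append(ts)

ages = [data[-1] - t for t in data]
print(ages[:5], ages[-5:])   # old entries widely spaced, new ones dense

Plotting ages against the index should show the roughly exponential decay the comment in shrink() aims for. The choice of k makes the endpoints come out right: at i = 0 the formula gives t_ideal = t_last - data[0], and at i = n it gives 0, so the curve passes through the oldest point and hugs the newest ones with spacing dt.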