Make expiry exponential

Choosing the element to expire from a uniform random distribution tends
to expire data points much too early. We want to always keep the oldest
observation and have a roughly exponential distribution between the
newest and the oldest observation.
This commit is contained in:
Peter J. Holzer 2022-12-17 17:49:19 +01:00
parent a802f2ee27
commit b10b62e77d
1 changed files with 34 additions and 2 deletions

View File

@ -4,9 +4,13 @@ import fcntl
import glob
import hashlib
import json
import logging
import math
import random
import time
log = logging.getLogger()
class LTS:
base_dir = "data"
queue_dir = "queue"
@ -39,10 +43,38 @@ class LTS:
json.dump({"description": self.description, "data": self.data}, fh)
self.rebuild_index()
def shrink(self):
    """Remove exactly one element from ``self.data``.

    The element is chosen in such a way that the distribution of the
    timestamps gets closer to an exponential curve through the first and
    the last few data points. To do this we compute the ideal age at each
    position and compare it to the real one. We remove the first point
    which sticks out too much (I'm tempted to dub this the barber's
    algorithm). This is extremely inefficient, but it's simple to
    understand and works.

    Each entry of ``self.data`` is indexed as ``entry[0]`` to obtain its
    numeric timestamp. Index 0 (the oldest entry) is never removed as long
    as at least two entries are stored.

    If the curve cannot be computed (fewer than five entries, or the
    newest five entries do not span a positive amount of time) or no
    point lies above it, an entry close to, but not at, the newest end is
    removed instead.

    Raises:
        IndexError: if ``self.data`` is empty.
    """
    data = self.data
    n = len(data)
    victim = None

    # The curve is anchored on the spacing of the newest five points, so
    # it needs at least five of them.
    if n >= 5:
        t_last = data[-1][0]
        # Average spacing of the newest five points.
        dt = (t_last - data[-5][0]) / 4
        # A zero spacing would divide by zero below; skip the curve then.
        if dt > 0:
            # k is chosen so that the ideal age at position 0 equals the
            # real age of the oldest point.
            k = math.log((t_last - data[0][0]) / dt / n + 1)
            for i in range(1, n):
                t_ideal = (math.exp(k * (n - i) / n) - 1) * (n * dt)
                if t_last - data[i][0] > t_ideal:
                    log.debug(
                        "%s - %s > %s -> popping element %s",
                        t_last, data[i][0], t_ideal, i,
                    )
                    victim = i
                    break

    if victim is None:
        # Well, it works mostly. Sometimes all the real points are below
        # the curve but we have to remove one anyway. This needs to be
        # heavily biased towards newer data points, but we don't want to
        # delete the few newest data points, so choose one at random from
        # a narrow range just before that.
        lo = int(n * 0.98)
        hi = int(n * 0.99)
        if lo < hi:
            victim = random.randrange(lo, hi)
        else:
            # For many n < 100 both bounds truncate to the same integer
            # and randrange() would raise ValueError on the empty range.
            # Take the index just below the upper bound instead, clamped
            # so that the oldest entry survives whenever n >= 2.
            victim = min(max(hi - 1, 1), n - 1)
        log.debug("no match -> popping element %s", victim)

    data.pop(victim)
def add(self, ts, value):
while len(self.data) >= self.limit:
r = random.randrange(0, self.limit)
self.data.pop(r)
self.shrink()
if len(self.data) == 0 or ts >= self.data[-1][0]:
self.data.append((ts, value,))