Make expiry exponential
Choosing the element to expire from a uniform random distribution tends to expire data points much too early. We want to always keep the oldest observation and have a roughly exponential distribution between the newest and the oldest observation.
This commit is contained in:
parent
a802f2ee27
commit
b10b62e77d
|
@ -4,9 +4,13 @@ import fcntl
|
|||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import random
|
||||
import time
|
||||
|
||||
log = logging.getLogger()
|
||||
|
||||
class LTS:
|
||||
base_dir = "data"
|
||||
queue_dir = "queue"
|
||||
|
@ -39,10 +43,38 @@ class LTS:
|
|||
json.dump({"description": self.description, "data": self.data}, fh)
|
||||
self.rebuild_index()
|
||||
|
||||
def shrink(self):
    """Remove one element so the timestamps get closer to an exponential curve.

    The curve runs through the oldest data point and is anchored on the
    average spacing of the newest few data points.  For each index the
    ideal age (distance from the newest timestamp) is computed and compared
    with the real age; the first point that sticks out above the curve is
    removed (the original author was tempted to dub this "the barber's
    algorithm").  Index 0 is never a candidate in that scan, so the oldest
    observation is kept.

    This is a linear scan per removal: inefficient, but simple to
    understand.

    If no point sticks out, or the curve cannot be computed (fewer than two
    points, or a non-positive spacing/time span), one element close to the
    newest end is removed instead.

    Assumes ``self.data`` holds ``(timestamp, value)`` tuples with numeric
    timestamps in ascending order -- TODO confirm against the callers.

    Raises:
        IndexError: if ``self.data`` is empty (there is nothing to remove).
    """
    data = self.data
    n = len(data)
    if n == 0:
        raise IndexError("cannot shrink an empty series")

    t_last = data[-1][0]
    span = t_last - data[0][0]

    # Average spacing of the newest points.  Use up to five of them; with
    # fewer points available the window shrinks instead of indexing past
    # the start of the list.
    window = min(5, n)
    dt = (t_last - data[-window][0]) / (window - 1) if window > 1 else 0

    # The curve is only defined for a positive spacing and a positive
    # total span; otherwise skip straight to the fallback below.
    if dt > 0 and span > 0:
        # k is chosen so that the ideal age at index 0 equals the real
        # age of the oldest point.
        k = math.log(span / dt / n + 1)
        for i in range(1, n):
            t_ideal = (math.exp(k * (n - i) / n) - 1) * (n * dt)
            if t_last - data[i][0] > t_ideal:
                log.debug("%s - %s > %s -> popping element %s", t_last, data[i][0], t_ideal, i)
                data.pop(i)
                return

    # Sometimes all the real points are below the curve but one has to be
    # removed anyway.  The choice is heavily biased towards newer data
    # points, but should avoid the few newest ones, so pick at random from
    # a narrow range just before them.
    low = int(n * 0.98)
    high = int(n * 0.99)
    if high > low:
        i = random.randrange(low, high)
    else:
        # For small n the range above is empty and randrange() would raise
        # ValueError.  Take the element just before the newest one, but
        # never index 0 unless it is the only element.
        i = min(n - 1, max(1, min(low, n - 2)))
    log.debug("no match -> popping element %s", i)
    data.pop(i)
|
||||
|
||||
def add(self, ts, value):
|
||||
while len(self.data) >= self.limit:
|
||||
r = random.randrange(0, self.limit)
|
||||
self.data.pop(r)
|
||||
self.shrink()
|
||||
|
||||
if len(self.data) == 0 or ts >= self.data[-1][0]:
|
||||
self.data.append((ts, value,))
|
||||
|
|
Loading…
Reference in New Issue