Make expiry exponential
Choosing the element to expire from a uniform random distribution tends to expire data points much too early. We want to always keep the oldest observation and have a roughly exponential distribution between the newest and the oldest observation.
This commit is contained in:
parent
a802f2ee27
commit
b10b62e77d
|
@ -4,9 +4,13 @@ import fcntl
|
||||||
import glob
|
import glob
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
log = logging.getLogger()
|
||||||
|
|
||||||
class LTS:
|
class LTS:
|
||||||
base_dir = "data"
|
base_dir = "data"
|
||||||
queue_dir = "queue"
|
queue_dir = "queue"
|
||||||
|
@ -39,10 +43,38 @@ class LTS:
|
||||||
json.dump({"description": self.description, "data": self.data}, fh)
|
json.dump({"description": self.description, "data": self.data}, fh)
|
||||||
self.rebuild_index()
|
self.rebuild_index()
|
||||||
|
|
||||||
|
def shrink(self):
    """Remove exactly one element from ``self.data``.

    The element is chosen so that the distribution of timestamps gets
    closer to an exponential curve through the first and the last few
    data points. ``self.data`` is a list of ``(timestamp, value)`` entries
    (presumably in ascending timestamp order, as ``add`` only appends
    newer entries -- confirm against other writers of ``self.data``).

    To do this we compute the ideal t value at each point and compare it
    to the real value. We remove the first point which sticks out too
    much (I'm tempted to dub this the barber's algorithm).
    This is extremely inefficient but it's simple to understand and works.

    The oldest element (index 0) is never removed while there are at
    least two elements.

    Raises:
        IndexError: if ``self.data`` is empty.
    """
    data = self.data
    n = len(data)
    if n < 2:
        # No curve can be fitted through a single point. pop() removes it,
        # or raises IndexError on an empty list, so a caller looping until
        # the list is short enough can never spin forever.
        data.pop()
        return

    t_last = data[-1][0]
    # Average spacing of the newest points: four intervals when available,
    # fewer for short series (the original indexed data[-5] unconditionally
    # and raised IndexError for fewer than five points).
    recent = min(4, n - 1)
    dt = (t_last - data[-1 - recent][0]) / recent

    # With identical recent timestamps dt is 0 and the curve is undefined
    # (division by zero); go straight to the fallback in that case.
    if dt > 0:
        k = math.log((t_last - data[0][0]) / dt / n + 1)
        for i in range(1, n):
            t_ideal = (math.exp(k * (n - i) / n) - 1) * (n * dt)
            if t_last - data[i][0] > t_ideal:
                log.debug("%s - %s > %s -> popping element %s", t_last, data[i][0], t_ideal, i)
                data.pop(i)
                return

    # Well, it works mostly. Sometimes all the real points are below
    # the curve but we have to remove one anyway. This needs to be
    # heavily biased towards newer data points, but we don't want to
    # delete the few newest data points so choose one at random from a
    # narrow range just before that.
    lo = int(n * 0.98)
    hi = int(n * 0.99)
    if hi <= lo:
        # For short series both bounds truncate to the same index (e.g.
        # n = 10 gives 9 and 9) and randrange() would raise ValueError on
        # the empty range. Pick the element just before the newest one
        # instead, but never index 0 so the oldest observation survives.
        lo = max(1, min(lo, n - 2))
        hi = lo + 1
    i = random.randrange(lo, hi)
    log.debug("no match -> popping element %s", i)
    data.pop(i)
|
||||||
|
|
||||||
def add(self, ts, value):
|
def add(self, ts, value):
|
||||||
while len(self.data) >= self.limit:
|
while len(self.data) >= self.limit:
|
||||||
r = random.randrange(0, self.limit)
|
self.shrink()
|
||||||
self.data.pop(r)
|
|
||||||
|
|
||||||
if len(self.data) == 0 or ts >= self.data[-1][0]:
|
if len(self.data) == 0 or ts >= self.data[-1][0]:
|
||||||
self.data.append((ts, value,))
|
self.data.append((ts, value,))
|
||||||
|
|
Loading…
Reference in New Issue