ltsdb/ltsdb_json.py

91 lines
3.0 KiB
Python
Raw Normal View History

2022-08-21 11:58:31 +02:00
#!/usr/bin/python3
import fcntl
import glob
import hashlib
import json
2022-09-02 14:06:47 +02:00
import random
2022-08-21 11:58:31 +02:00
import time
class LTS:
    """A time series persisted as one JSON file, addressed by its description.

    The description (a flat dict) is serialized with sorted keys and hashed
    with SHA-256; the hex digest is the file name inside ``base_dir``. Each
    file holds ``{"description": ..., "data": [[ts, value], ...]}``.

    A shared ``.index`` file in ``base_dir`` maps
    ``description key -> description value -> [hashes]`` and is used by
    :meth:`find`.

    Attributes:
        description: The description with its keys in sorted order.
        filename: Path of the backing JSON file.
        new: True if the backing file was created by this constructor call.
        data: Data points ordered by timestamp. Points loaded from disk are
            lists, points added in this process are tuples.
    """

    # Directory that holds the series files and the ".index" file.
    base_dir = "data"
    # Maximum number of data points kept per series.
    limit = 1000

    def __init__(self, description):
        """Load the series for *description*, creating it if it is missing.

        Args:
            description: Dict identifying the series. Key order is
                irrelevant; keys are sorted before hashing.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        # TODO: we probably need to be able to load by hash, too.
        canonical_description = {
            key: description[key] for key in sorted(description.keys())
        }
        self.description = canonical_description
        # The serialized form must stay byte-stable: it determines the file
        # name of every series already on disk.
        serialized_description = json.dumps(canonical_description)
        digest = hashlib.sha256(
            bytes(serialized_description, encoding="UTF-8")
        ).hexdigest()
        self.filename = self.base_dir + "/" + digest
        try:
            with open(self.filename, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                stored = json.load(fh)
            self.new = False
            self.data = stored["data"]
        except FileNotFoundError:
            self.new = True
            self.data = []
            # Create the file immediately. Makes saving later simpler if we
            # can assume it exists.
            with open(self.filename, "x+") as fh:
                fcntl.flock(fh, fcntl.LOCK_EX)
                json.dump(
                    {"description": self.description, "data": self.data}, fh
                )
            # A new series appeared, so the index has to learn about it.
            self.rebuild_index()

    def add(self, ts, value):
        """Insert the point ``(ts, value)``, keeping ``data`` ordered by ts.

        If the series already holds ``limit`` or more points, randomly chosen
        points are dropped first so that it ends up with at most ``limit``
        points (for a positive ``limit``).

        Args:
            ts: Timestamp; must be comparable with the stored timestamps.
            value: Value recorded at *ts*.
        """
        # Pick the victim from the actual list length so every stored point
        # can be evicted, and never index past the end or into an empty list.
        while self.data and len(self.data) >= self.limit:
            self.data.pop(random.randrange(len(self.data)))

        if not self.data or ts >= self.data[-1][0]:
            self.data.append((ts, value))
            return

        # Out-of-order insert. Shouldn't happen that often, so a simple
        # linear search is used instead of a binary search. A match always
        # exists here because ts is smaller than the last timestamp.
        position = next(
            i for i, point in enumerate(self.data) if point[0] >= ts
        )
        self.data.insert(position, (ts, value))

    def save(self):
        """Write description and data back to the existing series file."""
        with open(self.filename, "r+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            json.dump({"description": self.description, "data": self.data}, fh)
            # The new content may be shorter than the old one.
            fh.truncate()

    def rebuild_index(self):
        """Rescan every series file in ``base_dir`` and rewrite ``.index``.

        The glob pattern ``*`` does not match dot files, so ``.index`` itself
        is not scanned. Prints the elapsed time when done.
        """
        t0 = time.time()
        index = {}
        for fn in glob.glob(self.base_dir + "/*"):
            (_, _, series_hash) = fn.rpartition("/")
            with open(fn, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                stored = json.load(fh)
            for key, value in stored["description"].items():
                index.setdefault(key, {}).setdefault(value, []).append(
                    series_hash
                )
        # "a+" creates the file when missing but, unlike "w", does not wipe
        # it before the exclusive lock is held.
        with open(self.base_dir + "/.index", "a+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            # Drop the previous content; otherwise a shorter index would
            # leave stale bytes behind and the file would no longer parse.
            fh.seek(0)
            fh.truncate()
            json.dump(index, fh)
        t1 = time.time()
        print("index rebuilt in", t1 - t0, "seconds")

    @classmethod
    def find(cls, match):
        """Return the hashes of all series matching every pair in *match*.

        Args:
            match: Dict of ``description key -> required value``.

        Returns:
            A set of series hashes (file names inside ``base_dir``). The set
            is empty when a key or value is not in the index. ``None`` is
            returned when *match* is empty.

        Raises:
            FileNotFoundError: If the ``.index`` file does not exist yet.

        Note:
            The index is stored as JSON, so its keys come back as strings;
            non-string values in *match* will not match anything.
        """
        result = None
        with open(cls.base_dir + "/.index", "r") as fh:
            fcntl.flock(fh, fcntl.LOCK_SH)
            index = json.load(fh)
        for key, value in match.items():
            # An unknown key or value simply matches no series.
            hashes = set(index.get(key, {}).get(value, ()))
            if result is None:
                result = hashes
            else:
                result &= hashes
        return result
2022-08-21 11:58:31 +02:00