#!/usr/bin/python3

import fcntl
import glob
import hashlib
import json
import random
import time


class LTS:
    """A time series of (timestamp, value) points persisted as a JSON file.

    Each series lives in ``base_dir`` in a file named after the SHA-256 hex
    digest of its canonical (key-sorted) JSON description.  A shared file
    ``base_dir/.index`` maps description key -> value -> list of series ids
    and is consulted by :meth:`find`.

    Files are protected with ``fcntl.flock`` (shared for reading, exclusive
    for writing).
    """

    # Directory holding the series files and the ".index" file.
    base_dir = "data"
    # Maximum number of points kept per series; see add().
    limit = 1000

    def __init__(self, description=None, id=None):
        """Load an existing series or create a new one.

        Args:
            description: Dict describing the series.  If given (and
                non-empty) the series id is derived from it and ``id`` is
                ignored.
            id: Hex id of an existing series; used only when no description
                is given.

        Raises:
            FileNotFoundError: If only ``id`` was given and no such series
                exists.  (Nothing is created in that case.)
        """
        if description:
            # Sort the keys so that equal descriptions always serialize
            # (and therefore hash) identically.
            canonical_description = {
                key: description[key] for key in sorted(description.keys())
            }
            self.description = canonical_description
            serialized_description = json.dumps(canonical_description)
            digest = hashlib.sha256()
            digest.update(bytes(serialized_description, encoding="UTF-8"))
            id = digest.hexdigest()
        self.filename = self.base_dir + "/" + id
        try:
            with open(self.filename, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                stored = json.load(fh)
                self.new = False
                self.description = stored["description"]
                self.data = stored["data"]
        except FileNotFoundError:
            if not description:
                # Looked up by id only: there is no description to create
                # the series from.  Bail out *before* creating the file,
                # otherwise an empty (invalid JSON) file would be left
                # behind and break every later index rebuild.
                raise
            self.new = True
            self.data = []
            # Create the file immediately. Makes saving later simpler if we
            # can assume it exists
            with open(self.filename, "x+") as fh:
                fcntl.flock(fh, fcntl.LOCK_EX)
                json.dump(
                    {"description": self.description, "data": self.data}, fh
                )
            # Must run after the file above is closed: rebuild_index() reads
            # and locks every series file, including this one.
            self.rebuild_index()

    def add(self, ts, value):
        """Add the point ``(ts, value)``, keeping the data ordered by ts.

        If the series already holds ``limit`` points, randomly chosen points
        (index drawn from ``range(limit)``) are dropped first to make room.
        """
        while len(self.data) >= self.limit:
            victim = random.randrange(0, self.limit)
            self.data.pop(victim)

        if len(self.data) == 0 or ts >= self.data[-1][0]:
            # Common case: points arrive in chronological order.
            self.data.append((ts, value))
            return

        # Shouldn't happen that often, so I do a simple linear search instead
        # of a binary search.  The last entry is newer than ts here, so the
        # search normally finds a position; the fallback mirrors what the
        # loop index would be if it did not.
        position = len(self.data) - 1
        for i, entry in enumerate(self.data):
            if entry[0] >= ts:
                position = i
                break
        self.data.insert(position, (ts, value))

    def save(self):
        """Write description and data back to the (already existing) file."""
        with open(self.filename, "r+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            json.dump({"description": self.description, "data": self.data}, fh)
            # The new content may be shorter than the old one.
            fh.truncate()

    def rebuild_index(self):
        """Rebuild ``base_dir/.index`` from all series files on disk.

        The index has the shape ``{key: {value: [series_id, ...]}}``.
        Prints the time taken to stdout.
        """
        t0 = time.time()
        index = {}
        # glob's "*" does not match names starting with a dot, so the
        # ".index" file itself is skipped.
        for fn in glob.glob(self.base_dir + "/*"):
            (_, _, series_id) = fn.rpartition("/")
            with open(fn, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                stored = json.load(fh)
            for key, value in stored["description"].items():
                by_value = index.setdefault(key, {})
                by_value.setdefault(value, []).append(series_id)
        # "a+" creates the index if it does not exist yet (the very first
        # series) without truncating it before we hold the lock.
        with open(self.base_dir + "/.index", "a+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            # Drop the old content completely; otherwise a shorter index
            # would leave stale bytes behind and corrupt the JSON.  In
            # append mode the following write lands at the (new) end, i.e.
            # at offset 0.
            fh.seek(0)
            fh.truncate()
            json.dump(index, fh)
        t1 = time.time()
        print("index rebuilt in", t1 - t0, "seconds")

    @classmethod
    def find(cls, match):
        """Return the ids of all series whose description matches ``match``.

        Args:
            match: Dict of description key -> required value.  A series
                matches if it has all of the given key/value pairs.

        Returns:
            A set of series ids, or ``None`` if ``match`` is empty.

        Raises:
            FileNotFoundError: If the index file does not exist.
            KeyError: If a key or value in ``match`` is not in the index.
        """
        result = None
        with open(cls.base_dir + "/.index", "r") as fh:
            fcntl.flock(fh, fcntl.LOCK_SH)
            index = json.load(fh)
        for key, value in match.items():
            candidates = set(index[key][value])
            if result is None:
                result = candidates
            else:
                result &= candidates
        return result