#!/usr/bin/python3
|
|
|
|
import fcntl
|
|
import glob
|
|
import hashlib
|
|
import json
|
|
import random
|
|
import time
|
|
|
|
class LTS:
    """A small JSON-file-backed time-series store.

    Each series is one JSON file in ``base_dir``, named by the SHA-256
    hash of its canonical (key-sorted) description.  An inverted index
    (description key -> value -> [series ids]) lives in ``base_dir/.index``.
    Concurrent access is coordinated with POSIX ``flock`` advisory locks.
    """

    # Directory holding one JSON file per series plus the ".index" file.
    base_dir = "data"
    # Maximum number of data points kept per series; once full, points are
    # evicted at random (see add()) so the series thins out evenly.
    limit = 1000

    def __init__(self, description=None, id=None):
        """Open an existing series, or create it if it does not exist.

        Either ``description`` (a dict of metadata) or ``id`` (a series
        hash) must be given.  When ``description`` is supplied, ``id`` is
        derived from it and any caller-supplied ``id`` is ignored.

        Raises ValueError if neither ``description`` nor ``id`` is given.
        """
        if description:
            # Sort the keys so equal descriptions always serialize to the
            # same JSON string and therefore the same hash/filename.
            canonical_description = {x: description[x] for x in sorted(description.keys())}
            self.description = canonical_description
            serialized_description = json.dumps(canonical_description)
            m = hashlib.sha256()
            m.update(serialized_description.encode("UTF-8"))
            id = m.hexdigest()
        else:
            # Opened by id only: the description is unknown until the file
            # is read.  Keep a defined value so the create branch below
            # cannot crash with AttributeError.
            self.description = None
        if id is None:
            raise ValueError("either description or id must be given")
        self.filename = self.base_dir + "/" + id

        try:
            with open(self.filename, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                d = json.load(fh)
                self.new = False
                self.description = d["description"]
                self.data = d["data"]
        except FileNotFoundError:
            self.new = True
            self.data = []
            # Create the file immediately.  Makes saving later simpler if
            # we can assume it exists.  Mode "x" is atomic-create, so two
            # concurrent creators cannot clobber each other (the loser gets
            # FileExistsError, which we deliberately let propagate).
            with open(self.filename, "x+") as fh:
                fcntl.flock(fh, fcntl.LOCK_EX)
                json.dump({"description": self.description, "data": self.data}, fh)
            self.rebuild_index()

    def add(self, ts, value):
        """Insert a ``(ts, value)`` point, keeping self.data sorted by ts.

        Only mutates in-memory state; call save() to persist.  When the
        series is full, random points are evicted first so coverage thins
        out evenly instead of simply dropping the oldest data.
        """
        while len(self.data) >= self.limit:
            # Index by the actual length, not self.limit, so this is safe
            # even if data was loaded longer than the current limit.
            self.data.pop(random.randrange(len(self.data)))

        if not self.data or ts >= self.data[-1][0]:
            # Common case: points arrive in chronological order.
            self.data.append((ts, value))
        else:
            # Out-of-order point.  Shouldn't happen that often, so I do a
            # simple linear search instead of a binary search.
            for i in range(len(self.data)):
                if self.data[i][0] >= ts:
                    break
            self.data.insert(i, (ts, value))

    def save(self):
        """Persist the current description and data to the series file."""
        with open(self.filename, "r+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            json.dump({"description": self.description, "data": self.data}, fh)
            # The new content may be shorter than the old; drop the stale tail.
            fh.truncate()

    def rebuild_index(self):
        """Scan every series file and rewrite the inverted index.

        The index maps description key -> value -> [series ids] and is
        stored as JSON in base_dir/.index.  The leading dot keeps the
        index file itself out of the glob below (glob skips dotfiles).
        """
        t0 = time.time()
        index = {}
        for fn in glob.glob(self.base_dir + "/*"):
            (_, _, series_id) = fn.rpartition("/")
            with open(fn, "r") as fh:
                fcntl.flock(fh, fcntl.LOCK_SH)
                d = json.load(fh)
                # A series created by bare id has a null description; skip it.
                for k, v in (d["description"] or {}).items():
                    index.setdefault(k, {}).setdefault(v, []).append(series_id)
        # "a+" (unlike "r+") creates the index on the first run and does
        # not truncate before the exclusive lock is held; truncate only
        # once we own the lock, then write the fresh index.
        with open(self.base_dir + "/.index", "a+") as fh:
            fcntl.flock(fh, fcntl.LOCK_EX)
            fh.seek(0)
            fh.truncate()
            json.dump(index, fh)
        t1 = time.time()
        print("index rebuilt in", t1 - t0, "seconds")

    @classmethod
    def find(cls, match):
        """Return the set of series ids matching every key/value in ``match``.

        Returns None when ``match`` is empty.  Raises KeyError when a
        key/value pair does not occur in the index, and FileNotFoundError
        when the index has never been built.
        """
        result = None
        with open(cls.base_dir + "/.index", "r") as fh:
            fcntl.flock(fh, fcntl.LOCK_SH)
            index = json.load(fh)
        for d, v in match.items():
            ts = set(index[d][v])
            if result is None:
                result = ts
            else:
                # Intersect: a series must match every requested pair.
                result &= ts
        return result

    def data_json_by_row(self):
        """Return the series as a JSON array of row objects.

        Each row is {"t": ts, "v": value, "utc": ISO-8601 UTC timestamp}.
        """
        rows = []
        for ts, value in self.data:
            rows.append({
                "t": ts,
                "v": value,
                "utc": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(ts)),
            })
        return json.dumps(rows)
|