From f17f07537e5d1680dc505894cc4ce197ebdb31cb Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer"
Date: Sat, 29 Jan 2022 11:56:10 +0100
Subject: [PATCH] Implement PoC

The prototype can create tables and their columns and insert data,
including data from existing tables. This is sufficient for a small demo
from a real-world project. The demo uses all of the above capabilities.
It also deliberately includes rules in arbitrary order to demonstrate
dependency resolution.

We don't have a DSL yet, so all the nodes/rules/requirements (we still
need to decide on a catchy name) are instantiated directly in the source
code by calling their constructors. This is overly verbose but gives a
first impression of what a ruleset might look like.
---
 Notes.md              |  17 ++++
 README.md             |   2 +
 demo_usradm_templates | 218 +++++++++++++++++++++++++++++++++++++++++
 procrusql.py          | 220 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 457 insertions(+)
 create mode 100755 demo_usradm_templates
 create mode 100644 procrusql.py

diff --git a/Notes.md b/Notes.md
index 13704b0..e16a96a 100644
--- a/Notes.md
+++ b/Notes.md
@@ -29,11 +29,28 @@ Syntax should be diff-friendly
 
 All changes should be done in a single transaction.
 
+It should be possible to use multiple database connections (for example,
+installing functions in an untrusted language requires a privileged
+user). Obviously, if we have several connections, we can't have a single
+transaction.
+
 ## Non-Goals
 
 Reverting changes (instead apply the old config again).
 
+Performance. I expect the typical configuration to be small (dozens of
+tables and hundreds of records, not hundreds of tables and millions of
+records) and the use cases to be infrequent (a few deployments per day,
+not many per second).
+
 ## Problems
 
 Is the order of operations always the same or is it sometimes necessary
 to do things in a different order depending on the current state?
+
+## Random ideas
+
+I think we should have empty nodes which are just used to group dependencies,
+e.g. something like `table_x_complete` which depends on `table_x` and all its
+columns, constraints, indexes, data, etc. so that other nodes can just depend on
+`table_x_complete` and don't have to bother about the details.
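
The grouping idea floated under "Random ideas" could be expressed as a node type whose check() does nothing. The following is only a minimal sketch, assuming the `Node` base class and `fit()` loop introduced by procrusql.py later in this patch; the name `HaveGroup` and the example rules are hypothetical, not part of the patch:

```python
# Hypothetical sketch -- not part of this patch.  A node that only groups
# dependencies: there is nothing to inspect or create, so it becomes ok as
# soon as fit() finds all of its dependencies ok and calls check().
class HaveGroup(Node):
    def check(self):
        self.ok = True

# Other rules can then depend on the group instead of on every detail:
want = [
    HaveTable("table_x", depends=[], table="x"),
    HaveColumn("column_x_id", depends=["table_x"],
        table="x", column="id", definition="serial primary key"),
    HaveGroup("table_x_complete", depends=["table_x", "column_x_id"]),
    # ... other nodes only need to depend on "table_x_complete" ...
]
```
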
diff --git a/README.md b/README.md
index e69de29..c77fe12 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,2 @@
+ProcruSQL
+=========
diff --git a/demo_usradm_templates b/demo_usradm_templates
new file mode 100755
index 0000000..e754470
--- /dev/null
+++ b/demo_usradm_templates
@@ -0,0 +1,218 @@
+#!/usr/bin/python3
+import logging
+import psycopg2
+
+from procrusql import HaveTable, HaveColumn, HaveData, Ref, fit
+
+logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s %(lineno)d | %(message)s", level=logging.DEBUG)
+
+want = [
+    HaveTable("table_service", depends=[],
+        table="service"
+    ),
+    HaveColumn("column_service_id", depends=["table_service"],
+        table="service",
+        column="id",
+        definition="serial primary key",
+    ),
+    HaveColumn("column_service_type", depends=["table_service"],
+        table="service",
+        column="type",
+        definition="text",
+    ),
+    HaveColumn("column_service_feature", depends=["table_service"],
+        table="service",
+        column="feature",
+        definition="text",
+    ),
+    HaveColumn("column_service_description", depends=["table_service"],
+        table="service",
+        column="description",
+        definition="text",
+    ),
+    HaveColumn("column_service_hidden", depends=["table_service"],
+        table="service",
+        column="hidden",
+        definition="boolean",
+    ),
+
+    HaveData("data_login_unix", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "login", "feature": "unix"},
+        extra={"description": "Can login on Unix systems", "hidden": False},
+    ),
+    HaveData("data_login_wds", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "login", "feature": "wds"},
+        extra={"description": "Can login into WDS", "hidden": False},
+    ),
+    HaveData("data_login_wwa", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "login", "feature": "wwa"},
+        extra={"description": "Can login into WWA", "hidden": False},
+    ),
+    HaveData("data_group_wifo", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "group", "feature": "wifo"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_group_wifo_intern", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "group", "feature": "wifo-intern"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_group_wsr", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "group", "feature": "wsr"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_mailinglist2_wifo_aktuell", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "mailinglist2", "feature": "wifo-aktuell"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_org_wifo", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "org", "feature": "WIFO"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_org_wsr", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "org", "feature": "WSR"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_wifo_intern", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "publ", "feature": "wifo_intern"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_wifo_temporary", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "publ", "feature": "wifo_temporary"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_administrative_staff_member", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "administrative_staff_member"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_Xassociate", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "Xassociate"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_economist", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "economist"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_emeritus_consultant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "emeritus_consultant"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_research_assistant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "research_assistant"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_scientific_administration_staff_member", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature":
+             "scientific_administration_staff_member"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_scientific_administrative_assistant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "scientific_administrative_assistant"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_scientific_consultant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "scientific_consultant"},
+        extra={"hidden": False},
+    ),
+    HaveData("data_publ_rolle_senior_economist", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
+        table="service",
+        key={"type": "rolle", "feature": "senior_economist"},
+        extra={"hidden": False},
+    ),
+
+    HaveData("data_template_std", depends=["column_template_name", "column_template_sortorder"],
+        table="template",
+        key={"name": "Standard Vorlage"},
+        extra={"sortorder": 1},
+    ),
+    HaveData("data_template_wifo_wa", depends=["column_template_name", "column_template_sortorder", "column_template_email_pattern", "column_template_email_after", "column_template_funktion", "column_template_status"],
+        table="template",
+        key={"name": "WIFO Wissenschaftliche Assistenz"},
+        extra={"sortorder": 10, "email_pattern": "vorname#.#nachname", "email_after": "wifo.ac.at", "funktion": "FB (1-5)", "status": "FU, FM"},
+    ),
+
+    HaveColumn("column_template_name", depends=["table_template"],
+        table="template",
+        column="name",
+        definition="text not null unique",
+    ),
+    HaveTable("table_template", depends=[],
+        table="template"
+    ),
+    HaveColumn("column_template_sortorder", depends=["table_template"],
+        table="template",
+        column="sortorder",
+        definition="int",
+    ),
+    HaveColumn("column_template_email_pattern", depends=["table_template"],
+        table="template",
+        column="email_pattern",
+        definition="text",
+    ),
+    HaveColumn("column_template_email_after", depends=["table_template"],
+        table="template",
+        column="email_after",
+        definition="text",
+    ),
+    HaveColumn("column_template_funktion", depends=["table_template"],
+        table="template",
+        column="funktion",
+        definition="text",
+    ),
+    HaveColumn("column_template_status", depends=["table_template"],
+        table="template",
+        column="status",
+        definition="text",
+    ),
+
+    HaveData("data_template_std_login_wwa", depends=["data_template_std", "data_login_wwa", "column_service_id", "column_template_id", "column_template_service_template", "column_template_service_service"],
+        table="template_service",
+        key={"template": Ref("data_template_std", 0, "id"), "service": Ref("data_login_wwa", 0, "id")},
+        extra={}
+    ),
+
+    HaveColumn("column_template_id", depends=["table_template"],
+        table="template",
+        column="id",
+        definition="serial primary key",
+    ),
+
+    HaveColumn("column_template_service_template", depends=["table_template_service"],
+        table="template_service",
+        column="template",
+        definition="int references template",
+    ),
+
+    HaveTable("table_template_service", depends=[],
+        table="template_service",
+    ),
+    HaveColumn("column_template_service_service", depends=["table_template_service"],
+        table="template_service",
+        column="service",
+        definition="int references service",
+    ),
+]
+
+db = psycopg2.connect(dbname="procrusql_test")
+fit(db, want)
+
+
+# vim: tw=0
diff --git a/procrusql.py b/procrusql.py
new file mode 100644
index 0000000..8fe32c6
--- /dev/null
+++ b/procrusql.py
@@ -0,0 +1,220 @@
+import logging
+
+import psycopg2
+from psycopg2 import sql
+from psycopg2 import extras
+
+log = logging.getLogger(__name__)
+log_action = log.getChild("action")
+log_check = log.getChild("check")
+log_state = log.getChild("state")
+
+class Node:
+    def __init__(self, name, depends):
+        self.name = name
+        self.depends = depends
+        self.ok = False
+        self.ready = False
+
+    def __repr__(self):
+        return f"{type(self)}({self.name}{' ready' if self.ready else ''}{' ok' if self.ok else ''})"
+
+    def is_ready(self):
+        # XXX - Naive O(n²) algorithm
+        if self.ready:
+            return True
+        for d in self.depends:
+            found = False
+            for w in want:
+                if w.name == d:
+                    if not w.ok:
+                        log_state.info("%s depends on %s which is not yet ok", self.name, d)
+                        return False
+                    found = True
+                    break
+            if not found:
+                raise RuntimeError(f"Dependency {d} of {self.name} doesn't exist")
+        log_state.info("%s is now ready", self.name)
+        self.ready = True
+        return True
+
+class HaveData(Node):
+    def __init__(self, name, depends, table, key, extra):
+        super().__init__(name, depends)
+        self.table = table
+        self.key = key
+        self.extra = extra
+
+    def check(self):
+        log_check.info("Checking %s", self.name)
+        csr = db.cursor(cursor_factory=extras.DictCursor)
+        key_checks = [
+            sql.SQL(" = ").join([ sql.Identifier(x), sql.Placeholder() ])
+            for x in self.key.keys()
+        ]
+        key_check = sql.SQL(" and ").join(key_checks)
+        q = sql.SQL(
+            "select * from {table} where {key_check}"
+        ).format(
+            table=sql.Identifier(self.table),
+            key_check=key_check
+        )
+        key_values = [v.resolve() if isinstance(v, Ref) else v for v in self.key.values()]
+        csr.execute(q, key_values)
+        self.result = csr.fetchall()
+        log_check.info("Got %d rows", len(self.result))
+        if self.result:
+            self.ok = True
+            log_state.info("%s is now ok", self.name)
+            return
+
+        extra_values = [v.resolve() if isinstance(v, Ref) else v for v in self.extra.values()]
+        columns = list(self.key.keys()) + list(self.extra.keys())
+        values = key_values + extra_values
+        q = sql.SQL(
+            "insert into {table}({columns}) values({placeholders}) returning *"
+        ).format(
+            table=sql.Identifier(self.table),
+            columns=sql.SQL(", ").join([sql.Identifier(x) for x in columns]),
+            placeholders=sql.SQL(", ").join([sql.Placeholder() for x in columns]),
+        )
+        log_action.info("Inserting data")
+        csr.execute(q, values)
+        self.result = csr.fetchall()
+        log_action.info("Got %d rows", len(self.result))
+        if self.result:
+            self.ok = True
+            log_state.info("%s is now ok", self.name)
+            return
+        # We shouldn't get here. Either the insert succeeded, or it raised an
+        # exception. Success with 0 rows should not happen.
+        raise RuntimeError("Unreachable code reached")
+
+class HaveTable(Node):
+
+    def __init__(self, name, depends, table, schema="public"):
+        super().__init__(name, depends)
+        self.table = table
+        self.schema = schema
+
+    def check(self):
+        log_check.info("Checking %s", self.name)
+        csr = db.cursor(cursor_factory=extras.DictCursor)
+        csr.execute(
+            """
+            select * from information_schema.tables
+            where table_schema = %s and table_name = %s
+            """,
+            (self.schema, self.table,))
+        r = csr.fetchall()
+        if len(r) == 1:
+            # Table exists, all ok
+            self.ok = True
+            log_state.info("%s is now ok", self.name)
+            return
+        if len(r) > 1:
+            raise RuntimeError(f"Found {len(r)} tables with schema {self.schema} and name {self.table}")
+
+        # Create table
+        # (Yes, we can actually create a table with 0 columns)
+        log_action.info("Creating table %s.%s", self.schema, self.table)
+        q = sql.SQL("create table {schema}.{table}()").format(
+            schema=sql.Identifier(self.schema),
+            table=sql.Identifier(self.table)
+        )
+        csr.execute(q)
+        self.ok = True
+        log_state.info("%s is now ok", self.name)
+
+        pass
+
+class HaveColumn(Node):
+    # hjp=> alter table service add id serial primary key;
+    # ALTER TABLE
+    # hjp=> alter table service add type text;
+    # ALTER TABLE
+    # ...
+    def __init__(self, name, depends, table, column, definition, schema="public"):
+        super().__init__(name, depends)
+        self.table = table
+        self.column = column
+        self.definition = definition
+        self.schema = schema
+
+    def check(self):
+        log_check.info("Checking %s", self.name)
+        csr = db.cursor(cursor_factory=extras.DictCursor)
+        # For now just check if the column exists. Checking the type etc. will be implemented later.
+        csr.execute(
+            """
+            select * from information_schema.columns
+            where table_schema = %s and table_name = %s and column_name = %s
+            """,
+            (self.schema, self.table, self.column, ))
+        r = csr.fetchall()
+        if len(r) == 1:
+            # Column exists, all ok
+            self.ok = True
+            log_state.info("%s is now ok", self.name)
+            return
+        if len(r) > 1:
+            raise RuntimeError(f"Found {len(r)} columns with name {self.column} in {self.schema}.{self.table}")
+
+        # Create column
+        log_action.info("Adding column %s to table %s.%s", self.column, self.schema, self.table)
+        q = sql.SQL("alter table {schema}.{table} add {column} {definition}").format(
+            schema=sql.Identifier(self.schema),
+            table=sql.Identifier(self.table),
+            column=sql.Identifier(self.column),
+            definition=sql.SQL(self.definition),
+        )
+        csr.execute(q)
+        self.ok = True
+        log_state.info("%s is now ok", self.name)
+
+class HaveUniqueConstraint(Node):
+    # hjp=> alter table service add unique (type, feature);
+    # ALTER TABLE
+    pass
+
+def findnode(name):
+    for w in want:
+        if w.name == name:
+            return w
+
+class Ref:
+    def __init__(self, datanode, row, column):
+        self.datanode = datanode
+        self.row = row
+        self.column = column
+
+    def resolve(self):
+        datanode = findnode(self.datanode)
+        if not datanode.ok:
+            # XXX - We might try to resolve this, but for now the user is responsible for declaring the dependency explicitly
+            raise RuntimeError(f"Cannot get data from {datanode.name} which is not yet ok. Please add a dependency")
+        return datanode.result[self.row][self.column]
+
+
+def fit(_db, _want):
+    global db, want
+    db = _db
+    want = _want
+    while True:
+        progress = False
+        not_ok = 0
+        for w in want:
+            if not w.ok:
+                if w.is_ready():
+                    w.check()
+                    progress = True
+                else:
+                    not_ok += 1
+        if not_ok == 0:
+            break
+        if not progress:
+            raise RuntimeError(f"Didn't make any progress in this round, but {not_ok} requirements are still not ok")
+        log_state.info("%d requirements are not yet ok", not_ok)
+
+    db.commit()
+    log_state.info("Done")
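
The HaveUniqueConstraint class in the patch is still a stub; only the intended `alter table service add unique (type, feature)` statement is recorded in its comment. Below is a minimal sketch of one way it might be completed, mirroring the check-then-create pattern of HaveColumn and the module-level `db` set up by `fit()`. The constructor arguments and the information_schema query are assumptions, not something this patch defines:

```python
# Hypothetical sketch -- not part of this patch.  Checks for an existing
# unique/primary key constraint on the given columns and adds one if missing.
class HaveUniqueConstraint(Node):
    def __init__(self, name, depends, table, columns, schema="public"):
        super().__init__(name, depends)
        self.table = table
        self.columns = columns          # list of column names, in index order
        self.schema = schema

    def check(self):
        log_check.info("Checking %s", self.name)
        csr = db.cursor(cursor_factory=extras.DictCursor)
        # Is there already a unique or primary key constraint on exactly
        # these columns, in this order?
        csr.execute(
            """
            select tc.constraint_name
            from information_schema.table_constraints tc
            join information_schema.key_column_usage kcu
              using (constraint_schema, constraint_name)
            where tc.table_schema = %s and tc.table_name = %s
              and tc.constraint_type in ('UNIQUE', 'PRIMARY KEY')
            group by tc.constraint_name
            having array_agg(kcu.column_name::text
                             order by kcu.ordinal_position) = %s::text[]
            """,
            (self.schema, self.table, list(self.columns)))
        if csr.fetchall():
            self.ok = True
            log_state.info("%s is now ok", self.name)
            return

        # Add the constraint, as in the "alter table ... add unique" comment.
        log_action.info("Adding unique constraint on %s.%s (%s)",
                        self.schema, self.table, ", ".join(self.columns))
        q = sql.SQL("alter table {schema}.{table} add unique ({columns})").format(
            schema=sql.Identifier(self.schema),
            table=sql.Identifier(self.table),
            columns=sql.SQL(", ").join(sql.Identifier(c) for c in self.columns),
        )
        csr.execute(q)
        self.ok = True
        log_state.info("%s is now ok", self.name)
```

A corresponding demo rule could then look like `HaveUniqueConstraint("unique_service_type_feature", depends=["column_service_type", "column_service_feature"], table="service", columns=["type", "feature"])`, matching the example in the class comment.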