Implement PoC

The prototype can create tables and their columns and insert data,
including data from existing tables. This is sufficient for a small demo
from a real-world project.

The demo uses all of the above capabilities. It also deliberately
includes rules in arbitrary order to demonstrate dependency resolution.

We don't have a DSL yet, so all the nodes/rules/requirements (need to
decide on a catchy name) are instantiated directly in the source code
by calling their constructors. This is overly verbose but gives a first
impression on what a ruleset might look like.
This commit is contained in:
Peter J. Holzer 2022-01-29 11:56:10 +01:00 committed by Peter J. Holzer
parent 95985203f6
commit f17f07537e
4 changed files with 457 additions and 0 deletions

View File

@ -29,11 +29,28 @@ Syntax should be diff-friendly
All changes should be done in a single transaction. All changes should be done in a single transaction.
It should be possible to use multiple database connections (for example,
installing functions in an untrusted language requires a privileged
user). Obviously, if we have several connections, we can't have a single
transaction.
## Non-Goals ## Non-Goals
Reverting changes (instead apply the old config again). Reverting changes (instead apply the old config again).
Performance. I expect the typical configuration to be small (dozens of
tables and hundreds of records, not hundreds of tables and millions of
records) and the use cases to be infrequent (a few deployments per day,
not many per second).
## Problems ## Problems
Is the order of operations always the same or is it sometimes necessary Is the order of operations always the same or is it sometimes necessary
to do things in a different order depending on the current state? to do things in a different order depending on the current state?
## Random ideas
I think we should have empty nodes which are just used to group dependencies,
e.g. something like `table_x_complete` which depends on `table_x` and all its
columns, contraints, indexes, data, etc. so that other nodes can just depend on
`table_x_complete` and don't have to bother about the details.

View File

@ -0,0 +1,2 @@
ProcruSQL
=========

218
demo_usradm_templates Executable file
View File

@ -0,0 +1,218 @@
#!/usr/bin/python3
import logging
import psycopg2
from procrusql import HaveTable, HaveColumn, HaveData, Ref, fit
logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s %(lineno)d | %(message)s", level=logging.DEBUG)
want = [
HaveTable("table_service", depends=[],
table="service"
),
HaveColumn("column_service_id", depends=["table_service"],
table="service",
column="id",
definition="serial primary key",
),
HaveColumn("column_service_type", depends=["table_service"],
table="service",
column="type",
definition="text",
),
HaveColumn("column_service_feature", depends=["table_service"],
table="service",
column="feature",
definition="text",
),
HaveColumn("column_service_description", depends=["table_service"],
table="service",
column="description",
definition="text",
),
HaveColumn("column_service_hidden", depends=["table_service"],
table="service",
column="hidden",
definition="boolean",
),
HaveData("data_login_unix", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "login", "feature": "unix"},
extra={"description": "Can login on Unix systems", "hidden": False},
),
HaveData("data_login_wds", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "login", "feature": "wds"},
extra={"description": "Can login into WDS", "hidden": False},
),
HaveData("data_login_wwa", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "login", "feature": "wwa"},
extra={"description": "Can login into WWA", "hidden": False},
),
HaveData("data_group_wifo", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "group", "feature": "wifo"},
extra={"hidden": False},
),
HaveData("data_group_wifo_intern", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "group", "feature": "wifo-intern"},
extra={"hidden": False},
),
HaveData("data_group_wsr", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "group", "feature": "wsr"},
extra={"hidden": False},
),
HaveData("data_mailinglist2_wifo_aktuell", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "mailinglist2", "feature": "wifo-aktuell"},
extra={"hidden": False},
),
HaveData("data_org_wifo", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "org", "feature": "WIFO"},
extra={"hidden": False},
),
HaveData("data_org_wsr", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "org", "feature": "WSR"},
extra={"hidden": False},
),
HaveData("data_publ_wifo_intern", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "publ", "feature": "wifo_intern"},
extra={"hidden": False},
),
HaveData("data_publ_wifo_temporary", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "publ", "feature": "wifo_temporary"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_administrative_staff_member", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "administrative_staff_member"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_Xassociate", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "Xassociate"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_economist", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "economist"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_emeritus_consultant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "emeritus_consultant"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_research_assistant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "research_assistant"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_scientific_administration_staff_member", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature":
"scientific_administration_staff_member"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_scientific_administrative_assistant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "scientific_administrative_assistant"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_scientific_consultant", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "scientific_consultant"},
extra={"hidden": False},
),
HaveData("data_publ_rolle_senior_economist", depends=["column_service_id", "column_service_type", "column_service_feature", "column_service_description", "column_service_hidden"],
table="service",
key={"type": "rolle", "feature": "senior_economist"},
extra={"hidden": False},
),
HaveData("data_template_std", depends=["column_template_name", "column_template_sortorder"],
table="template",
key={"name": "Standard Vorlage"},
extra={"sortorder": 1},
),
HaveData("data_template_wifo_wa", depends=["column_template_name", "column_template_sortorder", "column_template_email_pattern", "column_template_email_after", "column_template_funktion", "column_template_status"],
table="template",
key={"name": "WIFO Wissenschaftliche Assistenz"},
extra={"sortorder": 10, "email_pattern": "vorname#.#nachname", "email_after": "wifo.ac.at", "funktion": "FB (1-5)", "status": "FU, FM"},
),
HaveColumn("column_template_name", depends=["table_template"],
table="template",
column="name",
definition="text not null unique",
),
HaveTable("table_template", depends=[],
table="template"
),
HaveColumn("column_template_sortorder", depends=["table_template"],
table="template",
column="sortorder",
definition="int",
),
HaveColumn("column_template_email_pattern", depends=["table_template"],
table="template",
column="email_pattern",
definition="text",
),
HaveColumn("column_template_email_after", depends=["table_template"],
table="template",
column="email_after",
definition="text",
),
HaveColumn("column_template_funktion", depends=["table_template"],
table="template",
column="funktion",
definition="text",
),
HaveColumn("column_template_status", depends=["table_template"],
table="template",
column="status",
definition="text",
),
HaveData("data_template_std_login_wwa", depends=["data_template_std", "data_login_wwa", "column_service_id", "column_template_id", "column_template_service_template", "column_template_service_service"],
table="template_service",
key={"template": Ref("data_template_std", 0, "id"), "service": Ref("data_login_wwa", 0, "id")},
extra={}
),
HaveColumn("column_template_id", depends=["table_template"],
table="template",
column="id",
definition="serial primary key",
),
HaveColumn("column_template_service_template", depends=["table_template_service"],
table="template_service",
column="template",
definition="int references template",
),
HaveTable("table_template_service", depends=[],
table="template_service",
),
HaveColumn("column_template_service_service", depends=["table_template_service"],
table="template_service",
column="service",
definition="int references service",
),
]
db = psycopg2.connect(dbname="procrusql_test")
fit(db, want)
# vim: tw=0

220
procrusql.py Normal file
View File

@ -0,0 +1,220 @@
import logging
import psycopg2
from psycopg2 import sql
from psycopg2 import extras
log = logging.getLogger(__name__)
log_action = log.getChild("action")
log_check = log.getChild("check")
log_state = log.getChild("state")
class Node:
def __init__(self, name, depends):
self.name = name
self.depends = depends
self.ok = False
self.ready = False
def __repr__(self):
return f"{type(self)}({self.name}{' ready' if self.ready else ''}{' ok' if self.ok else ''})"
def is_ready(self):
# XXX - Naive O(n²) algorithm
if self.ready:
return True
for d in self.depends:
found = False
for w in want:
if w.name == d:
if not w.ok:
log_state.info("%s depends on %s which is not yet ok", self.name, d)
return False
found = True
break
if not found:
raise RuntimeError(f"Dependency {d} of {self.name} doesn't exist")
log_state.info("%s is now ready", self.name)
self.ready = True
return True
class HaveData(Node):
def __init__(self, name, depends, table, key, extra):
super().__init__(name, depends)
self.table = table
self.key = key
self.extra = extra
def check(self):
log_check.info("Checking %s", self.name)
csr = db.cursor(cursor_factory=extras.DictCursor)
key_checks = [
sql.SQL(" = ").join([ sql.Identifier(x), sql.Placeholder() ])
for x in self.key.keys()
]
key_check = sql.SQL(" and ").join(key_checks)
q = sql.SQL(
"select * from {table} where {key_check}"
).format(
table=sql.Identifier(self.table),
key_check=key_check
)
key_values = [v.resolve() if isinstance(v, Ref) else v for v in self.key.values()]
csr.execute(q, key_values)
self.result = csr.fetchall()
log_check.info("Got %d rows", len(self.result))
if self.result:
self.ok = True
log_state.info("%s is now ok", self.name)
return
extra_values = [v.resolve() if isinstance(v, Ref) else v for v in self.extra.values()]
columns = list(self.key.keys()) + list(self.extra.keys())
values = key_values + extra_values
q = sql.SQL(
"insert into {table}({columns}) values({placeholders}) returning *"
).format(
table=sql.Identifier(self.table),
columns=sql.SQL(", ").join([sql.Identifier(x) for x in columns]),
placeholders=sql.SQL(", ").join([sql.Placeholder() for x in columns]),
)
log_action.info("Inserting data")
csr.execute(q, values)
self.result = csr.fetchall()
log_action.info("Got %d rows", len(self.result))
if self.result:
self.ok = True
log_state.info("%s is now ok", self.name)
return
# We shouldn't get here. Either the insert succeeded, or it raised an
# exception. Success with 0 rows should not happen.
raise RuntimeError("Unreachable code reached")
class HaveTable(Node):
def __init__(self, name, depends, table, schema="public"):
super().__init__(name, depends)
self.table = table
self.schema = "public"
def check(self):
log_check.info("Checking %s", self.name)
csr = db.cursor(cursor_factory=extras.DictCursor)
csr.execute(
"""
select * from information_schema.tables
where table_schema = %s and table_name = %s
""",
(self.schema, self.table,))
r = csr.fetchall()
if len(r) == 1:
# Table exists, all ok
self.ok = True
log_state.info("%s is now ok", self.name)
return
if len(r) > 1:
raise RuntimeError(f"Found {len(r)} tables with schema {self.schema} and name {self.table}")
# Create table
# (Yes, we can actually create a table with 0 columns)
log_action.info("Creating table %s.%s", self.schema, self.table)
q = sql.SQL("create table {schema}.{table}()").format(
schema=sql.Identifier(self.schema),
table=sql.Identifier(self.table)
)
csr.execute(q)
self.ok = True
log_state.info("%s is now ok", self.name)
pass
class HaveColumn(Node):
# hjp=> alter table service add id serial primary key;
# ALTER TABLE
# hjp=> alter table service add type text;
# ALTER TABLE
# ...
def __init__(self, name, depends, table, column, definition, schema="public"):
super().__init__(name, depends)
self.table = table
self.column = column
self.definition = definition
self.schema = "public"
def check(self):
log_check.info("Checking %s", self.name)
csr = db.cursor(cursor_factory=extras.DictCursor)
# For now just check if column exists. Checking the type etc. will be implemented later
csr.execute(
"""
select * from information_schema.columns
where table_schema = %s and table_name = %s and column_name = %s
""",
(self.schema, self.table, self.column, ))
r = csr.fetchall()
if len(r) == 1:
# Column exists, all ok
self.ok = True
log_state.info("%s is now ok", self.name)
return
if len(r) > 1:
raise RuntimeError(f"Found {len(r)} columns with nam {self.columnr} in {self.schema}.{self.table}")
# Create column
log_action.info("Adding column %s to table %s.%s", self.column, self.schema, self.table)
q = sql.SQL("alter table {schema}.{table} add {column} {definition}").format(
schema=sql.Identifier(self.schema),
table=sql.Identifier(self.table),
column=sql.Identifier(self.column),
definition=sql.SQL(self.definition),
)
csr.execute(q)
self.ok = True
log_state.info("%s is now ok", self.name)
class HaveUniqueConstraint(Node):
# hjp=> alter table service add unique (type, feature);
# ALTER TABLE
pass
def findnode(name):
for w in want:
if w.name == name:
return w
class Ref:
def __init__(self, datanode, row, column):
self.datanode = datanode
self.row = row
self.column = column
def resolve(self):
datanode = findnode(self.datanode)
if not datanode.ok:
# XXX - We might try to resolve this, but for now the user is responsible to declare the dependency explicitely
raise RuntimeError(f"Cannot get data from {datanode.name} which is not yet ok. Please add a dependency")
return datanode.result[self.row][self.column]
def fit(_db, _want):
global db, want
db = _db
want = _want
while True:
progress = False
not_ok = 0
for w in want:
if not w.ok:
if w.is_ready():
w.check()
progress = True
else:
not_ok += 1
if not_ok == 0:
break
if not progress:
raise RuntimeError(f"Didn't make any progress in this round, but {not_ok} requirements are still not ok")
log_state.info("%d requirements are not yet ok", not_ok)
db.commit()
log_state.info("Done")