From dfc1e8bf8bf54e33b62d090b0e5c1fcf2e4f0f5b Mon Sep 17 00:00:00 2001 From: "Peter J. Holzer" Date: Fri, 25 Feb 2022 16:21:07 +0100 Subject: [PATCH] Implement MVP of DSL I implemented the parser manually since I couldn't get pypeg2 to emit useful error messages (I confess to being a bit proud of my multi-level color-coded reporting). The DSL isn't complete yet. Explicit dependencies and nested rules are still missing, as are most column types. But it's enough to create and populate some tables from a real-world project, so I consider it to have reached the MVP stage. --- demo_usradm_templates.procrusql | 55 ++++++ parse_dsl_manual | 307 ++++++++++++++++++++++++++++++++ 2 files changed, 362 insertions(+) create mode 100644 demo_usradm_templates.procrusql create mode 100755 parse_dsl_manual diff --git a/demo_usradm_templates.procrusql b/demo_usradm_templates.procrusql new file mode 100644 index 0000000..10c2582 --- /dev/null +++ b/demo_usradm_templates.procrusql @@ -0,0 +1,55 @@ +table service_type +column service_type id serial primary key +column service_type type text not null unique +column service_type description text + +data service_type {type: "group"} {} +data service_type {type: "publ"} {} +data service_type {type: "rolle"} {} + +table service +column service id serial primary key +column service type text +column service feature text +column service description text +column service hidden boolean + +data service {type: "login", feature: "unix"} {description: "Can login on Unix systems", hidden: False} >> data_service_login_unix +data service {type: "login", feature: "wds"} {description: "Can login into WDS", hidden: False} >> data_service_login_wds +data service {type: "login", feature: "wwa"} {description: "Can login into WWA", hidden: False} >> data_service_login_wwa +data service {type: "group", feature: "wifo"} {hidden: False} >> data_service_group_wifo +data service {type: "group", feature: "wifo-intern"} {hidden: False} >> data_service_group_wifo_intern 
+data service {type: "group", feature: "wsr"} {hidden: False} >> data_service_group_wsr +data service {type: "mailinglist2", feature: "wifo-aktuell"} {hidden: False} >> data_service_mailinglist2_wifo_aktuell +data service {type: "org", feature: "WIFO"} {hidden: False} >> data_service_org_WIFO +data service {type: "org", feature: "WSR"} {hidden: False} >> data_service_org_WSR +data service {type: "publ", feature: "wifo_intern"} {hidden: False} >> data_service_publ_wifo_intern +data service {type: "publ", feature: "wifo_temporary"} {hidden: False} >> data_service_publ_wifo_temporary +data service {type: "rolle", feature: "administrative_staff_member"} {hidden: False} >> data_service_rolle_administrative_staff_member +data service {type: "rolle", feature: "associate"} {hidden: False} >> data_service_rolle_associate +data service {type: "rolle", feature: "economist"} {hidden: False} >> data_service_rolle_economist +data service {type: "rolle", feature: "emeritus_consultant"} {hidden: False} >> data_service_rolle_emeritus_consultant +data service {type: "rolle", feature: "research_assistant"} {hidden: False} >> data_service_rolle_research_assistant +data service {type: "rolle", feature: "scientific_administration_staff_member"} {hidden: False} >> data_service_rolle_scientific_administration_staff_member +data service {type: "rolle", feature: "scientific_administrative_assistant"} {hidden: False} >> data_service_rolle_scientific_administrative_assistant +data service {type: "rolle", feature: "scientific_consultant"} {hidden: False} >> data_service_rolle_scientific_consultant +data service {type: "rolle", feature: "senior_economist"} {hidden: False} >> data_service_rolle_senior_economist + +table template +column template id serial primary key +column template name text not null unique +column template sortorder int +column template email_pattern text +column template email_after text +column template funktion text +column template status text + +data template {name: 
"Standard Vorlage"} {sortorder: 1} >> data_template_std +data template {name: "WIFO Wissenschaftliche Assistenz"} {sortorder: 10, "email_pattern": "vorname#.#nachname", "email_after": "wifo.ac.at", funktion: "FB (1-5)", status: "FU, FM"} >> data_template_wifo_wa + +table template_service +column template_service id serial primary key +column template_service template int references template +column template_service service int references service + +data template_service {template: @data_template_std/0/id, service: @data_service_login_wwa/0/id} {} diff --git a/parse_dsl_manual b/parse_dsl_manual new file mode 100755 index 0000000..ecdc431 --- /dev/null +++ b/parse_dsl_manual @@ -0,0 +1,307 @@ +#!/usr/bin/python3 +import logging +import re +import sys +from pprint import pprint + +import psycopg2 + +import procrusql + +class RuleFile: + def __init__(self, filename): + with open(filename) as f: + self.text = f.read() + +class Failure: + def __init__(self, message, position, parse_state): + self.message = message + self.position = position + self.parse_state = parse_state + +class ParseState: + + def __init__(self, text, position=0): + self.text = text + self.position = position + self.child_failure = None + + def clone(self): + ps = ParseState(self.text, self.position) + return ps + + @property + def rest(self): + return self.text[self.position:] + + def printerror(self): + if not self.child_failure: + return + position = self.child_failure.position + message = self.child_failure.message + linesbefore = self.text[:position].split("\n") + linesafter = self.text[position:].split("\n") + good = "\x1B[40;32m" + bad = "\x1B[40;31m" + reset = "\x1B[0m" + s = reset + message + "\n" + lines = [] + for ln in range(max(len(linesbefore) - 3, 0), len(linesbefore)): + # XXX - join + lines.append(reset + f"{ln+1:4}: " + good + linesbefore[ln] + reset) + s += "\n".join(lines) + s += bad + linesafter[0] + reset + print(s) + + if self.child_failure.parse_state: + 
self.child_failure.parse_state.printerror() + + def skip_whitespace_and_comments(self): + self.match(r"(\s+|#.*)*") + + def match(self, regexp): + if m := re.match(regexp, self.text[self.position:]): + self.position += len(m.group(0)) + return m + + def match_newlines(self): + self.match(r"(\s+|#.*)*\n") + + def record_child_failure(self, ps_child, msg): + if not self.child_failure or ps_child.position > self.child_failure.position: + self.child_failure = Failure(position=ps_child.position, message=msg, parse_state=ps_child) + +def parse_ruleset(ps): + ps2 = ps.clone() + ps2.ast = [] + while ps2.rest: + ps3 = parse_table_rule(ps2) or parse_column_rule(ps2) or parse_data_rule(ps2) + if ps3: + ps2.ast.append(ps3.ast) + ps2.position = ps3.position + else: + ps.record_child_failure(ps2, "expected one of: table rule, column rule, data rule") + return + return ps2 + +def parse_table_rule(ps): + ps2 = ps.clone() + ps2.skip_whitespace_and_comments() + if not ps2.match(r"table\b"): + ps.record_child_failure(ps2, "expected “table”") + return + ps2.skip_whitespace_and_comments() + ps3 = parse_table_name(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected table name") + return + + ps2.ast = procrusql.HaveTable(rulename(), [], ps3.ast[0]) + ps2.position = ps3.position + return ps2 + +def parse_column_rule(ps): + ps2 = ps.clone() + ps2.ast = [] + ps2.skip_whitespace_and_comments() + if not ps2.match(r"column\b"): + ps.record_child_failure(ps2, "expected “column”") + return + + # The table name should be omitted if this is part of a table declaration. + # I haven't decided if I want to make that optional in this rule or write a + # different rule. Probably the latter. If the former, I may have to change + # the syntax to avoid ambiguity. 
+ ps3 = parse_table_name(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected table name") + return + table_name = ps3.ast[0] + ps2.position = ps3.position + + ps3 = parse_column_name(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected column name") + return + column_name = ps3.ast[0] + ps2.position = ps3.position + + ps3 = parse_column_definition(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected column definition") + return + column_definition = ps3.ast[0] + ps2.position = ps3.position + + ps2.ast = procrusql.HaveColumn(rulename(), [], table_name, column_name, column_definition) + + ps2.match_newlines() + + return ps2 + +def parse_data_rule(ps): + ps2 = ps.clone() + ps2.skip_whitespace_and_comments() + if not ps2.match(r"data\b"): + ps.record_child_failure(ps2, "expected “data”") + return + + ps3 = parse_table_name(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected table name") + return + table_name = ps3.ast[0] + ps2.position = ps3.position + + ps3 = parse_dict(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected key data definition") + return + key_data = ps3.ast + ps2.position = ps3.position + + ps3 = parse_dict(ps2) + if not ps3: + ps.record_child_failure(ps2, "expected extra data definition") + return + extra_data = ps3.ast + ps2.position = ps3.position + + ps3 = parse_label(ps2) + if ps3: + label = ps3.ast + ps2.position = ps3.position + else: + label = rulename() + + ps2.ast = procrusql.HaveData(label, [], table_name, key_data, extra_data) + + ps2.match_newlines() + + return ps2 + +def parse_table_name(ps): + # For now this matches only simple names, not schema-qualified names or + # quoted names. 
+ ps2 = ps.clone() + ps2.ast = [] + ps2.skip_whitespace_and_comments() + if ps2.rest[0].isalpha(): + m = ps2.match(r"\w+") # always succeeds since we already checked the first character + ps2.ast.append(m.group(0)) + else: + ps.record_child_failure(ps2, "expected table name") + return ps2 + +def parse_column_name(ps): + # For now this matches only simple names, not quoted names. + # Also, this is an exact duplicate of parse_table_name, but they will + # probably diverge, so I duplicated it. + ps2 = ps.clone() + ps2.ast = [] + ps2.skip_whitespace_and_comments() + if ps2.rest[0].isalpha(): + m = ps2.match(r"\w+") # always succeeds since we already checked the first character + ps2.ast.append(m.group(0)) + return ps2 + else: + ps.record_child_failure(ps2, "expected column name") + return + +def parse_column_definition(ps): + ps2 = ps.clone() + ps2.ast = [] + ps2.skip_whitespace_and_comments() + m = ps2.match(r"(int|serial|text|boolean)(\s+not null)?(\s+(primary key|unique|references \w+))?\b") + if not m: + ps.record_child_failure(ps2, "expected column definition") + return + ps2.ast.append(m.group(0)) + return ps2 + +def parse_dict(ps): + ps2 = ps.clone() + d = {} + ps2.skip_whitespace_and_comments() + if not ps2.match(r"{"): + ps.record_child_failure(ps2, "expected “{”") + return + while True: + ps2.skip_whitespace_and_comments() + if ps2.match(r'}'): + break + + m = ps2.match(r'\w+|"([^"]+)"') + if not m: + ps.record_child_failure(ps2, "expected column name") + return + # XXX - unquote properly + if m.group(1): + k = m.group(1) + else: + k = m.group(0) + + ps2.skip_whitespace_and_comments() + if not ps2.match(":"): + ps.record_child_failure(ps2, "expected “:”") + return + ps2.skip_whitespace_and_comments() + if m := ps2.match(r'[0-9]+'): + v = int(m.group(0)) + elif m := ps2.match(r'"([^"]*)"'): + # XXX - process backslash escapes + v = m.group(1) + elif m := ps2.match(r'[tT]rue'): + v = True + elif m := ps2.match(r'[fF]alse'): + v = False + elif m := 
ps2.match(r'None|null|NULL'): + v = None + elif m := ps2.match(r'@(\w+)/(\d+)/(\w+)'): + v = procrusql.Ref(m.group(1), int(m.group(2)), m.group(3)) + else: + ps.record_child_failure(ps2, "expected value") + return + + d[k] = v + + ps2.skip_whitespace_and_comments() + comma_found = ps2.match(r',') + ps2.skip_whitespace_and_comments() + if ps2.match(r'}'): + break + if not comma_found: + ps.record_child_failure(ps2, "expected comma or close brace") + return + ps2.ast = d + return ps2 + +def parse_label(ps): + ps2 = ps.clone() + if m := ps2.match(r"\s*>>\s*(\w+)"): + ps2.ast = m.group(1) + return ps2 + else: + ps.record_child_failure(ps2, "expected label definition") + return + +rulenum = 0 +def rulename(): + global rulenum + rulenum += 1 + return f"__rule_{rulenum}" + +if __name__ == "__main__": + logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s %(lineno)d | %(message)s", level=logging.DEBUG) + with open(sys.argv[1]) as rf: + text = rf.read() + ps = ParseState(text) + + ps2 = parse_ruleset(ps) + + if not ps2: + ps.printerror() + sys.exit(1) + + db = psycopg2.connect(sys.argv[2]) + procrusql.fit(db, ps2.ast)