Implement MVP of DSL

I implemented the parser manually since I couldn't get pypeg2 to emit
useful error messages (I confess to being a bit proud of my multi-level
color-coded reporting).

The DSL isn't complete yet. Explicit dependencies and nested rules are
still missing, as are most column types. But it's enough to create and
populate some tables from a real-world project, so I consider it to have
reached the MVP stage.
This commit is contained in:
Peter J. Holzer 2022-02-25 16:21:07 +01:00 committed by Peter J. Holzer
parent 1f5ba40027
commit dfc1e8bf8b
2 changed files with 362 additions and 0 deletions

View File

@ -0,0 +1,55 @@
table service_type
column service_type id serial primary key
column service_type type text not null unique
column service_type description text
data service_type {type: "group"} {}
data service_type {type: "publ"} {}
data service_type {type: "rolle"} {}
table service
column service id serial primary key
column service type text
column service feature text
column service description text
column service hidden boolean
data service {type: "login", feature: "unix"} {description: "Can login on Unix systems", hidden: False} >> data_service_login_unix
data service {type: "login", feature: "wds"} {description: "Can login into WDS", hidden: False} >> data_service_login_wds
data service {type: "login", feature: "wwa"} {description: "Can login into WWA", hidden: False} >> data_service_login_wwa
data service {type: "group", feature: "wifo"} {hidden: False} >> data_service_group_wifo
data service {type: "group", feature: "wifo-intern"} {hidden: False} >> data_service_group_wifo_intern
data service {type: "group", feature: "wsr"} {hidden: False} >> data_service_group_wsr
data service {type: "mailinglist2", feature: "wifo-aktuell"} {hidden: False} >> data_service_mailinglist2_wifo_aktuell
data service {type: "org", feature: "WIFO"} {hidden: False} >> data_service_org_WIFO
data service {type: "org", feature: "WSR"} {hidden: False} >> data_service_org_WSR
data service {type: "publ", feature: "wifo_intern"} {hidden: False} >> data_service_publ_wifo_intern
data service {type: "publ", feature: "wifo_temporary"} {hidden: False} >> data_service_publ_wifo_temporary
data service {type: "rolle", feature: "administrative_staff_member"} {hidden: False} >> data_service_rolle_administrative_staff_member
data service {type: "rolle", feature: "associate"} {hidden: False} >> data_service_rolle_associate
data service {type: "rolle", feature: "economist"} {hidden: False} >> data_service_rolle_economist
data service {type: "rolle", feature: "emeritus_consultant"} {hidden: False} >> data_service_rolle_emeritus_consultant
data service {type: "rolle", feature: "research_assistant"} {hidden: False} >> data_service_rolle_research_assistant
data service {type: "rolle", feature: "scientific_administration_staff_member"} {hidden: False} >> data_service_rolle_scientific_administration_staff_member
data service {type: "rolle", feature: "scientific_administrative_assistant"} {hidden: False} >> data_service_rolle_scientific_administrative_assistant
data service {type: "rolle", feature: "scientific_consultant"} {hidden: False} >> data_service_rolle_scientific_consultant
data service {type: "rolle", feature: "senior_economist"} {hidden: False} >> data_service_rolle_senior_economist
table template
column template id serial primary key
column template name text not null unique
column template sortorder int
column template email_pattern text
column template email_after text
column template funktion text
column template status text
data template {name: "Standard Vorlage"} {sortorder: 1} >> data_template_std
data template {name: "WIFO Wissenschaftliche Assistenz"} {sortorder: 10, "email_pattern": "vorname#.#nachname", "email_after": "wifo.ac.at", funktion: "FB (1-5)", status: "FU, FM"} >> data_template_wifo_wa
table template_service
column template_service id serial primary key
column template_service template int references template
column template_service service int references service
data template_service {template: @data_template_std/0/id, service: @data_service_login_wwa/0/id} {}

307
parse_dsl_manual Executable file
View File

@ -0,0 +1,307 @@
#!/usr/bin/python3
import logging
import re
import sys
from pprint import pprint
import psycopg2
import procrusql
class RuleFile:
    """Holds the complete text of a rule file read from disk."""

    def __init__(self, filename):
        with open(filename) as fh:
            self.text = fh.read()
class Failure:
    """Records where and why a parse attempt failed.

    parse_state is the child state at the point of failure; its own
    child_failure (if any) lets the error report descend level by level.
    """

    def __init__(self, message, position, parse_state):
        self.position = position
        self.parse_state = parse_state
        self.message = message
class ParseState:
    """A cursor into the rule text, plus error-tracking state.

    Parse functions clone the current state, advance the clone and return
    it on success; on failure they return None after calling
    record_child_failure() on the parent state, which keeps whichever
    failure got furthest into the text for later reporting.
    """

    def __init__(self, text, position=0):
        self.text = text
        self.position = position
        # Deepest failure recorded by any child parse attempt.
        self.child_failure = None

    def clone(self):
        """Return a new ParseState at the same position in the same text."""
        ps = ParseState(self.text, self.position)
        return ps

    @property
    def rest(self):
        """The not-yet-consumed remainder of the text."""
        return self.text[self.position:]

    def printerror(self):
        """Print a color-coded report of the deepest recorded failure.

        Shows up to three lines of successfully parsed context in green,
        then the unparsable remainder of the failing line in red, and
        recurses into the failing child state for a multi-level report.
        """
        if not self.child_failure:
            return
        position = self.child_failure.position
        message = self.child_failure.message
        linesbefore = self.text[:position].split("\n")
        linesafter = self.text[position:].split("\n")
        good = "\x1B[40;32m"
        bad = "\x1B[40;31m"
        reset = "\x1B[0m"
        s = reset + message + "\n"
        lines = []
        for ln in range(max(len(linesbefore) - 3, 0), len(linesbefore)):
            # XXX - join
            lines.append(reset + f"{ln+1:4}: " + good + linesbefore[ln] + reset)
        s += "\n".join(lines)
        # linesafter[0] is the rest of the line the failure occurred on, so
        # it directly continues the last (green) context line.
        s += bad + linesafter[0] + reset
        print(s)
        if self.child_failure.parse_state:
            self.child_failure.parse_state.printerror()

    def skip_whitespace_and_comments(self):
        """Advance past any mix of whitespace and #-to-end-of-line comments."""
        self.match(r"(\s+|#.*)*")

    def match(self, regexp):
        """Try to match regexp at the current position.

        Matching with an explicit start offset instead of slicing
        self.text[self.position:] avoids copying the remaining text on
        every attempt, which made parsing O(n^2) in the file size.
        (None of the grammar's regexps use ^, so anchoring semantics are
        unaffected.)  Returns the match object, with the cursor advanced
        past it, or None.
        """
        if m := re.compile(regexp).match(self.text, self.position):
            self.position = m.end()
            return m
        return None

    def match_newlines(self):
        """Consume trailing whitespace/comments up to and including a newline."""
        self.match(r"(\s+|#.*)*\n")

    def record_child_failure(self, ps_child, msg):
        """Remember a child failure if it got further than any recorded before."""
        if not self.child_failure or ps_child.position > self.child_failure.position:
            self.child_failure = Failure(position=ps_child.position, message=msg, parse_state=ps_child)
def parse_ruleset(ps):
    """Parse a complete rule file into a list of procrusql rule objects.

    Returns a new ParseState whose .ast is the list of parsed rules, or
    None (after recording the failure on ps) if any rule fails to parse.
    """
    ps2 = ps.clone()
    ps2.ast = []
    while True:
        # Consume trailing whitespace/comments first so a file that ends
        # with a comment or blank line parses cleanly instead of producing
        # a spurious "expected one of ..." error at EOF.
        ps2.skip_whitespace_and_comments()
        if not ps2.rest:
            break
        ps3 = parse_table_rule(ps2) or parse_column_rule(ps2) or parse_data_rule(ps2)
        if not ps3:
            ps.record_child_failure(ps2, "expected one of: table rule, column rule, data rule")
            return None
        ps2.ast.append(ps3.ast)
        ps2.position = ps3.position
    return ps2
def parse_table_rule(ps):
    """Parse "table <name>" and build a HaveTable rule as the ast."""
    ps2 = ps.clone()
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"table\b"):
        ps.record_child_failure(ps2, "expected “table”")
        return None
    ps2.skip_whitespace_and_comments()
    step = parse_table_name(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected table name")
        return None
    ps2.ast = procrusql.HaveTable(rulename(), [], step.ast[0])
    ps2.position = step.position
    return ps2
def parse_column_rule(ps):
    """Parse "column <table> <column> <definition>" into a HaveColumn rule."""
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"column\b"):
        ps.record_child_failure(ps2, "expected “column”")
        return None
    # The table name should be omitted if this is part of a table
    # declaration.  Whether that becomes optional in this rule or a
    # separate rule is still undecided — probably the latter; the former
    # might need a syntax change to avoid ambiguity.
    step = parse_table_name(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected table name")
        return None
    table_name = step.ast[0]
    ps2.position = step.position
    step = parse_column_name(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected column name")
        return None
    column_name = step.ast[0]
    ps2.position = step.position
    step = parse_column_definition(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected column definition")
        return None
    column_definition = step.ast[0]
    ps2.position = step.position
    ps2.ast = procrusql.HaveColumn(rulename(), [], table_name, column_name, column_definition)
    ps2.match_newlines()
    return ps2
def parse_data_rule(ps):
    """Parse "data <table> {key data} {extra data} [>> label]" into a HaveData rule."""
    ps2 = ps.clone()
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"data\b"):
        ps.record_child_failure(ps2, "expected “data”")
        return None
    step = parse_table_name(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected table name")
        return None
    table_name = step.ast[0]
    ps2.position = step.position
    step = parse_dict(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected key data definition")
        return None
    key_data = step.ast
    ps2.position = step.position
    step = parse_dict(ps2)
    if not step:
        ps.record_child_failure(ps2, "expected extra data definition")
        return None
    extra_data = step.ast
    ps2.position = step.position
    # The ">> label" suffix is optional; fall back to a generated name.
    step = parse_label(ps2)
    if step:
        label = step.ast
        ps2.position = step.position
    else:
        label = rulename()
    ps2.ast = procrusql.HaveData(label, [], table_name, key_data, extra_data)
    ps2.match_newlines()
    return ps2
def parse_table_name(ps):
    """Parse a simple table name.

    For now this matches only simple names, not schema-qualified names or
    quoted names.  Returns a new ParseState with the name as ast[0], or
    None (after recording the failure on ps) if no name is found.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    # Guard against EOF: ps2.rest may be empty, so check before indexing.
    if not ps2.rest or not ps2.rest[0].isalpha():
        ps.record_child_failure(ps2, "expected table name")
        # Bug fix: the original fell through to "return ps2" here, so a
        # failed parse was returned as a truthy state with an empty ast
        # and callers crashed on ast[0].  Signal failure with None, the
        # same way parse_column_name does.
        return None
    m = ps2.match(r"\w+")  # always succeeds since we already checked the first character
    ps2.ast.append(m.group(0))
    return ps2
def parse_column_name(ps):
    """Parse a simple column name.

    For now this matches only simple names, not quoted names.
    Also, this is almost an exact duplicate of parse_table_name, but they
    will probably diverge, so it is kept separate.  Returns a new
    ParseState with the name as ast[0], or None on failure.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    # Guard against EOF: ps2.rest may be empty, so check before indexing
    # (the original raised IndexError at end of input).
    if ps2.rest and ps2.rest[0].isalpha():
        m = ps2.match(r"\w+")  # always succeeds since we already checked the first character
        ps2.ast.append(m.group(0))
        return ps2
    ps.record_child_failure(ps2, "expected column name")
    return None
def parse_column_definition(ps):
    """Parse a column type with optional "not null" and constraint modifiers.

    Only a handful of types are supported so far; the whole matched text
    becomes ast[0] verbatim.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    m = ps2.match(r"(int|serial|text|boolean)(\s+not null)?(\s+(primary key|unique|references \w+))?\b")
    if m:
        ps2.ast.append(m.group(0))
        return ps2
    ps.record_child_failure(ps2, "expected column definition")
    return None
def _parse_dict_value(ps2):
    """Parse one value literal at ps2's position.

    Returns (True, value) on success — advancing ps2 — or (False, None)
    if nothing matched.  Alternatives are tried in the same order as the
    original inline code: integer, string, booleans, null, reference.
    """
    if m := ps2.match(r'[0-9]+'):
        return True, int(m.group(0))
    if m := ps2.match(r'"([^"]*)"'):
        # XXX - process backslash escapes
        return True, m.group(1)
    if ps2.match(r'[tT]rue'):
        return True, True
    if ps2.match(r'[fF]alse'):
        return True, False
    if ps2.match(r'None|null|NULL'):
        return True, None
    if m := ps2.match(r'@(\w+)/(\d+)/(\w+)'):
        return True, procrusql.Ref(m.group(1), int(m.group(2)), m.group(3))
    return False, None


def parse_dict(ps):
    """Parse a "{key: value, ...}" literal into a Python dict (as ast)."""
    ps2 = ps.clone()
    result = {}
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"{"):
        ps.record_child_failure(ps2, "expected “{”")
        return None
    while True:
        ps2.skip_whitespace_and_comments()
        if ps2.match(r'}'):
            break
        key_match = ps2.match(r'\w+|"([^"]+)"')
        if not key_match:
            ps.record_child_failure(ps2, "expected column name")
            return None
        # XXX - unquote properly
        if key_match.group(1):
            key = key_match.group(1)
        else:
            key = key_match.group(0)
        ps2.skip_whitespace_and_comments()
        if not ps2.match(":"):
            ps.record_child_failure(ps2, "expected “:”")
            return None
        ps2.skip_whitespace_and_comments()
        ok, value = _parse_dict_value(ps2)
        if not ok:
            ps.record_child_failure(ps2, "expected value")
            return None
        result[key] = value
        ps2.skip_whitespace_and_comments()
        saw_comma = ps2.match(r',')
        ps2.skip_whitespace_and_comments()
        if ps2.match(r'}'):
            break
        if not saw_comma:
            ps.record_child_failure(ps2, "expected comma or close brace")
            return None
    ps2.ast = result
    return ps2
def parse_label(ps):
    """Parse a ">> label" suffix that names a data rule explicitly."""
    ps2 = ps.clone()
    m = ps2.match(r"\s*>>\s*(\w+)")
    if not m:
        ps.record_child_failure(ps2, "expected label definition")
        return None
    ps2.ast = m.group(1)
    return ps2
# Counter backing the auto-generated rule names.
rulenum = 0


def rulename():
    """Return a fresh auto-generated rule name ("__rule_1", "__rule_2", ...)."""
    global rulenum
    rulenum = rulenum + 1
    return f"__rule_{rulenum}"
if __name__ == "__main__":
    # Usage: parse_dsl_manual <rulefile> <dsn>
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s %(lineno)d | %(message)s",
        level=logging.DEBUG,
    )
    with open(sys.argv[1]) as rule_file:
        rule_text = rule_file.read()
    start_state = ParseState(rule_text)
    parsed = parse_ruleset(start_state)
    if not parsed:
        # Parsing failed: print the color-coded error report and bail out.
        start_state.printerror()
        sys.exit(1)
    db = psycopg2.connect(sys.argv[2])
    procrusql.fit(db, parsed.ast)