Implement MVP of DSL
I implemented the parser manually since I couldn't get pypeg2 to emit useful error messages (I confess to being a bit proud of my multi-level color-coded reporting). The DSL isn't complete yet. Explicit dependencies and nested rules are still missing, as are most column types. But it's enough to create and populate some tables from a real-world project, so I consider it to have reached the MVP stage.
This commit is contained in:
parent
1f5ba40027
commit
dfc1e8bf8b
|
@ -0,0 +1,55 @@
|
|||
table service_type
|
||||
column service_type id serial primary key
|
||||
column service_type type text not null unique
|
||||
column service_type description text
|
||||
|
||||
data service_type {type: "group"} {}
|
||||
data service_type {type: "publ"} {}
|
||||
data service_type {type: "rolle"} {}
|
||||
|
||||
table service
|
||||
column service id serial primary key
|
||||
column service type text
|
||||
column service feature text
|
||||
column service description text
|
||||
column service hidden boolean
|
||||
|
||||
data service {type: "login", feature: "unix"} {description: "Can login on Unix systems", hidden: False} >> data_service_login_unix
|
||||
data service {type: "login", feature: "wds"} {description: "Can login into WDS", hidden: False} >> data_service_login_wds
|
||||
data service {type: "login", feature: "wwa"} {description: "Can login into WWA", hidden: False} >> data_service_login_wwa
|
||||
data service {type: "group", feature: "wifo"} {hidden: False} >> data_service_group_wifo
|
||||
data service {type: "group", feature: "wifo-intern"} {hidden: False} >> data_service_group_wifo_intern
|
||||
data service {type: "group", feature: "wsr"} {hidden: False} >> data_service_group_wsr
|
||||
data service {type: "mailinglist2", feature: "wifo-aktuell"} {hidden: False} >> data_service_mailinglist2_wifo_aktuell
|
||||
data service {type: "org", feature: "WIFO"} {hidden: False} >> data_service_org_WIFO
|
||||
data service {type: "org", feature: "WSR"} {hidden: False} >> data_service_org_WSR
|
||||
data service {type: "publ", feature: "wifo_intern"} {hidden: False} >> data_service_publ_wifo_intern
|
||||
data service {type: "publ", feature: "wifo_temporary"} {hidden: False} >> data_service_publ_wifo_temporary
|
||||
data service {type: "rolle", feature: "administrative_staff_member"} {hidden: False} >> data_service_rolle_administrative_staff_member
|
||||
data service {type: "rolle", feature: "associate"} {hidden: False} >> data_service_rolle_associate
|
||||
data service {type: "rolle", feature: "economist"} {hidden: False} >> data_service_rolle_economist
|
||||
data service {type: "rolle", feature: "emeritus_consultant"} {hidden: False} >> data_service_rolle_emeritus_consultant
|
||||
data service {type: "rolle", feature: "research_assistant"} {hidden: False} >> data_service_rolle_research_assistant
|
||||
data service {type: "rolle", feature: "scientific_administration_staff_member"} {hidden: False} >> data_service_rolle_scientific_administration_staff_member
|
||||
data service {type: "rolle", feature: "scientific_administrative_assistant"} {hidden: False} >> data_service_rolle_scientific_administrative_assistant
|
||||
data service {type: "rolle", feature: "scientific_consultant"} {hidden: False} >> data_service_rolle_scientific_consultant
|
||||
data service {type: "rolle", feature: "senior_economist"} {hidden: False} >> data_service_rolle_senior_economist
|
||||
|
||||
table template
|
||||
column template id serial primary key
|
||||
column template name text not null unique
|
||||
column template sortorder int
|
||||
column template email_pattern text
|
||||
column template email_after text
|
||||
column template funktion text
|
||||
column template status text
|
||||
|
||||
data template {name: "Standard Vorlage"} {sortorder: 1} >> data_template_std
|
||||
data template {name: "WIFO Wissenschaftliche Assistenz"} {sortorder: 10, "email_pattern": "vorname#.#nachname", "email_after": "wifo.ac.at", funktion: "FB (1-5)", status: "FU, FM"} >> data_template_wifo_wa
|
||||
|
||||
table template_service
|
||||
column template_service id serial primary key
|
||||
column template_service template int references template
|
||||
column template_service service int references service
|
||||
|
||||
data template_service {template: @data_template_std/0/id, service: @data_service_login_wwa/0/id} {}
|
|
@ -0,0 +1,307 @@
|
|||
#!/usr/bin/python3
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
import psycopg2
|
||||
|
||||
import procrusql
|
||||
|
||||
class RuleFile:
    """A rule file loaded into memory as a single string."""

    def __init__(self, filename):
        """Read the entire file *filename* into ``self.text``."""
        with open(filename) as fh:
            self.text = fh.read()
|
||||
|
||||
class Failure:
    """Records where and why a parse attempt failed."""

    def __init__(self, message, position, parse_state):
        self.message = message          # human-readable description of what was expected
        self.position = position        # character offset into the input text
        self.parse_state = parse_state  # the failed child ParseState, or None
|
||||
|
||||
class ParseState:
    """A cursor into the rule text plus bookkeeping for error reporting."""

    def __init__(self, text, position=0):
        self.text = text
        self.position = position
        self.child_failure = None  # furthest-reaching Failure recorded so far

    def clone(self):
        """Return a fresh ParseState at the same position (failures are not copied)."""
        return ParseState(self.text, self.position)

    @property
    def rest(self):
        """The not-yet-consumed tail of the input text."""
        return self.text[self.position:]

    def printerror(self):
        """Print a multi-level, color-coded report of the recorded failure chain."""
        failure = self.child_failure
        if not failure:
            return
        before = self.text[:failure.position].split("\n")
        after = self.text[failure.position:].split("\n")
        good = "\x1B[40;32m"
        bad = "\x1B[40;31m"
        reset = "\x1B[0m"
        # Show up to three lines of successfully parsed context in green ...
        first_shown = max(len(before) - 3, 0)
        context = [
            reset + f"{ln+1:4}: " + good + before[ln] + reset
            for ln in range(first_shown, len(before))
        ]
        report = reset + failure.message + "\n"
        report += "\n".join(context)
        # ... and the rest of the offending line in red, continuing the last
        # (possibly partial) green line.
        report += bad + after[0] + reset
        print(report)

        # Recurse into the child's own recorded failure for deeper detail.
        if failure.parse_state:
            failure.parse_state.printerror()

    def skip_whitespace_and_comments(self):
        """Consume any run of whitespace and '#' line comments."""
        self.match(r"(\s+|#.*)*")

    def match(self, regexp):
        """Try *regexp* at the current position, advancing past it on success.

        Returns the match object, or None when the pattern does not match.
        """
        m = re.match(regexp, self.text[self.position:])
        if m:
            self.position += len(m.group(0))
            return m
        return None

    def match_newlines(self):
        """Consume whitespace and comments up to and including a newline."""
        self.match(r"(\s+|#.*)*\n")

    def record_child_failure(self, ps_child, msg):
        """Remember the child failure that got furthest into the input."""
        if not self.child_failure or ps_child.position > self.child_failure.position:
            self.child_failure = Failure(position=ps_child.position, message=msg, parse_state=ps_child)
|
||||
|
||||
def parse_ruleset(ps):
    """Parse a whole rule file: a sequence of table, column and data rules.

    Returns a ParseState whose .ast is the list of parsed rules, or None on
    failure (the failure is recorded on *ps* for error reporting).
    """
    ps2 = ps.clone()
    ps2.ast = []
    while ps2.rest:
        # Fix: trailing whitespace or comments at the end of the file are not
        # an error — stop cleanly instead of demanding one more rule.
        ps_end = ps2.clone()
        ps_end.skip_whitespace_and_comments()
        if not ps_end.rest:
            ps2.position = ps_end.position
            break
        ps3 = parse_table_rule(ps2) or parse_column_rule(ps2) or parse_data_rule(ps2)
        if ps3:
            ps2.ast.append(ps3.ast)
            ps2.position = ps3.position
        else:
            ps.record_child_failure(ps2, "expected one of: table rule, column rule, data rule")
            return
    return ps2
|
||||
|
||||
def parse_table_rule(ps):
    """Parse “table <name>” into a procrusql.HaveTable rule.

    Returns the advanced ParseState with .ast set, or None on failure.
    """
    ps2 = ps.clone()
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"table\b"):
        ps.record_child_failure(ps2, "expected “table”")
        return
    ps2.skip_whitespace_and_comments()
    ps3 = parse_table_name(ps2)
    # Guard against a sub-parser signalling failure with an empty ast, which
    # would otherwise crash on ps3.ast[0] below.
    if not ps3 or not ps3.ast:
        ps.record_child_failure(ps2, "expected table name")
        return

    ps2.ast = procrusql.HaveTable(rulename(), [], ps3.ast[0])
    ps2.position = ps3.position

    # Consistency fix: consume the rest of the line, like the column and data
    # rule parsers do.
    ps2.match_newlines()

    return ps2
|
||||
|
||||
def parse_column_rule(ps):
    """Parse “column <table> <column> <definition>” into a procrusql.HaveColumn rule.

    Returns the advanced ParseState with .ast set, or None on failure.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"column\b"):
        ps.record_child_failure(ps2, "expected “column”")
        return

    # The table name should be omitted if this is part of a table declaration.
    # I haven't decided if I want to make that optional in this rule or write a
    # different rule. Probably the latter. If the former, I may have to change
    # the syntax to avoid ambiguity.
    parts = []
    for subparser, expectation in (
        (parse_table_name, "expected table name"),
        (parse_column_name, "expected column name"),
        (parse_column_definition, "expected column definition"),
    ):
        ps3 = subparser(ps2)
        if not ps3:
            ps.record_child_failure(ps2, expectation)
            return
        parts.append(ps3.ast[0])
        ps2.position = ps3.position

    table_name, column_name, column_definition = parts

    ps2.ast = procrusql.HaveColumn(rulename(), [], table_name, column_name, column_definition)

    ps2.match_newlines()

    return ps2
|
||||
|
||||
def parse_data_rule(ps):
    """Parse “data <table> {key-data} {extra-data} [>> label]” into a procrusql.HaveData rule.

    Returns the advanced ParseState with .ast set, or None on failure.
    """
    ps2 = ps.clone()
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"data\b"):
        ps.record_child_failure(ps2, "expected “data”")
        return

    ps3 = parse_table_name(ps2)
    if not ps3:
        ps.record_child_failure(ps2, "expected table name")
        return
    table_name = ps3.ast[0]
    ps2.position = ps3.position

    # Two dicts follow: the key data (identifies the row) and the extra data.
    dicts = []
    for what in ("key", "extra"):
        ps3 = parse_dict(ps2)
        if not ps3:
            ps.record_child_failure(ps2, f"expected {what} data definition")
            return
        dicts.append(ps3.ast)
        ps2.position = ps3.position
    key_data, extra_data = dicts

    # The label is optional; fall back to an auto-generated rule name.
    ps3 = parse_label(ps2)
    if ps3:
        label = ps3.ast
        ps2.position = ps3.position
    else:
        label = rulename()

    ps2.ast = procrusql.HaveData(label, [], table_name, key_data, extra_data)

    ps2.match_newlines()

    return ps2
|
||||
|
||||
def parse_table_name(ps):
    """Parse a table name.

    For now this matches only simple names, not schema-qualified names or
    quoted names.

    Returns the advanced ParseState with the name in .ast, or None on failure
    (consistent with parse_column_name and the other sub-parsers).
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    # Fix: rest[:1] is "" at end of input, avoiding the IndexError that
    # rest[0] raised there.
    if ps2.rest[:1].isalpha():
        m = ps2.match(r"\w+") # always succeeds since we already checked the first character
        ps2.ast.append(m.group(0))
        return ps2
    else:
        ps.record_child_failure(ps2, "expected table name")
        # Fix: return None on failure instead of a truthy state with an empty
        # ast, which made callers crash on ps3.ast[0].
        return
|
||||
|
||||
def parse_column_name(ps):
    """Parse a column name.

    For now this matches only simple names, not quoted names.
    Also, this is an exact duplicate of parse_table_name, but they will
    probably diverge, so I duplicated it.

    Returns the advanced ParseState with the name in .ast, or None on failure.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    # Fix: rest[:1] is "" at end of input, avoiding the IndexError that
    # rest[0] raised there.
    if ps2.rest[:1].isalpha():
        m = ps2.match(r"\w+") # always succeeds since we already checked the first character
        ps2.ast.append(m.group(0))
        return ps2
    else:
        ps.record_child_failure(ps2, "expected column name")
        return
|
||||
|
||||
def parse_column_definition(ps):
    """Parse a column type plus optional constraints, e.g. “text not null unique”.

    Only a handful of types and constraints are supported so far.
    Returns the advanced ParseState with the definition text in .ast, or None.
    """
    ps2 = ps.clone()
    ps2.ast = []
    ps2.skip_whitespace_and_comments()
    definition_re = r"(int|serial|text|boolean)(\s+not null)?(\s+(primary key|unique|references \w+))?\b"
    m = ps2.match(definition_re)
    if m:
        ps2.ast.append(m.group(0))
        return ps2
    ps.record_child_failure(ps2, "expected column definition")
    return
|
||||
|
||||
def parse_dict(ps):
    """Parse a brace-delimited dict literal like {type: "group", hidden: False}.

    Keys are bare words or double-quoted strings; values may be integers,
    double-quoted strings, booleans, None/null/NULL, or @rule/index/column
    references. Returns the advanced ParseState with the dict in .ast, or
    None on failure.
    """
    ps2 = ps.clone()
    d = {}
    ps2.skip_whitespace_and_comments()
    if not ps2.match(r"{"):
        ps.record_child_failure(ps2, "expected “{”")
        return
    while True:
        ps2.skip_whitespace_and_comments()
        # An immediate close brace means the dict is empty (or had a trailing comma).
        if ps2.match(r'}'):
            break

        # Key: bare word, or quoted string captured in group 1.
        m = ps2.match(r'\w+|"([^"]+)"')
        if not m:
            ps.record_child_failure(ps2, "expected column name")
            return
        # XXX - unquote properly
        if m.group(1):
            k = m.group(1)
        else:
            k = m.group(0)

        ps2.skip_whitespace_and_comments()
        if not ps2.match(":"):
            ps.record_child_failure(ps2, "expected “:”")
            return
        ps2.skip_whitespace_and_comments()
        # Value: first matching alternative wins.
        if m := ps2.match(r'[0-9]+'):
            v = int(m.group(0))
        elif m := ps2.match(r'"([^"]*)"'):
            # XXX - process backslash escapes
            v = m.group(1)
        elif m := ps2.match(r'[tT]rue'):
            v = True
        elif m := ps2.match(r'[fF]alse'):
            v = False
        elif m := ps2.match(r'None|null|NULL'):
            v = None
        elif m := ps2.match(r'@(\w+)/(\d+)/(\w+)'):
            # Reference to a column of a row produced by another data rule.
            v = procrusql.Ref(m.group(1), int(m.group(2)), m.group(3))
        else:
            ps.record_child_failure(ps2, "expected value")
            return

        d[k] = v

        # After each entry: an optional comma, then either the closing brace
        # or (if no comma was seen) a syntax error.
        ps2.skip_whitespace_and_comments()
        comma_found = ps2.match(r',')
        ps2.skip_whitespace_and_comments()
        if ps2.match(r'}'):
            break
        if not comma_found:
            ps.record_child_failure(ps2, "expected comma or close brace")
            return
    ps2.ast = d
    return ps2
|
||||
|
||||
def parse_label(ps):
    """Parse an optional “>> name” label following a data rule.

    Returns the advanced ParseState with the label string in .ast, or None.
    """
    ps2 = ps.clone()
    m = ps2.match(r"\s*>>\s*(\w+)")
    if not m:
        ps.record_child_failure(ps2, "expected label definition")
        return
    ps2.ast = m.group(1)
    return ps2
|
||||
|
||||
# Counter behind rulename(); grows monotonically for the process lifetime.
rulenum = 0

def rulename():
    """Return a fresh, unique auto-generated rule name (e.g. “__rule_1”)."""
    global rulenum
    rulenum += 1
    return "__rule_%d" % rulenum
|
||||
|
||||
# Usage: <script> <rulefile> <libpq-connection-string>
# Parses the rule file and applies the resulting rules to the database.
if __name__ == "__main__":
    logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s %(lineno)d | %(message)s", level=logging.DEBUG)
    with open(sys.argv[1]) as rf:
        text = rf.read()
    ps = ParseState(text)

    ps2 = parse_ruleset(ps)

    # A falsy result means the parse failed; the failure chain recorded on
    # ps drives the color-coded error report.
    if not ps2:
        ps.printerror()
        sys.exit(1)

    db = psycopg2.connect(sys.argv[2])
    procrusql.fit(db, ps2.ast)
|
Loading…
Reference in New Issue