Write used evidence to database

This commit is contained in:
Peter J. Holzer 2019-08-27 22:38:00 +02:00
parent e6a4ba72f1
commit c49d6847f3
1 changed file with 15 additions and 2 deletions

View File

@@ -32,9 +32,17 @@ def add_message(msg):
msg.get_payload(decode=True) msg.get_payload(decode=True)
.decode(charset, errors='replace')) .decode(charset, errors='replace'))
def extract_features(): def extract_features(msgid):
db = psycopg2.connect("dbname=bayes") db = psycopg2.connect("dbname=bayes")
csr = db.cursor(cursor_factory=ppe.DictCursor) csr = db.cursor(cursor_factory=ppe.DictCursor)
csr.execute(
"""
insert into messages(id, message_id)
values(default, %s)
returning id
""",
(msgid,))
msg_pk = csr.fetchone()["id"]
evidence = [] evidence = []
for t in components: for t in components:
prev = {""} prev = {""}
@@ -82,9 +90,14 @@ def extract_features():
p2 = 1 p2 = 1
for i in interesting_evidence: for i in interesting_evidence:
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t") print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
csr.execute(
"insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)",
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"]))
p1 *= i["spam_prob"] p1 *= i["spam_prob"]
p2 *= 1 - i["spam_prob"] p2 *= 1 - i["spam_prob"]
p = p1 / (p1 + p2) p = p1 / (p1 + p2)
csr.execute("update messages set type=%s where id = %s", ("%.6f" % p, msg_pk))
db.commit()
return p return p
def main(): def main():
@@ -98,7 +111,7 @@ def main():
parser = email.parser.BytesParser(policy=email.policy.default) parser = email.parser.BytesParser(policy=email.policy.default)
msg = parser.parse(fh) msg = parser.parse(fh)
add_message(msg) add_message(msg)
p = extract_features() p = extract_features(msg["Message-Id"])
print(p, "spam" if p > 0.5 else "ham") print(p, "spam" if p > 0.5 else "ham")
main() main()