From c49d6847f382edbaa1d5dc6ca9ebbb9094b9c0d5 Mon Sep 17 00:00:00 2001 From: "Peter J. Holzer" Date: Tue, 27 Aug 2019 22:38:00 +0200 Subject: [PATCH] Write used evidence to database --- judge_message | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/judge_message b/judge_message index c52dae2..35d6eb3 100755 --- a/judge_message +++ b/judge_message @@ -32,9 +32,17 @@ def add_message(msg): msg.get_payload(decode=True) .decode(charset, errors='replace')) -def extract_features(): +def extract_features(msgid): db = psycopg2.connect("dbname=bayes") csr = db.cursor(cursor_factory=ppe.DictCursor) + csr.execute( + """ + insert into messages(id, message_id) + values(default, %s) + returning id + """, + (msgid,)) + msg_pk = csr.fetchone()["id"] evidence = [] for t in components: prev = {""} @@ -82,9 +90,14 @@ def extract_features(): p2 = 1 for i in interesting_evidence: print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t") + csr.execute( + "insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)", + (msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"])) p1 *= i["spam_prob"] p2 *= 1 - i["spam_prob"] p = p1 / (p1 + p2) + csr.execute("update messages set type=%s where id = %s", ("%.6f" % p, msg_pk)) + db.commit() return p def main(): @@ -98,7 +111,7 @@ def main(): parser = email.parser.BytesParser(policy=email.policy.default) msg = parser.parse(fh) add_message(msg) - p = extract_features() + p = extract_features(msg["Message-Id"]) print(p, "spam" if p > 0.5 else "ham") main()