Write used evidence to database
This commit is contained in:
parent
e6a4ba72f1
commit
c49d6847f3
|
@ -32,9 +32,17 @@ def add_message(msg):
|
||||||
msg.get_payload(decode=True)
|
msg.get_payload(decode=True)
|
||||||
.decode(charset, errors='replace'))
|
.decode(charset, errors='replace'))
|
||||||
|
|
||||||
def extract_features():
|
def extract_features(msgid):
|
||||||
db = psycopg2.connect("dbname=bayes")
|
db = psycopg2.connect("dbname=bayes")
|
||||||
csr = db.cursor(cursor_factory=ppe.DictCursor)
|
csr = db.cursor(cursor_factory=ppe.DictCursor)
|
||||||
|
csr.execute(
|
||||||
|
"""
|
||||||
|
insert into messages(id, message_id)
|
||||||
|
values(default, %s)
|
||||||
|
returning id
|
||||||
|
""",
|
||||||
|
(msgid,))
|
||||||
|
msg_pk = csr.fetchone()["id"]
|
||||||
evidence = []
|
evidence = []
|
||||||
for t in components:
|
for t in components:
|
||||||
prev = {""}
|
prev = {""}
|
||||||
|
@ -82,9 +90,14 @@ def extract_features():
|
||||||
p2 = 1
|
p2 = 1
|
||||||
for i in interesting_evidence:
|
for i in interesting_evidence:
|
||||||
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
|
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
|
||||||
|
csr.execute(
|
||||||
|
"insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)",
|
||||||
|
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"]))
|
||||||
p1 *= i["spam_prob"]
|
p1 *= i["spam_prob"]
|
||||||
p2 *= 1 - i["spam_prob"]
|
p2 *= 1 - i["spam_prob"]
|
||||||
p = p1 / (p1 + p2)
|
p = p1 / (p1 + p2)
|
||||||
|
csr.execute("update messages set type=%s where id = %s", ("%.6f" % p, msg_pk))
|
||||||
|
db.commit()
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -98,7 +111,7 @@ def main():
|
||||||
parser = email.parser.BytesParser(policy=email.policy.default)
|
parser = email.parser.BytesParser(policy=email.policy.default)
|
||||||
msg = parser.parse(fh)
|
msg = parser.parse(fh)
|
||||||
add_message(msg)
|
add_message(msg)
|
||||||
p = extract_features()
|
p = extract_features(msg["Message-Id"])
|
||||||
print(p, "spam" if p > 0.5 else "ham")
|
print(p, "spam" if p > 0.5 else "ham")
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue