Add option --no-used-evidence

This commit is contained in:
Peter J. Holzer 2019-09-14 12:09:36 +02:00
parent d96d1fc96e
commit e6dab8395f
1 changed file with 7 additions and 5 deletions

View File

@ -32,7 +32,7 @@ def add_message(msg):
msg.get_payload(decode=True) msg.get_payload(decode=True)
.decode(charset, errors='replace')) .decode(charset, errors='replace'))
def extract_features(msgid, verbose): def extract_features(msgid, verbose, used_evidence):
db = psycopg2.connect("dbname=bayes") db = psycopg2.connect("dbname=bayes")
csr = db.cursor(cursor_factory=ppe.DictCursor) csr = db.cursor(cursor_factory=ppe.DictCursor)
csr.execute( csr.execute(
@ -87,9 +87,10 @@ def extract_features(msgid, verbose):
for i in interesting_evidence: for i in interesting_evidence:
if verbose: if verbose:
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t") print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
csr.execute( if used_evidence:
"insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)", csr.execute(
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"])) "insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)",
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"]))
p1 *= i["spam_prob"] p1 *= i["spam_prob"]
p2 *= 1 - i["spam_prob"] p2 *= 1 - i["spam_prob"]
p = p1 / (p1 + p2) p = p1 / (p1 + p2)
@ -100,6 +101,7 @@ def extract_features(msgid, verbose):
def main(): def main():
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument('--verbose', action='store_true') ap.add_argument('--verbose', action='store_true')
ap.add_argument('--no-used-evidence', action='store_false', dest='used_evidence')
ap.add_argument('file', nargs='?') ap.add_argument('file', nargs='?')
args = ap.parse_args() args = ap.parse_args()
if args.file: if args.file:
@ -109,7 +111,7 @@ def main():
parser = email.parser.BytesParser(policy=email.policy.default) parser = email.parser.BytesParser(policy=email.policy.default)
msg = parser.parse(fh) msg = parser.parse(fh)
add_message(msg) add_message(msg)
p = extract_features(msg["Message-Id"], args.verbose) p = extract_features(msg["Message-Id"], args.verbose, args.used_evidence)
print(p, "spam" if p > 0.5 else "ham") print(p, "spam" if p > 0.5 else "ham")
main() main()