Add option --no-used-evidence (skip inserting rows into the used_evidence table while scoring a message)
This commit is contained in:
parent
d96d1fc96e
commit
e6dab8395f
|
@ -32,7 +32,7 @@ def add_message(msg):
|
||||||
msg.get_payload(decode=True)
|
msg.get_payload(decode=True)
|
||||||
.decode(charset, errors='replace'))
|
.decode(charset, errors='replace'))
|
||||||
|
|
||||||
def extract_features(msgid, verbose):
|
def extract_features(msgid, verbose, used_evidence):
|
||||||
db = psycopg2.connect("dbname=bayes")
|
db = psycopg2.connect("dbname=bayes")
|
||||||
csr = db.cursor(cursor_factory=ppe.DictCursor)
|
csr = db.cursor(cursor_factory=ppe.DictCursor)
|
||||||
csr.execute(
|
csr.execute(
|
||||||
|
@ -87,9 +87,10 @@ def extract_features(msgid, verbose):
|
||||||
for i in interesting_evidence:
|
for i in interesting_evidence:
|
||||||
if verbose:
|
if verbose:
|
||||||
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
|
print("#", i["spam_prob"], i["type"], i["length"], i["feature"], sep="\t")
|
||||||
csr.execute(
|
if used_evidence:
|
||||||
"insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)",
|
csr.execute(
|
||||||
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"]))
|
"insert into used_evidence(message, spam_prob, type, length, feature) values(%s, %s, %s, %s, %s)",
|
||||||
|
(msg_pk, i["spam_prob"], i["type"], i["length"], i["feature"]))
|
||||||
p1 *= i["spam_prob"]
|
p1 *= i["spam_prob"]
|
||||||
p2 *= 1 - i["spam_prob"]
|
p2 *= 1 - i["spam_prob"]
|
||||||
p = p1 / (p1 + p2)
|
p = p1 / (p1 + p2)
|
||||||
|
@ -100,6 +101,7 @@ def extract_features(msgid, verbose):
|
||||||
def main():
|
def main():
|
||||||
ap = argparse.ArgumentParser()
|
ap = argparse.ArgumentParser()
|
||||||
ap.add_argument('--verbose', action='store_true')
|
ap.add_argument('--verbose', action='store_true')
|
||||||
|
ap.add_argument('--no-used-evidence', action='store_false', dest='used_evidence')
|
||||||
ap.add_argument('file', nargs='?')
|
ap.add_argument('file', nargs='?')
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
if args.file:
|
if args.file:
|
||||||
|
@ -109,7 +111,7 @@ def main():
|
||||||
parser = email.parser.BytesParser(policy=email.policy.default)
|
parser = email.parser.BytesParser(policy=email.policy.default)
|
||||||
msg = parser.parse(fh)
|
msg = parser.parse(fh)
|
||||||
add_message(msg)
|
add_message(msg)
|
||||||
p = extract_features(msg["Message-Id"], args.verbose)
|
p = extract_features(msg["Message-Id"], args.verbose, args.used_evidence)
|
||||||
print(p, "spam" if p > 0.5 else "ham")
|
print(p, "spam" if p > 0.5 else "ham")
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue