#!/usr/bin/python3 import time import psycopg2 import psycopg2.extras as ppe t0 = time.time() db = psycopg2.connect("dbname=bayes") csr = db.cursor(cursor_factory=ppe.DictCursor) csr.execute( """ delete from features """) t1 = time.time() print(t1 - t0, "deleted", csr.rowcount, "rows") csr.execute( """ insert into features( type, length, feature, spam_count, ham_count, spam_prob, interesting ) with m as ( select count(*) filter(where type='spam') as spam_message_count, count(*) filter(where type='ham') as ham_message_count from messages ), f as ( select f.type, length, feature, count(*) filter (where m.type = 'spam') as spam_count, count(*) filter (where m.type = 'ham') as ham_count from message_features f join messages m on (f.message = m.id) group by f.type, length, feature ), f1 as ( select type, length, feature, spam_count, ham_count, (spam_count + 1.0) / (spam_message_count + 1.0) as spam_ratio, (ham_count + 1.0) / (ham_message_count + 1.0) as ham_ratio from f, m ), p as ( select type, length, feature, spam_count, ham_count, spam_ratio / (spam_ratio + ham_ratio) as spam_prob from f1 ), p2 as ( select type, length, feature, spam_count, ham_count, spam_prob, abs(spam_prob - 0.5) as interesting from p ) select * from p2 order by interesting desc """) t1 = time.time() print(t1 - t0, "inserted", csr.rowcount, "rows") db.commit()