diff --git a/reports/column_stats b/reports/column_stats
new file mode 100755
index 0000000..0dca338
--- /dev/null
+++ b/reports/column_stats
@@ -0,0 +1,66 @@
+#!/usr/bin/python3
+"""Print per-column storage-size statistics for one PostgreSQL table.
+
+For every column of the table (in ordinal order) one line is printed with
+the column name, its data type, and the min / avg / max of
+pg_column_size() over all rows.
+"""
+import argparse
+import psycopg2
+import psycopg2.extras
+from psycopg2 import sql
+
+
+def fmt(value, spec):
+    """Format *value* with *spec*; NULL aggregates (None) render as '-'."""
+    return "-" if value is None else format(value, spec)
+
+
+def main():
+    """Parse the command line, query the table and print one line per column."""
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--dbname")
+    ap.add_argument("--user")
+    ap.add_argument("--host")
+    ap.add_argument("--schema", default="public")
+    ap.add_argument("table")
+    args = ap.parse_args()
+
+    db = psycopg2.connect(host=args.host, dbname=args.dbname, user=args.user)
+    try:
+        with db.cursor(cursor_factory=psycopg2.extras.NamedTupleCursor) as csr:
+            csr.execute(
+                """
+                select * from information_schema.columns
+                where table_schema = %s and table_name = %s
+                order by ordinal_position
+                """,
+                (args.schema, args.table))
+            columns = csr.fetchall()
+
+            # Identifiers cannot be bound as query parameters, so compose them
+            # with psycopg2.sql, which quotes them; plain string interpolation
+            # breaks on mixed-case or reserved names and allows SQL injection.
+            relation = sql.SQL("{}.{}").format(
+                sql.Identifier(args.schema), sql.Identifier(args.table))
+            for c in columns:
+                q = sql.SQL(
+                    "select"
+                    " min(pg_column_size({col})),"
+                    " avg(pg_column_size({col})),"
+                    " max(pg_column_size({col}))"
+                    " from {rel}"
+                ).format(col=sql.Identifier(c.column_name), rel=relation)
+                csr.execute(q)
+                lo, mean, hi = csr.fetchone()
+                # The aggregates are NULL for an empty table or an all-NULL
+                # column; fmt() keeps that from crashing the numeric formats.
+                print(f"{c.column_name:7s}: {c.data_type:17s}"
+                      f" {fmt(lo, '')} {fmt(mean, '4.1f')} {fmt(hi, '3d')}")
+    finally:
+        # Always release the connection, even when a query fails.
+        db.close()
+
+
+if __name__ == "__main__":
+    main()