329 lines
10 KiB
Plaintext
329 lines
10 KiB
Plaintext
|
#!/usr/bin/python3
|
||
|
|
||
|
import datetime
import email.header
import email.utils
import html
import mailbox
import pdb
import re
import sys
|
||
|
|
||
|
|
||
|
def get_message_id(msg):
    """
    Extract the message id from a message

    The content of the first ``<...>`` token of the Message-ID header is
    returned, without the angle brackets.

    Note that this assumes that there is (at least) one message id. If
    this is not the case (no Message-ID header at all, or a header without
    a ``<...>`` token), an IndexError is raised.
    """
    header = msg["Message-ID"]
    if header is None:
        raise IndexError("message has no Message-ID header")
    # The header value is not necessarily a plain str (add_message has to
    # cope with email.header.Header objects, too), and re only accepts
    # strings, so normalize it first.
    match = re.search(r'<(.*?)>', str(header))
    if match is None:
        raise IndexError("no message id in Message-ID header %r" % (header,))
    return match.group(1)
|
||
|
|
||
|
|
||
|
def encode_message_id(msgid):
    """
    Return msgid with all characters outside a fixed allowed set escaped

    Every character not matched by the character class below is replaced
    by its code point written as lowercase hex (at least two digits) in
    curly braces, e.g. "/" becomes "{2f}".
    """
    def _escape(found):
        return "{%02x}" % (ord(found.group(0)))

    return re.sub('[^!"$(-.0-9:=@-z|~]', _escape, msgid)
|
||
|
|
||
|
|
||
|
class Message:
    """
    The parts of a mail message which are needed for threading and display

    msgid, in_reply_to, references, mfrom and subject are stored as given.
    date is stored as an aware datetime; kids starts out as False.
    """

    def __init__(self, msgid, in_reply_to, references, date, mfrom, subject):
        # If timezone is missing, assume local time
        self.date = date.astimezone() if date.tzinfo is None else date
        self.msgid = msgid
        self.in_reply_to = in_reply_to
        self.references = references
        self.mfrom = mfrom
        self.subject = subject
        self.kids = False

    def __repr__(self):
        stamp = self.date.strftime("%Y-%m-%d %H:%M:%S%z")
        refs = "[" + ", ".join(self.references) + "]"
        return " ".join((self.msgid, stamp, refs))
|
||
|
|
||
|
|
||
|
# Global index: message id -> the Thread object which currently contains the
# message with that id. Maintained by Thread.add_message.
msg2thread = {}
|
||
|
|
||
|
class Thread:
    """
    A collection of messages which belong together

    messages maps message ids to message objects. threadid is None until
    it is set from outside the class; __repr__ falls back to id(self)
    while it is unset.
    """

    def __init__(self):
        self.messages = {}
        self.threadid = None

    def add_message(self, msg):
        """
        Add msg to this thread and record the thread in msg2thread
        """
        self.messages[msg.msgid] = msg
        msg2thread[msg.msgid] = self

    def merge_thread(self, other):
        """
        Move all messages of other into this thread

        other itself is not modified, but msg2thread no longer points to it
        afterwards.
        """
        for msg in other.messages.values():
            self.add_message(msg)

    def __repr__(self):
        s = self.threadid if self.threadid else str(id(self))
        if self.messages:
            s += " {" + ", ".join(self.messages.keys()) + "}"
        return s

    def fixup_in_reply_tos(self):
        """
        Make the in_reply_to relations usable for the layout in as_html

        Adds dummy messages for missing parents and moves the date of
        replies which claim to be older than their parent.
        """
        # Fix up some problems with in_reply_to:
        # Sometimes an in_reply_to refers to a message which isn't in the
        # archive. Add a dummy message if this happens.
        # Sometimes an in_reply_to refers to a message with a later date.
        # In this case one of the two date headers must be wrong. We could try
        # to analyze other headers (especially received), but for now we just
        # assume that it is the referrer (although in the example I'm
        # currently looking at it is the referree) and adjust that. We should
        # preserve the original date header, though. Use separate sort_date and
        # date?

        # A dict is used as an ordered set, so that the dummy messages are
        # created in a reproducible order.
        missing = {}
        for m in self.messages.values():
            for r in m.in_reply_to:
                if r not in self.messages:
                    missing[r] = None
        one_second = datetime.timedelta(seconds=1)
        for r in missing:
            # Each dummy is placed one second before the currently earliest
            # message (which may be the previous dummy).
            firstdate = min(m.date for m in self.messages.values())
            self.add_message(
                Message(r, [], [],
                        firstdate - one_second,
                        "unknown@invalid", "(not in archive)")
            )

        # If the reply relation is acyclic, every pass settles at least one
        # more level of the reply tree, so len(messages) passes are enough
        # and one more confirms it. Headers are untrusted, though: a message
        # may claim to reply to itself or two messages to each other. In this
        # case the dates can never become consistent, so we give up after
        # the bound instead of looping forever.
        for _ in range(len(self.messages) + 1):
            dates_ok = True
            for m in self.messages.values():
                for r in m.in_reply_to:
                    rr = self.messages[r]
                    if rr is m:
                        # self reference, cannot be satisfied
                        continue
                    if rr.date >= m.date:
                        m.date = rr.date + one_second
                        dates_ok = False
            if dates_ok:
                break

    def as_html(self):
        """
        Return the thread as an HTML table

        The first cell spans all rows and contains an SVG drawing of the
        reply graph; each row shows date, sender and subject of one message
        in date order. All message data is HTML-escaped.

        Side effects: calls fixup_in_reply_tos (which may add dummy
        messages and change dates) and sets x, y and kids on the messages.
        """
        if not self.messages:
            # Nothing to draw, and there is no first row to attach the
            # graph cell to.
            return "<table class='thread'></table>"

        self.fixup_in_reply_tos()

        # kids is layout state; reset it so that calling as_html again
        # yields the same layout.
        for m in self.messages.values():
            m.kids = False

        y = 0
        x = 0
        nodes = []
        edges = []
        lines = []
        placed = set()
        for m in sorted(self.messages.values(), key=lambda msg: msg.date):
            # We have already fudged the in_reply_to field to always contain
            # the latest reference(s), so we only need to consider that.
            # After the fixup every parent is older than its reply and hence
            # already placed. The exception are self references and cycles,
            # for which the missing parent is simply ignored.
            parents = [self.messages[r] for r in m.in_reply_to if r in placed]
            if not parents:
                if y != 0:
                    # Not in reply to anything, but not the start of the
                    # thread either. This will happen if fixup_in_reply_tos
                    # adds more than one dummy message, but it might also
                    # happen if we use different criteria for matching
                    # threads (e.g. Subject or Thread-Index)
                    # Just start a new column to get out of the way
                    x += 1
                m.x = x
                m.y = y
                nodes.append((m.x, m.y))
            elif len(parents) == 1:
                p = parents[0]
                if p.kids:
                    # The parent already has kids, so we must move to the
                    # side to avoid running an edge through an existing kid.
                    # We could use a sophisticated algorithm to find the best
                    # position here, but I think it sufficient to just start
                    # a new column. This may waste some space (there might
                    # have been a suitable position in the existing columns),
                    # but it will avoid collisions and is very simple.
                    x += 1
                    m.x = x
                else:
                    # Just put the new kid directly below the parent
                    m.x = p.x
                m.y = y
                nodes.append((m.x, m.y))
                edges.append((p.x, p.y, m.x, m.y))
                p.kids = True
            else:
                # Generic case with multiple references.
                # I think this should always work well if we start a new
                # column. There may be special cases where we can avoid it,
                # not sure.
                x += 1
                m.x = x
                m.y = y
                nodes.append((m.x, m.y))
                for p in parents:
                    edges.append((p.x, p.y, m.x, m.y))
            placed.add(m.msgid)
            lines.append((m.date, m.mfrom, m.subject))
            y += 1

        radius = 4
        fx = 16
        fy = 32
        out = [
            "<table class='thread'>",
            "<tr>",
            f"<td rowspan={y}>",
            f"<svg width={(x + 1) * fx} height={y * fy}>",
        ]
        for x1, y1, x2, y2 in edges:
            if x1 == x2:
                out.append(
                    f"<line x1={x1 * fx + fx/2} y1={y1 * fy + fy/2} x2={x2 * fx + fx/2} y2={y2 * fy + fy/2} stroke='black' />"
                )
            else:
                # Control point of the curve: in the column of the kid, one
                # row below the parent, or halfway between the two rows if
                # they are adjacent.
                if y2 == y1 + 1:
                    yc = (y1 + y2) / 2
                else:
                    yc = y1 + 1
                out.append(
                    f"<path d='M {x1 * fx + fx/2} {y1 * fy + fy/2} Q {x2 * fx + fx/2} {yc * fy + fy/2} {x2 * fx + fx/2} {y2 * fy + fy/2}' stroke='black' fill='none' />"
                )
        for nx, ny in nodes:
            out.append(f"<circle cx={nx * fx + fx/2} cy={ny * fy + fy/2} r={radius} />")
        out.append("</svg>")
        out.append("</td>")

        for row, (date, mfrom, subject) in enumerate(lines):
            if row > 0:
                # The first row has been opened above, for the graph cell
                out.append("<tr>")
            # The values come from mail headers, i.e. they are untrusted and
            # may be None; str() keeps the rendering of non-strings as it
            # was with plain f-string interpolation.
            out.append(f"<td class='date'>{html.escape(str(date))}</td>")
            out.append(f"<td class='from'>{html.escape(str(mfrom))}</td>")
            out.append(f"<td class='subject'>{html.escape(str(subject))}</td>")
            out.append("</tr>")
        out.append("</table>")
        return "".join(out)
|
||
|
|
||
|
|
||
|
def _extract_msgids(header):
    """
    Return the contents of all <...> tokens of a header value as a list

    header may be a str, an email.header.Header or None/empty (in which
    case the list is empty).
    """
    if not header:
        return []
    if isinstance(header, email.header.Header):
        header = header.encode()
    return re.findall(r'<(.*?)>', header)


def add_message(msg):
    """
    Register msg as a new thread consisting of just this message

    The message id, Date, In-Reply-To, References, From and Subject headers
    are read from msg. The resulting Message is put into a fresh Thread
    (which also records it in msg2thread); merging threads is left to the
    caller.

    Exceptions from get_message_id (no message id) and from
    email.utils.parsedate_to_datetime (missing or unparseable Date header)
    are not handled here.
    """
    mid = get_message_id(msg)
    print("M", mid, file=sys.stderr)

    date = email.utils.parsedate_to_datetime(msg["Date"])

    # In-Reply-To headers with more than one message-id are rare, but
    # standard-conforming, and some MUAs (e.g., mutt) create them.
    # A message cannot be a reply to itself; such bogus references would
    # only confuse the date fixup and the layout, so drop them.
    in_reply_to_msgids = [
        i for i in _extract_msgids(msg["In-Reply-To"]) if i != mid
    ]
    references_msgids = [
        i for i in _extract_msgids(msg["References"]) if i != mid
    ]

    # Everything we are a reply to is also a reference
    for msgid in in_reply_to_msgids:
        if msgid not in references_msgids:
            references_msgids.append(msgid)
    # Without In-Reply-To, treat the last reference as the parent
    if not in_reply_to_msgids and references_msgids:
        in_reply_to_msgids = [references_msgids[-1]]

    t = Thread()
    t.add_message(
        Message(
            mid,
            in_reply_to_msgids, references_msgids,
            date,
            msg["From"], msg["Subject"]))
|
||
|
|
||
|
|
||
|
|
||
|
# Printed before the threads. The date cells are emitted with class 'date',
# so the rule for the timestamp column has to match that class as well.
_HTML_HEADER = """
<!DOCTYPE html>
<html>

<head>
<meta charset="utf-8">
<style>
table.thread {
border-collapse: collapse;
}
table.thread tr {
height: 32px;
font-size: 16px;
background-color: #EFE;
}
table.thread td {
overflow: hidden;
white-space: nowrap;
}
.date, .timestamp {
width: 8em;
padding-right: 0.5em;
}
.from {
max-width: 8em;
padding-left: 0.5em;
padding-right: 0.5em;
}
.subject {
max-width: 10em;
padding-left: 0.5em;
}
</style>
</head>
<body>
"""

# Printed after the threads to close what the header opened
_HTML_FOOTER = """\
</body>
</html>"""


def _load_mailboxes(paths):
    """
    Feed every message of every mbox file in paths to add_message
    """
    for f in paths:
        print("F", f, file=sys.stderr)
        # create=False: we only read, so a mistyped file name must not
        # silently create an empty mbox file.
        mb = mailbox.mbox(f, create=False)
        try:
            for m in mb:
                add_message(m)
        finally:
            mb.close()


def _merge_threads():
    """
    Merge the one-message threads created by add_message

    Two threads are merged if a message in one of them references a message
    in the other. This is repeated until a full pass merges nothing.
    """
    finished = False
    while not finished:
        finished = True
        for msgid in list(msg2thread):
            thread = msg2thread[msgid]
            for msgid2 in list(thread.messages):
                msg = thread.messages[msgid2]
                for r in msg.references:
                    if r in thread.messages:
                        continue
                    # references may contain non-existent messages, so
                    # be careful:
                    if r in msg2thread:
                        thread.merge_thread(msg2thread[r])
                        finished = False


def _collect_threads():
    """
    Return each thread in msg2thread once, with threadid and date set

    threadid and date are taken from the earliest message of the thread
    (the first one in case of a tie).
    """
    thread_list = []
    for thread in msg2thread.values():
        # A thread appears once per message in msg2thread. Compare with
        # None so that an empty message id still marks it as handled.
        if thread.threadid is not None:
            continue
        first = min(thread.messages.values(), key=lambda m: m.date)
        thread.date = first.date
        thread.threadid = first.msgid
        thread_list.append(thread)
    return thread_list


def main(argv):
    """
    Read the mbox files named in argv[1:] and print all threads as HTML
    """
    _load_mailboxes(argv[1:])

    # Now I have a lot of 1 message threads
    # Merge them
    _merge_threads()

    thread_list = _collect_threads()

    print(_HTML_HEADER)
    for thread in sorted(thread_list, key=lambda t: t.date):
        print(thread.as_html())
    print(_HTML_FOOTER)


if __name__ == "__main__":
    main(sys.argv)
|
||
|
|
||
|
# vim: tw=79
|