Convert mbox files to standalone html files
No thread or date structure, just one isolated file per message. Only text/plain and some multipart formats are supported.
This commit is contained in:
parent
80352f727f
commit
b238c56edb
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import jinja2
|
||||
import mailbox
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Root of the output tree; archive() writes below basedir + "/msg/".
basedir = "."

# Shared Jinja2 environment used by all render functions.  Templates are
# looked up in the "templates" directory and autoescaping is switched on.
jenv = jinja2.Environment(
    autoescape=True,
    loader=jinja2.FileSystemLoader(["templates"]),
)
|
||||
|
||||
def get_message_id(msg):
    """
    Extract the message id from a message

    The id is the text between the first "<" and the next ">" in the
    Message-ID header; the angle brackets themselves are not returned.

    Args:
        msg: A message object supporting msg["Message-ID"] (for example
            an email.message.Message).

    Returns:
        The message id as a string, without the surrounding brackets.

    Raises:
        IndexError: If the message has no Message-ID header, or the
            header contains no "<...>" part (we may use something more
            suitable in the future).
    """
    header = msg["Message-ID"]
    if header is None:
        raise IndexError("message has no Message-ID header")
    # str() so that header values which are not plain strings (e.g. an
    # email.header.Header object) can still be searched.
    text = str(header)
    match = re.search(r'<(.*?)>', text)
    if match is None:
        raise IndexError("no <...> message id in Message-ID header %r" % (text,))
    return match.group(1)
|
||||
|
||||
def encode_message_id(msgid):
    """
    Encode a message id so that it is usable as a single directory name

    Every character outside the set  ! " $-. 0-9 : = @-z | ~  is replaced
    by its code point in lowercase hex (at least two digits) wrapped in
    braces, e.g. "/" becomes "{2f}".  Braces are themselves outside that
    set, so a brace in the result always belongs to such an escape.

    The ids "." and ".." consist only of allowed characters but name the
    current and the parent directory, so their dots are escaped as well.
    All other ids are encoded exactly as before.

    Args:
        msgid: The message id as a string.

    Returns:
        The encoded message id as a string.
    """
    encmsgid = re.sub('[^!"$-.0-9:=@-z|~]', lambda x: "{%02x}" % (ord(x.group(0))), msgid)
    if encmsgid in (".", ".."):
        # Used verbatim as a path component these would escape the
        # per-message directory.
        encmsgid = encmsgid.replace(".", "{2e}")
    return encmsgid
|
||||
|
||||
def render_message(msg):
    """
    Render a message (some headers plus its body) as HTML markup

    The "message2.html" template receives the raw Message-Id, Subject,
    From and Date header values of msg together with the body as
    rendered by render_body().

    Args:
        msg: The message object to render.

    Returns:
        The rendered HTML wrapped in jinja2.Markup.
    """
    template = jenv.get_template("message2.html")
    rendered_body = render_body(msg)
    html = template.render(
        {
            "message_id": msg["Message-Id"],
            "subject": msg["Subject"],
            "from": msg["From"],
            "date": msg["Date"],
            "bodyhtml": rendered_body,
        }
    )
    # NOTE(review): jinja2.Markup is not available in Jinja2 >= 3.1
    # (markupsafe.Markup replaces it) -- confirm the Jinja2 version in use.
    return jinja2.Markup(html)
|
||||
|
||||
def _decoded_text(msg):
    """Return the payload of a text part with transfer encoding and charset undone."""
    raw = msg.get_payload(decode=True)
    if raw is None:
        # Nothing to decode (the payload is not a single string); hand
        # the payload through unchanged.
        return msg.get_payload()
    charset = msg.get_content_charset("ascii")
    try:
        return raw.decode(charset, "replace")
    except LookupError:
        # Charset name unknown to Python: keep the ASCII part readable.
        return raw.decode("ascii", "replace")


def _render_parts(msg, template_name, render_part):
    """Render every sub-part of msg with render_part and wrap them in a template."""
    partshtml = [render_part(part) for part in msg.get_payload()]
    bodyhtml = jenv.get_template(template_name).render({"parts": partshtml})
    # NOTE(review): jinja2.Markup is not available in Jinja2 >= 3.1
    # (markupsafe.Markup replaces it) -- confirm the Jinja2 version in use.
    return jinja2.Markup(bodyhtml)


def render_body(msg):
    """
    Render the body of a message (or message part) as HTML markup

    Supported content types:

    * text/plain: the text is rendered with "body_text_plain.html".
      Quoted-printable/base64 transfer encoding is undone and the bytes
      are decoded with the declared charset (ASCII if none is declared,
      undecodable bytes are replaced).
    * multipart/mixed: every part is rendered with render_body() and the
      results are passed to "body_multipart_mixed.html".
    * multipart/digest and message/rfc822: every part is rendered with
      render_message() and the results are passed to
      "body_multipart_digest.html" resp. "body_message_rfc822.html".

    Args:
        msg: The message or message part to render.

    Returns:
        The rendered HTML wrapped in jinja2.Markup.

    Raises:
        RuntimeError: If the content type is none of the above.
    """
    content_type = msg.get_content_type()
    if content_type == "text/plain":
        bodytmpl = jenv.get_template("body_text_plain.html")
        bodyhtml = bodytmpl.render({"body": _decoded_text(msg)})
        return jinja2.Markup(bodyhtml)
    if content_type == "multipart/mixed":
        return _render_parts(msg, "body_multipart_mixed.html", render_body)
    if content_type == "multipart/digest":
        return _render_parts(msg, "body_multipart_digest.html", render_message)
    if content_type == "message/rfc822":
        return _render_parts(msg, "body_message_rfc822.html", render_message)
    raise RuntimeError("Content-type " + content_type + " not implemented yet")
|
||||
|
||||
|
||||
def archive(msg):
    """
    Write one message as a standalone HTML file

    The page is rendered from the "message.html" template and stored as
    index.html in the directory basedir/msg/<encoded message id>, which
    is created if necessary.  The list name passed to the template is
    fixed to "LUGA".

    The page is rendered completely before anything is written, so a
    message that cannot be rendered neither leaves an empty index.html
    behind nor truncates one written earlier.

    Args:
        msg: The message to archive.

    Raises:
        Whatever get_message_id(), render_body(), the template lookup or
        the file system operations raise.
    """
    mid = get_message_id(msg)
    encmid = encode_message_id(mid)
    msgdir = os.path.join(basedir, "msg", encmid)

    msgtmpl = jenv.get_template("message.html")
    bodyhtml = render_body(msg)
    context = {
        "list": "LUGA",
        "message_id": mid,
        "subject": msg["Subject"],
        "from": msg["From"],
        "date": msg["Date"],
        "bodyhtml": bodyhtml,
    }
    msghtml = msgtmpl.render(context)

    os.makedirs(msgdir, exist_ok=True)
    # Explicit encoding: the default depends on the locale and may be
    # unable to represent non-ASCII mail content.
    with open(os.path.join(msgdir, "index.html"), "w", encoding="utf-8") as hfd:
        hfd.write(msghtml)
|
||||
|
||||
|
||||
# Archive every message of every mbox file named on the command line.
for f in sys.argv[1:]:
    print("F", f)
    # NOTE(review): mailbox.mbox() defaults to create=True, so a mistyped
    # path silently creates an empty file -- consider create=False.
    mb = mailbox.mbox(f)
    try:
        for m in mb:
            archive(m)
    finally:
        # The mailbox keeps its file open; release it before the next one.
        mb.close()
|
||||
|
Loading…
Reference in New Issue