diff --git a/mbox2web b/mbox2web index d5d48dd..19c4f7a 100755 --- a/mbox2web +++ b/mbox2web @@ -74,26 +74,30 @@ def save_part(msg, disposition): os.makedirs("parts", exist_ok=True) with open("parts/" + filename, "wb") as fh: fh.write(payload) - template_name = disposition + "_" + content_type.replace("/", "_") + ".html" - bodytmpl = jenv.get_template(template_name) - context = { - "name": name, - "url": "../../parts/" + filename, - } - bodyhtml = bodytmpl.render(context) - return bodyhtml + url = "../../parts/" + filename + if disposition == "_url": + return url + else: + template_name = disposition + "_" + content_type.replace("/", "_") + ".html" + bodytmpl = jenv.get_template(template_name) + context = { + "name": name, + "url": url, + } + bodyhtml = bodytmpl.render(context) + return bodyhtml partial_message_cache = {} -def render_body(msg): - def render_text_plain(msg): +def render_body(msg, extra=None): + def render_text_plain(msg, extra=None): bodytmpl = jenv.get_template("body_text_plain.html") context = { "body": msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") } return bodytmpl.render(context) - def render_multipart_mixed(msg): + def render_multipart_mixed(msg, extra=None): parts = msg.get_payload() if type(parts) == str: # mislabelled, assume text/plain @@ -106,7 +110,7 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_multipart_digest(msg): + def render_multipart_digest(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -115,7 +119,7 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_message_rfc822(msg): + def render_message_rfc822(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -124,15 +128,15 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_text_html(msg): - htmlpart = HTMLPart() + def render_text_html(msg, extra=None): + htmlpart = HTMLPart(extra) htmlpart.feed(msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")) bodytmpl = jenv.get_template("body_text_html.html") context = { "body": jinja2.Markup(htmlpart.as_string()) } return bodytmpl.render(context) - def render_text_enriched(msg): + def render_text_enriched(msg, extra=None): payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") tepart = TextEnrichedPart(payload) bodytmpl = jenv.get_template("body_text_enriched.html") @@ -140,7 +144,7 @@ def render_body(msg): "body": jinja2.Markup(tepart.as_string()) } return bodytmpl.render(context) - def render_message_partial(msg): + def render_message_partial(msg, extra=None): # Default header for get_param is Content-Type whole_msg_id = msg.get_param("id") if not whole_msg_id in partial_message_cache: @@ -166,10 +170,10 @@ def render_body(msg): encode_message_id(whole_msg_id), html.escape(whole_msg_id)) - def render_application_octet_stream(msg): + def render_application_octet_stream(msg, extra=None): return save_part(msg, "attachment") - def render_multipart_signed(msg): + def render_multipart_signed(msg, extra=None): content, signature = msg.get_payload() with tempfile.NamedTemporaryFile(buffering=0) as content_fh: content_fh.write(content.as_bytes()) @@ -193,7 +197,7 @@ def render_body(msg): } return bodytmpl.render(context) - def render_application_pgp(msg): + def render_application_pgp(msg, extra=None): with tempfile.NamedTemporaryFile(buffering=0) as content_fh: content_fh.write(msg.get_payload(decode=True)) r = subprocess.run(["gpg", "--decrypt", content_fh.name], @@ -216,7 +220,7 @@ def render_body(msg): } return bodytmpl.render(context) - def render_multipart_alternative(msg): + def render_multipart_alternative(msg, extra=None): partshtml = [] partstypes = [] for part in msg.get_payload(): @@ -229,24 +233,24 @@ def render_body(msg): } return bodytmpl.render(context) - def render_application_x_unknown_content_type_scpfile(msg): + def render_application_x_unknown_content_type_scpfile(msg, extra=None): bodytmpl = jenv.get_template("body_application_x-unknown-content-type-scpfile.html") context = { "body": msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") } return bodytmpl.render(context) - def render_application_pgp_signature(msg): + def render_application_pgp_signature(msg, extra=None): # A PGP signature outside of a multipart/signed - useless bodytmpl = jenv.get_template("body_application_pgp-signature.html") context = { } return bodytmpl.render(context) - def render_application_x_gzip(msg): + def render_application_x_gzip(msg, extra=None): return save_part(msg, "attachment") - def render_message_news(msg): + def render_message_news(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -257,9 +261,36 @@ def render_body(msg): } return bodytmpl.render(context) - def render_image_gif(msg): + def render_image_gif(msg, extra=None): return save_part(msg, "inline") + def render_multipart_related(msg, extra=None): + start = msg.get_param("start") + start_part = None + # collect content-ids + content = {} + for i, part in enumerate(msg.get_payload()): + content_id = part.get("Content-Id") + if start_part is None and (start is None or content_id == start): + start_part = part + continue + if content_id: + content[content_id] = { + "i": i, + "part": part, + "url": save_part(part, "_url"), + } + + + + parthtml = render_body(start_part, content) + bodytmpl = jenv.get_template("body_multipart_related.html") + context = { + "msg": msg, + "parts": [parthtml], + } + return bodytmpl.render(context) + renderers = { "text/plain": render_text_plain, "multipart/mixed": render_multipart_mixed, @@ -277,6 +308,7 @@ def render_body(msg): "application/x-gzip": render_application_x_gzip, "message/news": render_message_news, "image/gif": render_image_gif, + "multipart/related": render_multipart_related, } content_type = msg.get_content_type() content_disposition = msg.get_content_disposition() @@ -287,7 +319,7 @@ def render_body(msg): # and not try to display. bodyhtml = save_part(msg, content_disposition) else: - bodyhtml = renderers[content_type](msg) + bodyhtml = renderers[content_type](msg, extra) return jinja2.Markup(bodyhtml) @@ -323,11 +355,12 @@ class HTMLPart(html.parser.HTMLParser): hide_tags = [ "title" ] ignore_tags = [ "html", "head", "body", "marquee", "meta", "form", ] - def __init__(self): + def __init__(self, extra): super().__init__() self.hide = False self.content = [] self.base = None + self.extra = extra or {} def handle_starttag(self, tag, attrs): if tag == "base": @@ -388,8 +421,7 @@ class HTMLPart(html.parser.HTMLParser): u = urllib.parse.urlparse(url) if u[0] == "cid": print("Encountered src cid attribute", a, file=sys.stderr) - # XXX - implement cid - clean_attrs.append((a[0], url)) + clean_attrs.append((a[0], self.extra["<" + u.path + ">"]["url"])) else: print("Ignored src attribute", a, file=sys.stderr) elif a[0] == "target": diff --git a/templates/body_multipart_related.html b/templates/body_multipart_related.html new file mode 100644 index 0000000..4f0222e --- /dev/null +++ b/templates/body_multipart_related.html @@ -0,0 +1,10 @@ +
+
+ multipart/related +
+
+ {% for part in parts %} + {{part}} + {% endfor %} +
+