From 5294100b2c5ce7aa16c308649e6e26e75b388fc4 Mon Sep 17 00:00:00 2001 From: "Peter J. Holzer" Date: Sun, 31 Mar 2019 23:48:57 +0200 Subject: [PATCH] Handle multipart/related For multipart related we need to be able to reference the other parts from the root part by content-id, so we need to pass an argument with the necessary information (imaginatively called "extra") to the render function. Of course since this is called indirectly, every render function needs to accept an extra argument, even if only render_text_html uses it. --- mbox2web | 92 ++++++++++++++++++--------- templates/body_multipart_related.html | 10 +++ 2 files changed, 72 insertions(+), 30 deletions(-) create mode 100644 templates/body_multipart_related.html diff --git a/mbox2web b/mbox2web index d5d48dd..19c4f7a 100755 --- a/mbox2web +++ b/mbox2web @@ -74,26 +74,30 @@ def save_part(msg, disposition): os.makedirs("parts", exist_ok=True) with open("parts/" + filename, "wb") as fh: fh.write(payload) - template_name = disposition + "_" + content_type.replace("/", "_") + ".html" - bodytmpl = jenv.get_template(template_name) - context = { - "name": name, - "url": "../../parts/" + filename, - } - bodyhtml = bodytmpl.render(context) - return bodyhtml + url = "../../parts/" + filename + if disposition == "_url": + return url + else: + template_name = disposition + "_" + content_type.replace("/", "_") + ".html" + bodytmpl = jenv.get_template(template_name) + context = { + "name": name, + "url": url, + } + bodyhtml = bodytmpl.render(context) + return bodyhtml partial_message_cache = {} -def render_body(msg): - def render_text_plain(msg): +def render_body(msg, extra=None): + def render_text_plain(msg, extra=None): bodytmpl = jenv.get_template("body_text_plain.html") context = { "body": msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") } return bodytmpl.render(context) - def render_multipart_mixed(msg): + def render_multipart_mixed(msg, extra=None): parts = msg.get_payload() if type(parts) == str: # mislabelled, assume text/plain @@ -106,7 +110,7 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_multipart_digest(msg): + def render_multipart_digest(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -115,7 +119,7 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_message_rfc822(msg): + def render_message_rfc822(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -124,15 +128,15 @@ def render_body(msg): "parts": partshtml } return bodytmpl.render(context) - def render_text_html(msg): - htmlpart = HTMLPart() + def render_text_html(msg, extra=None): + htmlpart = HTMLPart(extra) htmlpart.feed(msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")) bodytmpl = jenv.get_template("body_text_html.html") context = { "body": jinja2.Markup(htmlpart.as_string()) } return bodytmpl.render(context) - def render_text_enriched(msg): + def render_text_enriched(msg, extra=None): payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") tepart = TextEnrichedPart(payload) bodytmpl = jenv.get_template("body_text_enriched.html") @@ -140,7 +144,7 @@ def render_body(msg): "body": jinja2.Markup(tepart.as_string()) } return bodytmpl.render(context) - def render_message_partial(msg): + def render_message_partial(msg, extra=None): # Default header for get_param is Content-Type whole_msg_id = msg.get_param("id") if not whole_msg_id in partial_message_cache: @@ -166,10 +170,10 @@ def render_body(msg): encode_message_id(whole_msg_id), html.escape(whole_msg_id)) - def render_application_octet_stream(msg): + def render_application_octet_stream(msg, extra=None): return save_part(msg, "attachment") - def render_multipart_signed(msg): + def render_multipart_signed(msg, extra=None): content, signature = msg.get_payload() with tempfile.NamedTemporaryFile(buffering=0) as content_fh: content_fh.write(content.as_bytes()) @@ -193,7 +197,7 @@ def render_body(msg): } return bodytmpl.render(context) - def render_application_pgp(msg): + def render_application_pgp(msg, extra=None): with tempfile.NamedTemporaryFile(buffering=0) as content_fh: content_fh.write(msg.get_payload(decode=True)) r = subprocess.run(["gpg", "--decrypt", content_fh.name], @@ -216,7 +220,7 @@ def render_body(msg): } return bodytmpl.render(context) - def render_multipart_alternative(msg): + def render_multipart_alternative(msg, extra=None): partshtml = [] partstypes = [] for part in msg.get_payload(): @@ -229,24 +233,24 @@ def render_body(msg): } return bodytmpl.render(context) - def render_application_x_unknown_content_type_scpfile(msg): + def render_application_x_unknown_content_type_scpfile(msg, extra=None): bodytmpl = jenv.get_template("body_application_x-unknown-content-type-scpfile.html") context = { "body": msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") } return bodytmpl.render(context) - def render_application_pgp_signature(msg): + def render_application_pgp_signature(msg, extra=None): # A PGP signature outside of a multipart/signed - useless bodytmpl = jenv.get_template("body_application_pgp-signature.html") context = { } return bodytmpl.render(context) - def render_application_x_gzip(msg): + def render_application_x_gzip(msg, extra=None): return save_part(msg, "attachment") - def render_message_news(msg): + def render_message_news(msg, extra=None): partshtml = [] for part in msg.get_payload(): partshtml.append(render_message(part)) @@ -257,9 +261,36 @@ def render_body(msg): } return bodytmpl.render(context) - def render_image_gif(msg): + def render_image_gif(msg, extra=None): return save_part(msg, "inline") + def render_multipart_related(msg, extra=None): + start = msg.get_param("start") + start_part = None + # collect content-ids + content = {} + for i, part in enumerate(msg.get_payload()): + content_id = part.get("Content-Id") + if start_part is None and (start is None or content_id == start): + start_part = part + continue + if content_id: + content[content_id] = { + "i": i, + "part": part, + "url": save_part(part, "_url"), + } + + + + parthtml = render_body(start_part, content) + bodytmpl = jenv.get_template("body_multipart_related.html") + context = { + "msg": msg, + "parts": [parthtml], + } + return bodytmpl.render(context) + renderers = { "text/plain": render_text_plain, "multipart/mixed": render_multipart_mixed, @@ -277,6 +308,7 @@ def render_body(msg): "application/x-gzip": render_application_x_gzip, "message/news": render_message_news, "image/gif": render_image_gif, + "multipart/related": render_multipart_related, } content_type = msg.get_content_type() content_disposition = msg.get_content_disposition() @@ -287,7 +319,7 @@ def render_body(msg): # and not try to display. bodyhtml = save_part(msg, content_disposition) else: - bodyhtml = renderers[content_type](msg) + bodyhtml = renderers[content_type](msg, extra) return jinja2.Markup(bodyhtml) @@ -323,11 +355,12 @@ class HTMLPart(html.parser.HTMLParser): hide_tags = [ "title" ] ignore_tags = [ "html", "head", "body", "marquee", "meta", "form", ] - def __init__(self): + def __init__(self, extra): super().__init__() self.hide = False self.content = [] self.base = None + self.extra = extra or {} def handle_starttag(self, tag, attrs): if tag == "base": @@ -388,8 +421,7 @@ class HTMLPart(html.parser.HTMLParser): u = urllib.parse.urlparse(url) if u[0] == "cid": print("Encountered src cid attribute", a, file=sys.stderr) - # XXX - implement cid - clean_attrs.append((a[0], url)) + clean_attrs.append((a[0], self.extra["<" + u.path + ">"]["url"])) else: print("Ignored src attribute", a, file=sys.stderr) elif a[0] == "target": diff --git a/templates/body_multipart_related.html b/templates/body_multipart_related.html new file mode 100644 index 0000000..4f0222e --- /dev/null +++ b/templates/body_multipart_related.html @@ -0,0 +1,10 @@ +
+
+ multipart/related +
+
+ {% for part in parts %} + {{part}} + {% endfor %} +
+