From 8d78b2ec2670df5602703bb59693259f2e21f710 Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer"
Date: Tue, 30 Apr 2019 21:55:21 +0200
Subject: [PATCH] Handle RFC 2047 encoded headers

---
 mbox2web | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/mbox2web b/mbox2web
index 4a22c10..53b6a4c 100755
--- a/mbox2web
+++ b/mbox2web
@@ -1,5 +1,6 @@
 #!/usr/bin/python3
 
+import email.header
 import email.parser
 import hashlib
 import html
@@ -32,18 +33,39 @@ def get_message_id(msg):
     match = re.search(r'<(.*?)>', msg["Message-ID"])
     return match.group(1)
 
+
 def encode_message_id(msgid):
     encmsgid = re.sub('[^!"$(-.0-9:=@-z|~]', lambda x: "{%02x}" % (ord(x.group(0))), msgid)
     return encmsgid
 
+
+def decode_rfc2047(s):
+    if s is None:
+        return None
+    r = ""
+    for chunk in email.header.decode_header(s):
+        if chunk[1]:
+            try:
+                r += chunk[0].decode(chunk[1])
+            except LookupError:
+                r += chunk[0].decode("windows-1252")
+            except UnicodeDecodeError:
+                r += chunk[0].decode("windows-1252")
+        elif type(chunk[0]) == bytes:
+            r += chunk[0].decode('us-ascii')
+        else:
+            r += chunk[0]
+    return r
+
+
 def render_message(msg):
     msgtmpl = jenv.get_template("message2.html")
     bodyhtml = render_body(msg)
     context = {
         "msg": msg,
         "message_id": msg["Message-Id"],
-        "subject": msg["Subject"],
-        "from": msg["From"],
+        "subject": decode_rfc2047(msg["Subject"]),
+        "from": decode_rfc2047(msg["From"]),
         "date": msg["Date"],
         "bodyhtml": bodyhtml,
     }
@@ -338,8 +360,8 @@ def archive(msg):
     context = {
         "list": "LUGA",
         "message_id": mid,
-        "subject": msg["Subject"],
-        "from": msg["From"],
+        "subject": decode_rfc2047(msg["Subject"]),
+        "from": decode_rfc2047(msg["From"]),
         "date": msg["Date"],
         "bodyhtml": bodyhtml,
     }