Tolerate decoding errors

Sometimes the charset is just wrong, or it uses a non-standard name. Try to do something useful in these cases.
2019-05-20 23:06:08 +02:00 · 2019-05-20 23:06:08 +02:00 · 5e96a73744
parent b1dbb3d40c
commit 5e96a73744
1 changed file with 15 additions and 2 deletions
--- a/17
+++ b/17
@@ -136,8 +136,16 @@ def render_body(msg, extra=None):
        format = ct_params.get("format", "fixed")
        if format == "fixed":
            bodytmpl = jenv.get_template("body_text_plain.html")
+            partbytes = msg.get_payload(decode=True)
+            try:
+                parttext = partbytes.decode(charset, errors="replace")
+            except LookupError as e:
+                # Unknown encoding? Probably win-1252
+                print(e, file=sys.stderr)
+                parttext = partbytes.decode("windows-1252", errors="replace")
+
            context = {
-                "body": msg.get_payload(decode=True).decode(charset)
+                "body": parttext
            }
            return bodytmpl.render(context)
        elif format == "flowed":
@@ -653,7 +661,12 @@ class TextFlowedPart:
        charset = ct_params.get("charset", "iso-8859-1")
        format = ct_params.get("format", "fixed")
        delsp = ct_params.get("delsp", "no") == "yes"
-        raw_text = msg.get_payload(decode=True).decode(charset)
+        charset_map = {
+            "x-mac-roman": "mac_roman",
+        }
+        if charset in charset_map:
+            charset = charset_map[charset]
+        raw_text = msg.get_payload(decode=True).decode(charset, errors="replace")
        raw_lines = raw_text.split("\n")

        for rl in raw_lines: