Tolerate decoding errors

Sometimes the declared charset is just wrong, or it is given under a
non-standard name. Try to do something useful in these cases.
This commit is contained in:
Peter J. Holzer 2019-05-20 23:06:08 +02:00
parent b1dbb3d40c
commit 5e96a73744
1 changed file with 15 additions and 2 deletions

View File

@ -136,8 +136,16 @@ def render_body(msg, extra=None):
format = ct_params.get("format", "fixed") format = ct_params.get("format", "fixed")
if format == "fixed": if format == "fixed":
bodytmpl = jenv.get_template("body_text_plain.html") bodytmpl = jenv.get_template("body_text_plain.html")
partbytes = msg.get_payload(decode=True)
try:
parttext = partbytes.decode(charset, errors="replace")
except LookupError as e:
# Unknown encoding? Probably win-1252
print(e, file=sys.stderr)
parttext = partbytes.decode("windows-1252", errors="replace")
context = { context = {
"body": msg.get_payload(decode=True).decode(charset) "body": parttext
} }
return bodytmpl.render(context) return bodytmpl.render(context)
elif format == "flowed": elif format == "flowed":
@ -653,7 +661,12 @@ class TextFlowedPart:
charset = ct_params.get("charset", "iso-8859-1") charset = ct_params.get("charset", "iso-8859-1")
format = ct_params.get("format", "fixed") format = ct_params.get("format", "fixed")
delsp = ct_params.get("delsp", "no") == "yes" delsp = ct_params.get("delsp", "no") == "yes"
raw_text = msg.get_payload(decode=True).decode(charset) charset_map = {
"x-mac-roman": "mac_roman",
}
if charset in charset_map:
charset = charset_map[charset]
raw_text = msg.get_payload(decode=True).decode(charset, errors="replace")
raw_lines = raw_text.split("\n") raw_lines = raw_text.split("\n")
for rl in raw_lines: for rl in raw_lines: