Tolerate decoding errors
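Sometimes the declared charset is simply wrong, or it is given under a non-standard name. Try to do something useful in these cases: replace undecodable bytes instead of failing, fall back to windows-1252 when the charset name is unknown, and map known aliases such as x-mac-roman to the corresponding Python codec name.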
This commit is contained in:
parent
b1dbb3d40c
commit
5e96a73744
17
mbox2web
17
mbox2web
|
@ -136,8 +136,16 @@ def render_body(msg, extra=None):
|
|||
format = ct_params.get("format", "fixed")
|
||||
if format == "fixed":
|
||||
bodytmpl = jenv.get_template("body_text_plain.html")
|
||||
partbytes = msg.get_payload(decode=True)
|
||||
try:
|
||||
parttext = partbytes.decode(charset, errors="replace")
|
||||
except LookupError as e:
|
||||
# Unknown encoding? Probably win-1252
|
||||
print(e, file=sys.stderr)
|
||||
parttext = partbytes.decode("windows-1252", errors="replace")
|
||||
|
||||
context = {
|
||||
"body": msg.get_payload(decode=True).decode(charset)
|
||||
"body": parttext
|
||||
}
|
||||
return bodytmpl.render(context)
|
||||
elif format == "flowed":
|
||||
|
@ -653,7 +661,12 @@ class TextFlowedPart:
|
|||
charset = ct_params.get("charset", "iso-8859-1")
|
||||
format = ct_params.get("format", "fixed")
|
||||
delsp = ct_params.get("delsp", "no") == "yes"
|
||||
raw_text = msg.get_payload(decode=True).decode(charset)
|
||||
charset_map = {
|
||||
"x-mac-roman": "mac_roman",
|
||||
}
|
||||
if charset in charset_map:
|
||||
charset = charset_map[charset]
|
||||
raw_text = msg.get_payload(decode=True).decode(charset, errors="replace")
|
||||
raw_lines = raw_text.split("\n")
|
||||
|
||||
for rl in raw_lines:
|
||||
|
|
Loading…
Reference in New Issue