Tolerate decoding errors
Sometimes the declared charset is just wrong, or the message uses a non-standard charset name. Try to do something useful in these cases.
This commit is contained in:
parent
b1dbb3d40c
commit
5e96a73744
17
mbox2web
17
mbox2web
|
@ -136,8 +136,16 @@ def render_body(msg, extra=None):
|
||||||
format = ct_params.get("format", "fixed")
|
format = ct_params.get("format", "fixed")
|
||||||
if format == "fixed":
|
if format == "fixed":
|
||||||
bodytmpl = jenv.get_template("body_text_plain.html")
|
bodytmpl = jenv.get_template("body_text_plain.html")
|
||||||
|
partbytes = msg.get_payload(decode=True)
|
||||||
|
try:
|
||||||
|
parttext = partbytes.decode(charset, errors="replace")
|
||||||
|
except LookupError as e:
|
||||||
|
# Unknown encoding? Probably win-1252
|
||||||
|
print(e, file=sys.stderr)
|
||||||
|
parttext = partbytes.decode("windows-1252", errors="replace")
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"body": msg.get_payload(decode=True).decode(charset)
|
"body": parttext
|
||||||
}
|
}
|
||||||
return bodytmpl.render(context)
|
return bodytmpl.render(context)
|
||||||
elif format == "flowed":
|
elif format == "flowed":
|
||||||
|
@ -653,7 +661,12 @@ class TextFlowedPart:
|
||||||
charset = ct_params.get("charset", "iso-8859-1")
|
charset = ct_params.get("charset", "iso-8859-1")
|
||||||
format = ct_params.get("format", "fixed")
|
format = ct_params.get("format", "fixed")
|
||||||
delsp = ct_params.get("delsp", "no") == "yes"
|
delsp = ct_params.get("delsp", "no") == "yes"
|
||||||
raw_text = msg.get_payload(decode=True).decode(charset)
|
charset_map = {
|
||||||
|
"x-mac-roman": "mac_roman",
|
||||||
|
}
|
||||||
|
if charset in charset_map:
|
||||||
|
charset = charset_map[charset]
|
||||||
|
raw_text = msg.get_payload(decode=True).decode(charset, errors="replace")
|
||||||
raw_lines = raw_text.split("\n")
|
raw_lines = raw_text.split("\n")
|
||||||
|
|
||||||
for rl in raw_lines:
|
for rl in raw_lines:
|
||||||
|
|
Loading…
Reference in New Issue