diff --git a/mbox2web b/mbox2web
index 692a160..2a29c36 100755
--- a/mbox2web
+++ b/mbox2web
@@ -134,7 +134,8 @@ def render_body(msg):
}
bodyhtml = bodytmpl.render(context)
elif content_type == "text/enriched":
- tepart = TextEnrichedPart(msg.get_payload())
+ payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")
+ tepart = TextEnrichedPart(payload)
bodytmpl = jenv.get_template("body_text_enriched.html")
context = {
"body": jinja2.Markup(tepart.as_string())
@@ -375,10 +376,15 @@ class HTMLPart(html.parser.HTMLParser):
class TextEnrichedPart:
class TEElement:
- def __init__(self, t):
+ def __init__(self, t, parent):
self.type = t.lower()
self.content = []
- self.filled = True
+ if self.type == "nofill":
+ self.filled = False
+ elif parent:
+ self.filled = parent.filled
+ else:
+ self.filled = True
def append_text(self, s):
s = s.replace("<<", "<")
@@ -395,6 +401,61 @@ class TextEnrichedPart:
elif self.type == "bold":
pre = ""
post = ""
+ elif self.type == "param":
+ # We shouldn't ever get here since the param should be consumed
+ # by the parent, but there are broken messages ...
+ return ""
+ elif self.type.startswith("x-"):
+ # Just ignore all experimental elements and render their
+ # contents.
+ pre = ""
+ post = ""
+ elif self.type == "flushleft":
+ pre = "
"
+ post = "
"
+ elif self.type == "smaller":
+ # HTML has a "small" element, but that is meant for "side
+ # comments such as small print", while t/e "smaller" is purely
+ # typographical
+ pre = ""
+ post = ""
+ elif self.type == "color":
+ param = self.content.pop(0)
+ if param.type != "param":
+ raise RuntimeError("Expected 'param', got '%s'" % param.type)
+ colorstring = param.content[0]
+ if re.match(r'^\w+$', colorstring):
+ # a single word, i.e. a colorname like "red" or "cyan".
+ # The 8 colors in the spec aren't a subset of the 17 colors in CSS2,
+ # but they are recognized by most/all browsers. And if we encounter a
+ # non-standard color, the best we can do is let the browser handle it.
+ pass
+ else:
+ m = re.match(r'([0-9a-f]{4}),([0-9a-f]{4}),([0-9a-f]{4})', colorstring, re.IGNORECASE)
+ if m:
+ # an RGB triple. Use only the top 8 bits of each component:
+ colorstring = "#%s%s%s" % (m.group(1)[:2], m.group(2)[:2], m.group(3)[:2])
+ else:
+ # syntax error. Replace with "black"
+ colorstring = "#000"
+ pre = "" % colorstring
+ post = ""
+ elif self.type == "nofill":
+ pre = ""
+ post = "
"
+ elif self.type == "fontfamily":
+ param = self.content.pop(0)
+ if param.type != "param":
+ raise RuntimeError("Expected 'param', got '%s'" % param.type)
+ fontfamily = param.content[0]
+ if "'" in fontfamily or '"' in fontfamily:
+ raise RuntimeError("Can't handle quotes in font names (%s)" % fontfamily)
+ pre = "" % fontfamily
+ post = ""
+ elif self.type == "bigger":
+ # HTML used to have a "big" element, but that has been removed from HTML5
+ pre = ""
+ post = ""
else:
raise NotImplementedError("Unknown type " + self.type)
@@ -408,24 +469,34 @@ class TextEnrichedPart:
return s
def __init__(self, s):
- self.stack = [ self.TEElement("") ]
+ self.stack = [ self.TEElement("", None) ]
while s:
stack_top = self.stack[-1]
m = re.match(r'(.*?)<(/?[A-Za-z0-9-]{,60})>(.*)', s, re.DOTALL)
if m:
- if m.group(2).lower == "param" and re.match(r'\s*', m.group(1)):
- stack_top.content.append(TEElement("param"))
+ text = m.group(1)
+ tag = m.group(2).lower()
+ if not (tag == "param" and re.match(r'\s*', text) or text == ""):
+ stack_top.append_text(text)
+ if tag[0] != "/":
+ new = self.TEElement(tag, stack_top)
+ stack_top.content.append(new)
+ self.stack.append(new)
else:
- stack_top.append_text(m.group(1))
- if m.group(2)[0] != "/":
- new = self.TEElement(m.group(2))
- stack_top.content.append(new)
- self.stack.append(new)
+ closed_tag = tag[1:]
+ if stack_top.type == closed_tag:
+ self.stack.pop()
+ elif closed_tag in [e.type for e in self.stack]:
+ # We close a tag which has been opened, but it
+ # wasn't the last one. This is clearly a nesting
+ # error, but there was broken software (e.g.
+ # http://www.fozztexx.com/Mynah/) which used
+ # non-closing tags, and by just popping them off
+ # the stack we can "re-synchronize".
+ while self.stack.pop().type != closed_tag:
+ pass
else:
- if stack_top.type == m.group(2)[1:]:
- self.stack.pop()
- else:
- raise RuntimeError("Nesting error: Expected %s, got %s near %s", self.stack[-1].type, m.group(2)[1:], s)
+ raise RuntimeError("Nesting error: Expected %s, got %s near %s" % (self.stack[-1].type, closed_tag, s))
s = m.group(3)
else:
stack_top.append_text(s)