diff --git a/mbox2web b/mbox2web index 692a160..2a29c36 100755 --- a/mbox2web +++ b/mbox2web @@ -134,7 +134,8 @@ def render_body(msg): } bodyhtml = bodytmpl.render(context) elif content_type == "text/enriched": - tepart = TextEnrichedPart(msg.get_payload()) + payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1") + tepart = TextEnrichedPart(payload) bodytmpl = jenv.get_template("body_text_enriched.html") context = { "body": jinja2.Markup(tepart.as_string()) @@ -375,10 +376,15 @@ class HTMLPart(html.parser.HTMLParser): class TextEnrichedPart: class TEElement: - def __init__(self, t): + def __init__(self, t, parent): self.type = t.lower() self.content = [] - self.filled = True + if self.type == "nofill": + self.filled = False + elif parent: + self.filled = parent.filled + else: + self.filled = True def append_text(self, s): s = s.replace("<<", "<") @@ -395,6 +401,61 @@ class TextEnrichedPart: elif self.type == "bold": pre = "" post = "" + elif self.type == "param": + # We shouldn't ever get here since the param should be consumed + # by the parent, but there are broken messages ... + return "" + elif self.type.startswith("x-"): + # Just ignore all experimental elements and render their + # contents. + pre = "" + post = "" + elif self.type == "flushleft": + pre = "
" + post = "
" + elif self.type == "smaller": + # HTML has a "small" element, but that is meant for "side + # comments such as small print", while t/e "smaller" is purely + # typographical + pre = "" + post = "" + elif self.type == "color": + param = self.content.pop(0) + if param.type != "param": + raise RuntimeError("Expected 'param', got '%s'" % param.type) + colorstring = param.content[0] + if re.match(r'^\w+$', colorstring): + # a single word, i.e. a colorname like "red" or cyan". + # The 8 colors in the spec aren't a subset of the 17 colors in CSS2, + # but recognized by most/all browsers. And if we encounter a non-standard + # color the best we can do is let the browser handle it. + pass + else: + m = re.match(r'([0-9a-f]{4}),([0-9a-f]{4}),([0-9a-f]{4})', colorstring, re.IGNORECASE) + if m: + # an RGB triple. Use only the top 8 bits of each component: + colorstring = "#%s%s%s" % (m.group(1)[:2], m.group(2)[:2], m.group(3)[:2]) + else: + # syntax error. Replace with "black" + colorstring = "#000" + pre = "" % colorstring + post = "" + elif self.type == "nofill": + pre = "
" + post = "
" + elif self.type == "fontfamily": + param = self.content.pop(0) + if param.type != "param": + raise RuntimeError("Expected 'param', got '%s'" % param.type) + fontfamily = param.content[0] + if "'" in fontfamily or '"' in fontfamily: + raise RuntimeError("Can't handle quotes in font names (%s)" % fontfamily) + pre = "" % fontfamily + post = "" + elif self.type == "bigger": + # HTML used to have a "big" element, but that has been removed from HTML5 + pre = "" + post = "" else: raise NotImplementedError("Unknown type " + self.type) @@ -408,24 +469,34 @@ class TextEnrichedPart: return s def __init__(self, s): - self.stack = [ self.TEElement("") ] + self.stack = [ self.TEElement("", None) ] while s: stack_top = self.stack[-1] m = re.match(r'(.*?)<(/?[A-Za-z0-9-]{,60})>(.*)', s, re.DOTALL) if m: - if m.group(2).lower == "param" and re.match(r'\s*', m.group(1)): - stack_top.content.append(TEElement("param")) + text = m.group(1) + tag = m.group(2).lower() + if not (tag == "param" and re.match(r'\s*', text) or text == ""): + stack_top.append_text(text) + if tag[0] != "/": + new = self.TEElement(tag, stack_top) + stack_top.content.append(new) + self.stack.append(new) else: - stack_top.append_text(m.group(1)) - if m.group(2)[0] != "/": - new = self.TEElement(m.group(2)) - stack_top.content.append(new) - self.stack.append(new) + closed_tag = tag[1:] + if stack_top.type == closed_tag: + self.stack.pop() + elif closed_tag in [e.type for e in self.stack]: + # We close a tag which has been opened, but it + # wasn't the last one. This is clearly a nesting + # error, but there was broken software (e.g. + # http://www.fozztexx.com/Mynah/) which used + # non-closing tags, and by just popping them off + # the stack we can "re-synchronize". + while self.stack.pop().type != closed_tag: + pass else: - if stack_top.type == m.group(2)[1:]: - self.stack.pop() - else: - raise RuntimeError("Nesting error: Expected %s, got %s near %s", self.stack[-1].type, m.group(2)[1:], s) + raise RuntimeError("Nesting error: Expected %s, got %s near %s" % (self.stack[-1].type, closed_tag, s)) s = m.group(3) else: stack_top.append_text(s)