Implement more text/enriched tags: nofill, param, ...

Nofill and param are handled specially by the parser, which now also tolerates missing end tags. Quite a few more tags are handled on output, including some (like color and fontfamily) that take a param.
2019-03-16 21:53:06 +01:00 · 2019-03-16 21:53:06 +01:00 · d5c5368bad
parent a997542cfe
commit d5c5368bad
1 changed files with 86 additions and 15 deletions
--- a/101
+++ b/101
@ -134,7 +134,8 @@ def render_body(msg):
        }
        bodyhtml = bodytmpl.render(context)
    elif content_type == "text/enriched":
-        tepart = TextEnrichedPart(msg.get_payload())
+        payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")
+        tepart = TextEnrichedPart(payload)
        bodytmpl = jenv.get_template("body_text_enriched.html")
        context = {
            "body": jinja2.Markup(tepart.as_string())
@ -375,10 +376,15 @@ class HTMLPart(html.parser.HTMLParser):

 class TextEnrichedPart:
    class TEElement:
-        def __init__(self, t):
+        def __init__(self, t, parent):
            self.type = t.lower()
            self.content = []
-            self.filled = True
+            if self.type == "nofill":
+                self.filled = False
+            elif parent:
+                self.filled = parent.filled
+            else:
+                self.filled = True
        
        def append_text(self, s):
            s = s.replace("<<", "<")
@ -395,6 +401,61 @@ class TextEnrichedPart:
            elif self.type == "bold":
                pre = "<b>"
                post = "</b>"
+            elif self.type == "param":
+                # We shouldn't ever get here since the param should be consumed
+                # by the parent, but there are broken messages ...
+                return ""
+            elif self.type.startswith("x-"):
+                # Just ignore all experimental elements and render their
+                # contents.
+                pre = ""
+                post = ""
+            elif self.type == "flushleft":
+                pre = "<div class='flushleft'>"
+                post = "</div>"
+            elif self.type == "smaller":
+                # HTML has a "small" element, but that is meant for "side
+                # comments such as small print", while t/e "smaller" is purely
+                # typographical
+                pre = "<span style='font-size: 0.9em'>"
+                post = "</span>"
+            elif self.type == "color":
+                param = self.content.pop(0)
+                if param.type != "param":
+                    raise RuntimeError("Expected 'param', got '%s'" % param.type)
+                colorstring = param.content[0]
+                if re.match(r'^\w+$', colorstring):
+                    # a single word, i.e. a colorname like "red" or "cyan".
+                    # The 8 colors in the spec aren't a subset of the 17 colors in CSS2,
+                    # but are recognized by most/all browsers. And if we encounter a non-standard
+                    # color the best we can do is let the browser handle it.
+                    pass
+                else:
+                    m = re.match(r'([0-9a-f]{4}),([0-9a-f]{4}),([0-9a-f]{4})', colorstring, re.IGNORECASE)
+                    if m:
+                        # an RGB triple. Use only the top 8 bits of each component:
+                        colorstring = "#%s%s%s" % (m.group(1)[:2], m.group(2)[:2], m.group(3)[:2])
+                    else:
+                        # syntax error. Replace with "black"
+                        colorstring = "#000"
+                pre = "<span style='color: %s'>" % colorstring
+                post = "</span>"
+            elif self.type == "nofill":
+                pre = "<div class='nofill'>"
+                post = "</div>"
+            elif self.type == "fontfamily":
+                param = self.content.pop(0)
+                if param.type != "param":
+                    raise RuntimeError("Expected 'param', got '%s'" % param.type)
+                fontfamily = param.content[0]
+                if "'" in fontfamily or '"' in fontfamily:
+                    raise RuntimeError("Can't handle quotes in font names (%s)" % fontfamily)
+                pre = "<span style='font-family: \"%s\"'>" % fontfamily
+                post = "</span>"
+            elif self.type == "bigger":
+                # HTML used to have a "big" element, but that has been removed from HTML5
+                pre = "<span style='font-size: 1.1em'>"
+                post = "</span>"
            else:
                raise NotImplementedError("Unknown type " + self.type)

@ -408,24 +469,34 @@ class TextEnrichedPart:
            return s

    def __init__(self, s):
-        self.stack = [ self.TEElement("") ]
+        self.stack = [ self.TEElement("", None) ]
        while s:
            stack_top = self.stack[-1]
            m = re.match(r'(.*?)<(/?[A-Za-z0-9-]{,60})>(.*)', s, re.DOTALL)
            if m:
-                if m.group(2).lower == "param" and re.match(r'\s*', m.group(1)):
-                    stack_top.content.append(TEElement("param"))
+                text = m.group(1)
+                tag = m.group(2).lower()
+                if not (tag == "param" and re.match(r'\s*', text) or text == ""):
+                    stack_top.append_text(text)
+                if tag[0] != "/":
+                    new = self.TEElement(tag, stack_top)
+                    stack_top.content.append(new)
+                    self.stack.append(new)
                else:
-                    stack_top.append_text(m.group(1))
-                    if m.group(2)[0] != "/":
-                        new = self.TEElement(m.group(2))
-                        stack_top.content.append(new)
-                        self.stack.append(new)
+                    closed_tag = tag[1:]
+                    if stack_top.type == closed_tag:
+                        self.stack.pop()
+                    elif closed_tag in [e.type for e in self.stack]:
+                        # We close a tag which has been opened, but it
+                        # wasn't the last one. This is clearly a nesting
+                        # error, but there was broken software (e.g.
+                        # http://www.fozztexx.com/Mynah/) which used
+                        # non-closing tags, and by just popping them off
+                        # the stack we can "re-synchronize".
+                        while self.stack.pop().type != closed_tag:
+                            pass
                    else:
-                        if stack_top.type == m.group(2)[1:]:
-                            self.stack.pop()
-                        else:
-                            raise RuntimeError("Nesting error: Expected %s, got %s near %s", self.stack[-1].type, m.group(2)[1:], s)
+                        raise RuntimeError("Nesting error: Expected %s, got %s near %s" % (self.stack[-1].type, closed_tag, s))
                s = m.group(3)
            else:
                stack_top.append_text(s)