Implement more text/enriched tags: nofill, param, ...

Nofill and param are handled specially by the parser. The parser now
also tolerates missing end tags.

Handle quite a few tags on output including some (like color and
fontfamily) that use a param.
This commit is contained in:
Peter J. Holzer 2019-03-16 21:53:06 +01:00
parent a997542cfe
commit d5c5368bad
1 changed files with 86 additions and 15 deletions

101
mbox2web
View File

@ -134,7 +134,8 @@ def render_body(msg):
}
bodyhtml = bodytmpl.render(context)
elif content_type == "text/enriched":
tepart = TextEnrichedPart(msg.get_payload())
payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")
tepart = TextEnrichedPart(payload)
bodytmpl = jenv.get_template("body_text_enriched.html")
context = {
"body": jinja2.Markup(tepart.as_string())
@ -375,10 +376,15 @@ class HTMLPart(html.parser.HTMLParser):
class TextEnrichedPart:
class TEElement:
def __init__(self, t):
def __init__(self, t, parent):
self.type = t.lower()
self.content = []
self.filled = True
if self.type == "nofill":
self.filled = False
elif parent:
self.filled = parent.filled
else:
self.filled = True
def append_text(self, s):
s = s.replace("<<", "<")
@ -395,6 +401,61 @@ class TextEnrichedPart:
elif self.type == "bold":
pre = "<b>"
post = "</b>"
elif self.type == "param":
# We shouldn't ever get here since the param should be consumed
# by the parent, but there are broken messages ...
return ""
elif self.type.startswith("x-"):
# Just ignore all experimental elements and render their
# contents.
pre = ""
post = ""
elif self.type == "flushleft":
pre = "<div class='flushleft'>"
post = "</div>"
elif self.type == "smaller":
# HTML has a "small" element, but that is meant for "side
# comments such as small print", while t/e "smaller" is purely
# typographical
pre = "<span style='font-size: 0.9em'>"
post = "</span>"
elif self.type == "color":
param = self.content.pop(0)
if param.type != "param":
raise RuntimeError("Expected 'param', got '%s'" % param.type)
colorstring = param.content[0]
if re.match(r'^\w+$', colorstring):
# a single word, i.e. a color name like "red" or "cyan".
# The 8 colors in the spec aren't a subset of the 17 colors in CSS2,
# but are recognized by most/all browsers. And if we encounter a non-standard
# color, the best we can do is let the browser handle it.
pass
else:
m = re.match(r'([0-9a-f]{4}),([0-9a-f]{4}),([0-9a-f]{4})', colorstring, re.IGNORECASE)
if m:
# an RGB triple. Use only the top 8 bits of each component:
colorstring = "#%s%s%s" % (m.group(1)[:2], m.group(2)[:2], m.group(3)[:2])
else:
# syntax error. Replace with "black"
colorstring = "#000"
pre = "<span style='color: %s'>" % colorstring
post = "</span>"
elif self.type == "nofill":
pre = "<div class='nofill'>"
post = "</div>"
elif self.type == "fontfamily":
param = self.content.pop(0)
if param.type != "param":
raise RuntimeError("Expected 'param', got '%s'" % param.type)
fontfamily = param.content[0]
if "'" in fontfamily or '"' in fontfamily:
raise RuntimeError("Can't handle quotes in font names (%s)" % fontfamily)
pre = "<span style='font-family: \"%s\"'>" % fontfamily
post = "</span>"
elif self.type == "bigger":
# HTML used to have a "big" element, but that has been removed from HTML5
pre = "<span style='font-size: 1.1em'>"
post = "</span>"
else:
raise NotImplementedError("Unknown type " + self.type)
@ -408,24 +469,34 @@ class TextEnrichedPart:
return s
def __init__(self, s):
self.stack = [ self.TEElement("") ]
self.stack = [ self.TEElement("", None) ]
while s:
stack_top = self.stack[-1]
m = re.match(r'(.*?)<(/?[A-Za-z0-9-]{,60})>(.*)', s, re.DOTALL)
if m:
if m.group(2).lower == "param" and re.match(r'\s*', m.group(1)):
stack_top.content.append(TEElement("param"))
text = m.group(1)
tag = m.group(2).lower()
if not (tag == "param" and re.match(r'\s*', text) or text == ""):
stack_top.append_text(text)
if tag[0] != "/":
new = self.TEElement(tag, stack_top)
stack_top.content.append(new)
self.stack.append(new)
else:
stack_top.append_text(m.group(1))
if m.group(2)[0] != "/":
new = self.TEElement(m.group(2))
stack_top.content.append(new)
self.stack.append(new)
closed_tag = tag[1:]
if stack_top.type == closed_tag:
self.stack.pop()
elif closed_tag in [e.type for e in self.stack]:
# We close a tag which has been opened, but it
# wasn't the last one. This is clearly a nesting
# error, but there was broken software (e.g.
# http://www.fozztexx.com/Mynah/) which used
# non-closing tags, and by just popping them off
# the stack we can "re-synchronize".
while self.stack.pop().type != closed_tag:
pass
else:
if stack_top.type == m.group(2)[1:]:
self.stack.pop()
else:
raise RuntimeError("Nesting error: Expected %s, got %s near %s", self.stack[-1].type, m.group(2)[1:], s)
raise RuntimeError("Nesting error: Expected %s, got %s near %s" % (self.stack[-1].type, closed_tag, s))
s = m.group(3)
else:
stack_top.append_text(s)