Implement more text/enriched tags: nofill, param, ...
Nofill and param are handled specially by the parser. The parser now also tolerates missing end tags. Handle quite a few tags on output, including some (like color and fontfamily) that use a param.
This commit is contained in:
parent
a997542cfe
commit
d5c5368bad
101
mbox2web
101
mbox2web
|
@ -134,7 +134,8 @@ def render_body(msg):
|
|||
}
|
||||
bodyhtml = bodytmpl.render(context)
|
||||
elif content_type == "text/enriched":
|
||||
tepart = TextEnrichedPart(msg.get_payload())
|
||||
payload = msg.get_payload(decode=True).decode(msg.get_charset() or "iso-8859-1")
|
||||
tepart = TextEnrichedPart(payload)
|
||||
bodytmpl = jenv.get_template("body_text_enriched.html")
|
||||
context = {
|
||||
"body": jinja2.Markup(tepart.as_string())
|
||||
|
@ -375,10 +376,15 @@ class HTMLPart(html.parser.HTMLParser):
|
|||
|
||||
class TextEnrichedPart:
|
||||
class TEElement:
|
||||
def __init__(self, t):
|
||||
def __init__(self, t, parent):
|
||||
self.type = t.lower()
|
||||
self.content = []
|
||||
self.filled = True
|
||||
if self.type == "nofill":
|
||||
self.filled = False
|
||||
elif parent:
|
||||
self.filled = parent.filled
|
||||
else:
|
||||
self.filled = True
|
||||
|
||||
def append_text(self, s):
|
||||
s = s.replace("<<", "<")
|
||||
|
@ -395,6 +401,61 @@ class TextEnrichedPart:
|
|||
elif self.type == "bold":
|
||||
pre = "<b>"
|
||||
post = "</b>"
|
||||
elif self.type == "param":
|
||||
# We shouldn't ever get here since the param should be consumed
|
||||
# by the parent, but there are broken messages ...
|
||||
return ""
|
||||
elif self.type.startswith("x-"):
|
||||
# Just ignore all experimental elements and render their
|
||||
# contents.
|
||||
pre = ""
|
||||
post = ""
|
||||
elif self.type == "flushleft":
|
||||
pre = "<div class='flushleft'>"
|
||||
post = "</div>"
|
||||
elif self.type == "smaller":
|
||||
# HTML has a "small" element, but that is meant for "side
|
||||
# comments such as small print", while t/e "smaller" is purely
|
||||
# typographical
|
||||
pre = "<span style='font-size: 0.9em'>"
|
||||
post = "</span>"
|
||||
elif self.type == "color":
|
||||
param = self.content.pop(0)
|
||||
if param.type != "param":
|
||||
raise RuntimeError("Expected 'param', got '%s'" % param.type)
|
||||
colorstring = param.content[0]
|
||||
if re.match(r'^\w+$', colorstring):
|
||||
# a single word, i.e. a colorname like "red" or "cyan".
|
||||
# The 8 colors in the spec aren't a subset of the 17 colors in CSS2,
|
||||
# but are recognized by most/all browsers. And if we encounter a non-standard
|
||||
# color the best we can do is let the browser handle it.
|
||||
pass
|
||||
else:
|
||||
m = re.match(r'([0-9a-f]{4}),([0-9a-f]{4}),([0-9a-f]{4})', colorstring, re.IGNORECASE)
|
||||
if m:
|
||||
# an RGB triple. Use only the top 8 bits of each component:
|
||||
colorstring = "#%s%s%s" % (m.group(1)[:2], m.group(2)[:2], m.group(3)[:2])
|
||||
else:
|
||||
# syntax error. Replace with "black"
|
||||
colorstring = "#000"
|
||||
pre = "<span style='color: %s'>" % colorstring
|
||||
post = "</span>"
|
||||
elif self.type == "nofill":
|
||||
pre = "<div class='nofill'>"
|
||||
post = "</div>"
|
||||
elif self.type == "fontfamily":
|
||||
param = self.content.pop(0)
|
||||
if param.type != "param":
|
||||
raise RuntimeError("Expected 'param', got '%s'" % param.type)
|
||||
fontfamily = param.content[0]
|
||||
if "'" in fontfamily or '"' in fontfamily:
|
||||
raise RuntimeError("Can't handle quotes in font names (%s)" % fontfamily)
|
||||
pre = "<span style='font-family: \"%s\"'>" % fontfamily
|
||||
post = "</span>"
|
||||
elif self.type == "bigger":
|
||||
# HTML used to have a "big" element, but that has been removed from HTML5
|
||||
pre = "<span style='font-size: 1.1em'>"
|
||||
post = "</span>"
|
||||
else:
|
||||
raise NotImplementedError("Unknown type " + self.type)
|
||||
|
||||
|
@ -408,24 +469,34 @@ class TextEnrichedPart:
|
|||
return s
|
||||
|
||||
def __init__(self, s):
|
||||
self.stack = [ self.TEElement("") ]
|
||||
self.stack = [ self.TEElement("", None) ]
|
||||
while s:
|
||||
stack_top = self.stack[-1]
|
||||
m = re.match(r'(.*?)<(/?[A-Za-z0-9-]{,60})>(.*)', s, re.DOTALL)
|
||||
if m:
|
||||
if m.group(2).lower == "param" and re.match(r'\s*', m.group(1)):
|
||||
stack_top.content.append(TEElement("param"))
|
||||
text = m.group(1)
|
||||
tag = m.group(2).lower()
|
||||
if not (tag == "param" and re.match(r'\s*', text) or text == ""):
|
||||
stack_top.append_text(text)
|
||||
if tag[0] != "/":
|
||||
new = self.TEElement(tag, stack_top)
|
||||
stack_top.content.append(new)
|
||||
self.stack.append(new)
|
||||
else:
|
||||
stack_top.append_text(m.group(1))
|
||||
if m.group(2)[0] != "/":
|
||||
new = self.TEElement(m.group(2))
|
||||
stack_top.content.append(new)
|
||||
self.stack.append(new)
|
||||
closed_tag = tag[1:]
|
||||
if stack_top.type == closed_tag:
|
||||
self.stack.pop()
|
||||
elif closed_tag in [e.type for e in self.stack]:
|
||||
# We close a tag which has been opened, but it
|
||||
# wasn't the last one. This is clearly a nesting
|
||||
# error, but there was broken software (e.g.
|
||||
# http://www.fozztexx.com/Mynah/) which used
|
||||
# non-closing tags, and by just popping them off
|
||||
# the stack we can "re-synchronize".
|
||||
while self.stack.pop().type != closed_tag:
|
||||
pass
|
||||
else:
|
||||
if stack_top.type == m.group(2)[1:]:
|
||||
self.stack.pop()
|
||||
else:
|
||||
raise RuntimeError("Nesting error: Expected %s, got %s near %s", self.stack[-1].type, m.group(2)[1:], s)
|
||||
raise RuntimeError("Nesting error: Expected %s, got %s near %s" % (self.stack[-1].type, closed_tag, s))
|
||||
s = m.group(3)
|
||||
else:
|
||||
stack_top.append_text(s)
|
||||
|
|
Loading…
Reference in New Issue